diff --git a/CMakeLists.txt b/CMakeLists.txt
index af065158699199af61aca02f563dda1b1cddf2b1..7c497e3e048c4dd8d5c1291286de2ab9d218b914 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,6 +85,17 @@ include(generic)
include(flags)
endif()
+if (APP)
+include(external/zlib)
+include(external/boost)
+include(external/protobuf)
+include(external/gflags)
+include(external/glog)
+include(external/pybind11)
+include(external/python)
+include(generic)
+endif()
+
if (SERVER)
include(external/cudnn)
include(paddlepaddle)
diff --git a/README.md b/README.md
index 747c140ded49f279c289b0bc8a3b4b1963243040..1818ddd61cc5423c4a590815930d007303f18e81 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,12 @@
+([简体中文](./README_CN.md)|English)
+
+
@@ -23,28 +26,20 @@ We consider deploying deep learning inference service online to be a user-facing
-Some Key Features
-
-- Integrate with Paddle training pipeline seamlessly, most paddle models can be deployed **with one line command**.
-- **Industrial serving features** supported, such as models management, online loading, online A/B testing etc.
-- **Distributed Key-Value indexing** supported which is especially useful for large scale sparse features as model inputs.
-- **Highly concurrent and efficient communication** between clients and servers supported.
-- **Multiple programming languages** supported on client side, such as Golang, C++ and python.
-- **Extensible framework design** which can support model serving beyond Paddle.
Installation
We **highly recommend** you **run Paddle Serving in Docker**; please visit [Run in Docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md)
```
# Run CPU Docker
-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker pull hub.baidubce.com/paddlepaddle/serving:latest
+docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it test bash
```
```
# Run GPU Docker
-nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
-nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker exec -it test bash
```
@@ -56,10 +51,44 @@ pip install paddle-serving-server-gpu # GPU
You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source, add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to pip command) to speed up the download.
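+
+For example, to install the client package through the Tsinghua mirror:
+
+``` shell
+pip install paddle-serving-client -i https://pypi.tuna.tsinghua.edu.cn/simple
+```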
+If you need to install modules compiled with the develop branch, please download the packages from the [latest packages list](./doc/LATEST_PACKAGES.md) and install them with the `pip install` command.
+
The client package supports CentOS 7 and Ubuntu 18, or you can use the HTTP service without installing the client.
+
+ Pre-built services with Paddle Serving
+
+Chinese Word Segmentation
+
+``` shell
+> python -m paddle_serving_app.package -get_model lac
+> tar -xzf lac.tar.gz
+> python lac_web_service.py 9292 &
+> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
+{"result":[{"word_seg":"我|爱|北京|天安门"}]}
+```
+
+Image Classification
+
+``` shell
+> python -m paddle_serving_app.package -get_model resnet_v2_50_imagenet
+> tar -xzf resnet_v2_50_imagenet.tar.gz
+> python resnet50_imagenet_classify.py resnet50_serving_model &
+> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
+{"result":{"label":["daisy"],"prob":[0.9341403245925903]}}
+```
+
+
Quick Start Example
+This quick start example is only for users who already have a model to deploy; we provide a ready-to-deploy model here. If you want to learn how to use Paddle Serving all the way from offline training to online serving, please refer to [Train_To_Service](https://github.com/PaddlePaddle/Serving/blob/develop/doc/TRAIN_TO_SERVICE.md)
+
### Boston House Price Prediction model
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
@@ -82,7 +111,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `port` | int | `9292` | Exposed port of current service to users|
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
-| `mem_optim` | bool | `False` | Enable memory optimization |
+| `mem_optim` | bool | `False` | Enable memory / graphics memory optimization |
+| `ir_optim` | bool | `False` | Enable analysis and optimization of the computation graph |
+| `use_mkl` (CPU version only) | bool | `False` | Run inference with MKL |
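+
+For example, a minimal sketch of starting the server with these optimizations enabled (assuming the three boolean options are exposed as plain command-line switches of `paddle_serving_server.serve`):
+
+``` shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --mem_optim --ir_optim --use_mkl
+```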
Here, we use `curl` to send an HTTP POST request to the service we just started. Users can use any Python library to send HTTP POST requests as well, e.g., [requests](https://requests.readthedocs.io/en/master/).
@@ -113,138 +144,13 @@ print(fetch_map)
```
Here, the `client.predict` function takes two arguments. `feed` is a `python dict` mapping model input variable alias names to values. `fetch` specifies the prediction variables to be returned from the server. In the example, the alias names `"x"` and `"price"` were assigned when the servable model was saved during training.
- Pre-built services with Paddle Serving
-
-Chinese Word Segmentation
-
-- **Description**:
-``` shell
-Chinese word segmentation HTTP service that can be deployed with one line command.
-```
-
-- **Download Servable Package**:
-``` shell
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
-```
-- **Host web service**:
-``` shell
-tar -xzf lac_model_jieba_web.tar.gz
-python lac_web_service.py jieba_server_model/ lac_workdir 9292
-```
-- **Request sample**:
-``` shell
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
-```
-- **Request result**:
-``` shell
-{"word_seg":"我|爱|北京|天安门"}
-```
-
-Image Classification
-
-- **Description**:
-``` shell
-Image classification trained with Imagenet dataset. A label and corresponding probability will be returned.
-Note: This demo needs paddle-serving-server-gpu.
-```
-
-- **Download Servable Package**:
-``` shell
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/imagenet_demo.tar.gz
-```
-- **Host web service**:
-``` shell
-tar -xzf imagenet_demo.tar.gz
-python image_classification_service_demo.py resnet50_serving_model
-```
-- **Request sample**:
-
-
-``` shell
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
-```
-- **Request result**:
-``` shell
-{"label":"daisy","prob":0.9341403245925903}
-```
-
-
More Demos
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | Bert-Base-Baike |
-| URL | [https://paddle-serving.bj.bcebos.com/bert_example/bert_seq128.tar.gz](https://paddle-serving.bj.bcebos.com/bert_example%2Fbert_seq128.tar.gz) |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert |
-| Description | Get semantic representation from a Chinese Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | Resnet50-Imagenet |
-| URL | [https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz](https://paddle-serving.bj.bcebos.com/imagenet-example%2FResNet50_vd.tar.gz) |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
-| Description | Get image semantic representation from an image |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | Resnet101-Imagenet |
-| URL | https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet101_vd.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
-| Description | Get image semantic representation from an image |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | CNN-IMDB |
-| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| Description | Get category probability from an English Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | LSTM-IMDB |
-| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| Description | Get category probability from an English Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | BOW-IMDB |
-| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| Description | Get category probability from an English Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | Jieba-LAC |
-| URL | https://paddle-serving.bj.bcebos.com/lac/lac_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/lac |
-| Description | Get word segmentation from a Chinese Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | DNN-CTR |
-| URL | https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/criteo_ctr |
-| Description | Get click probability from a feature vector of item |
+Some Key Features of Paddle Serving
+- Integrate with the Paddle training pipeline seamlessly; most Paddle models can be deployed **with a one-line command**.
+- **Industrial serving features** supported, such as model management, online loading, online A/B testing, etc.
+- **Distributed Key-Value indexing** supported, which is especially useful for large-scale sparse features as model inputs.
+- **Highly concurrent and efficient communication** between clients and servers supported.
+- **Multiple programming languages** supported on the client side, such as Golang, C++ and Python.
Document
@@ -259,11 +165,13 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
- [How to develop a new Web Service?](doc/NEW_WEB_SERVICE.md)
- [Golang client](doc/IMDB_GO_CLIENT.md)
- [Compile from source code](doc/COMPILE.md)
+- [Deploy Web Service with uWSGI](doc/UWSGI_DEPLOY.md)
+- [Hot loading for model file](doc/HOT_LOADING_IN_SERVING.md)
### About Efficiency
- [How to profile Paddle Serving latency?](python/examples/util)
-- [How to optimize performance?(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
-- [Deploy multi-services on one GPU(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
- [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
diff --git a/README_CN.md b/README_CN.md
index 266fca330d7597d6188fa0022e6376bc23149c74..29cf095248f4c125b3dba7146e67efe8b7abae6c 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -1,9 +1,12 @@
+(简体中文|[English](./README.md))
+
+
@@ -24,14 +27,7 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务
-核心功能
-- 与Paddle训练紧密连接,绝大部分Paddle模型可以 **一键部署**.
-- 支持 **工业级的服务能力** 例如模型管理,在线加载,在线A/B测试等.
-- 支持 **分布式键值对索引** 助力于大规模稀疏特征作为模型输入.
-- 支持客户端和服务端之间 **高并发和高效通信**.
-- 支持 **多种编程语言** 开发客户端,例如Golang,C++和Python.
-- **可伸缩框架设计** 可支持不限于Paddle的模型服务.
安装
@@ -39,14 +35,14 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务
```
# 启动 CPU Docker
-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker pull hub.baidubce.com/paddlepaddle/serving:latest
+docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it test bash
```
```
# 启动 GPU Docker
-nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
-nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker exec -it test bash
```
```shell
@@ -57,9 +53,42 @@ pip install paddle-serving-server-gpu # GPU
您可能需要使用国内镜像源(例如清华源, 在pip命令中添加`-i https://pypi.tuna.tsinghua.edu.cn/simple`)来加速下载。
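+
+例如,通过清华镜像源安装客户端安装包:
+
+``` shell
+pip install paddle-serving-client -i https://pypi.tuna.tsinghua.edu.cn/simple
+```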
+如果需要使用develop分支编译的安装包,请从[最新安装包列表](./doc/LATEST_PACKAGES.md)中获取下载地址进行下载,使用`pip install`命令进行安装。
+
客户端安装包支持Centos 7和Ubuntu 18,或者您可以使用HTTP服务,这种情况下不需要安装客户端。
-快速启动示例
+ Paddle Serving预装的服务
+
+中文分词
+
+``` shell
+> python -m paddle_serving_app.package -get_model lac
+> tar -xzf lac.tar.gz
+> python lac_web_service.py 9292 &
+> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
+{"result":[{"word_seg":"我|爱|北京|天安门"}]}
+```
+
+图像分类
+
+``` shell
+> python -m paddle_serving_app.package -get_model resnet_v2_50_imagenet
+> tar -xzf resnet_v2_50_imagenet.tar.gz
+> python resnet50_imagenet_classify.py resnet50_serving_model &
+> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
+{"result":{"label":["daisy"],"prob":[0.9341403245925903]}}
+```
+
快速开始示例
+
+这个快速开始示例主要是为了给那些已经有一个要部署的模型的用户准备的,而且我们也提供了一个可以用来部署的模型。如果您想知道如何从离线训练到在线服务走完全流程,请参考[从训练到部署](https://github.com/PaddlePaddle/Serving/blob/develop/doc/TRAIN_TO_SERVICE_CN.md)
波士顿房价预测
@@ -87,6 +116,8 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
| `mem_optim` | bool | `False` | Enable memory optimization |
+| `ir_optim` | bool | `False` | Enable analysis and optimization of the computation graph |
+| `use_mkl` (CPU version only) | bool | `False` | Run inference with MKL |
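+
+例如,下面是开启这些优化选项来启动服务的一个简单示例(假设这三个布尔选项均以 `paddle_serving_server.serve` 的命令行开关形式提供):
+
+``` shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --mem_optim --ir_optim --use_mkl
+```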
我们使用 `curl` 命令来发送HTTP POST请求给刚刚启动的服务。用户也可以调用python库来发送HTTP POST请求,请参考英文文档 [requests](https://requests.readthedocs.io/en/master/)。
@@ -118,139 +149,13 @@ print(fetch_map)
```
在这里,`client.predict`函数具有两个参数。`feed`是带有模型输入变量别名和值的`python dict`。`fetch`指定要从服务器返回的预测变量。在该示例中,tensor的别名`"x"`和`"price"`是在训练过程中保存可服务模型时指定的。
-Paddle Serving预装的服务
-
-中文分词模型
-
-- **介绍**:
-``` shell
-本示例为中文分词HTTP服务一键部署
-```
-
-- **下载服务包**:
-``` shell
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
-```
-- **启动web服务**:
-``` shell
-tar -xzf lac_model_jieba_web.tar.gz
-python lac_web_service.py jieba_server_model/ lac_workdir 9292
-```
-- **客户端请求示例**:
-``` shell
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
-```
-- **返回结果示例**:
-``` shell
-{"word_seg":"我|爱|北京|天安门"}
-```
-
-图像分类模型
-
-- **介绍**:
-``` shell
-图像分类模型由Imagenet数据集训练而成,该服务会返回一个标签及其概率
-注意:本示例需要安装paddle-serving-server-gpu
-```
-
-- **下载服务包**:
-``` shell
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/imagenet_demo.tar.gz
-```
-- **启动web服务**:
-``` shell
-tar -xzf imagenet_demo.tar.gz
-python image_classification_service_demo.py resnet50_serving_model
-```
-- **客户端请求示例**:
-
-``` shell
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
-```
-- **返回结果示例**:
-``` shell
-{"label":"daisy","prob":0.9341403245925903}
-```
-
-
更多示例
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | Bert-Base-Baike |
-| 下载链接 | [https://paddle-serving.bj.bcebos.com/bert_example/bert_seq128.tar.gz](https://paddle-serving.bj.bcebos.com/bert_example%2Fbert_seq128.tar.gz) |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert |
-| 介绍 | 获得一个中文语句的语义表示 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | Resnet50-Imagenet |
-| 下载链接 | [https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz](https://paddle-serving.bj.bcebos.com/imagenet-example%2FResNet50_vd.tar.gz) |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
-| 介绍 | 获得一张图片的图像语义表示 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | Resnet101-Imagenet |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet101_vd.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
-| 介绍 | 获得一张图片的图像语义表示 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | CNN-IMDB |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| 介绍 | 从一个中文语句获得类别及其概率 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | LSTM-IMDB |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| 介绍 | 从一个英文语句获得类别及其概率 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | BOW-IMDB |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| 介绍 | 从一个英文语句获得类别及其概率 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | Jieba-LAC |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/lac/lac_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/lac |
-| 介绍 | 获取中文语句的分词 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | DNN-CTR |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/criteo_ctr |
-| 介绍 | 从项目的特征向量中获得点击概率 |
-
+Paddle Serving的核心功能
+- 与Paddle训练紧密连接,绝大部分Paddle模型可以 **一键部署**.
+- 支持 **工业级的服务能力** 例如模型管理,在线加载,在线A/B测试等.
+- 支持 **分布式键值对索引** 助力于大规模稀疏特征作为模型输入.
+- 支持客户端和服务端之间 **高并发和高效通信**.
+- 支持 **多种编程语言** 开发客户端,例如Golang,C++和Python.
文档
@@ -265,11 +170,13 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
- [如何开发一个新的Web Service?](doc/NEW_WEB_SERVICE_CN.md)
- [如何在Paddle Serving使用Go Client?](doc/IMDB_GO_CLIENT_CN.md)
- [如何编译PaddleServing?](doc/COMPILE_CN.md)
+- [如何使用uWSGI部署Web Service](doc/UWSGI_DEPLOY_CN.md)
+- [如何实现模型文件热加载](doc/HOT_LOADING_IN_SERVING_CN.md)
### 关于Paddle Serving性能
- [如何测试Paddle Serving性能?](python/examples/util/)
-- [如何优化性能?](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
-- [在一张GPU上启动多个预测服务](doc/PERFORMANCE_OPTIM_CN.md)
+- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
+- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU版Benchmarks](doc/BENCHMARKING.md)
- [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index c9ac3d2f04db833f34211af3cc7aaac2d5184bf9..7670444ed1e021376fa44491973bb748cf611ecf 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
-SET(PADDLE_VERSION "1.7.1")
+SET(PADDLE_VERSION "1.7.2")
if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index ce2e5e3814ae1e585976c5d9c8848b506293ee67..56296b53319fb185c772ffa10e8b31c8203862fb 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -23,6 +23,11 @@ add_subdirectory(pdcodegen)
add_subdirectory(sdk-cpp)
endif()
+if (APP)
+add_subdirectory(configure)
+endif()
+
+
if(CLIENT)
add_subdirectory(general-client)
endif()
diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt
index b6384fc99ea3df6d71a61865e3aabf5b39b510dd..d3e5b75da96ad7a0789866a4a2c474fad988c21b 100644
--- a/core/configure/CMakeLists.txt
+++ b/core/configure/CMakeLists.txt
@@ -1,3 +1,4 @@
+if (SERVER OR CLIENT)
LIST(APPEND protofiles
${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto
@@ -28,6 +29,7 @@ FILE(GLOB inc ${CMAKE_CURRENT_BINARY_DIR}/*.pb.h)
install(FILES ${inc}
DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
+endif()
py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
@@ -51,6 +53,14 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
endif()
+if (APP)
+add_custom_command(TARGET general_model_config_py_proto POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
+ COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
+ COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto."
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif()
+
if (SERVER)
py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index 4bdc233099cffbc7949a6b5cf8627fe6461f565c..8956022685090c94be2037445c646e9fbffd1a5c 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -43,6 +43,7 @@ message EngineDesc {
optional bool enable_memory_optimization = 13;
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
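+  // Toggle for IR graph optimization passes in the inference engine (backs the new ir_optim serving argument).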
+ optional bool enable_ir_optimization = 16;
};
// model_toolkit conf
diff --git a/core/cube/cube-agent/src/agent/util.go b/core/cube/cube-agent/src/agent/util.go
index 29d27682a3c2e1c46d7ca8cb71de53c2e95df71f..1a0917d9810fb17cdaa4b2b1177d1e7414344a3e 100644
--- a/core/cube/cube-agent/src/agent/util.go
+++ b/core/cube/cube-agent/src/agent/util.go
@@ -83,9 +83,6 @@ func JsonReq(method, requrl string, timeout int, kv *map[string]string,
}
func GetHdfsMeta(src string) (master, ugi, path string, err error) {
- //src = "hdfs://root:rootpasst@st1-inf-platform0.st01.baidu.com:54310/user/mis_user/news_dnn_ctr_cube_1/1501836820/news_dnn_ctr_cube_1_part54.tar"
- //src = "hdfs://st1-inf-platform0.st01.baidu.com:54310/user/mis_user/news_dnn_ctr_cube_1/1501836820/news_dnn_ctr_cube_1_part54.tar"
-
ugiBegin := strings.Index(src, "//")
ugiPos := strings.LastIndex(src, "@")
if ugiPos != -1 && ugiBegin != -1 {
diff --git a/core/general-client/CMakeLists.txt b/core/general-client/CMakeLists.txt
index 88abcbcb776ae999cbf9123d1dad0864a987ecf4..d6079317a75d3f45b61920836e6695bd6b31d951 100644
--- a/core/general-client/CMakeLists.txt
+++ b/core/general-client/CMakeLists.txt
@@ -1,5 +1,5 @@
if(CLIENT)
add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
-target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
endif()
diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 7e04ae11f2106bc8e03fb9045976abc2460e1864..b379188854c30587d24962bc827aa099c3a39183 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -69,15 +69,27 @@ class ModelRes {
const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
return _int64_value_map[name];
}
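+ // The *_with_rv variants below move the stored vector out instead of copying it; the map entry is left valid but empty afterwards.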
+ std::vector<int64_t>&& get_int64_by_name_with_rv(const std::string& name) {
+ return std::move(_int64_value_map[name]);
+ }
const std::vector<float>& get_float_by_name(const std::string& name) {
return _float_value_map[name];
}
- const std::vector<int>& get_shape(const std::string& name) {
+ std::vector<float>&& get_float_by_name_with_rv(const std::string& name) {
+ return std::move(_float_value_map[name]);
+ }
+ const std::vector<int>& get_shape_by_name(const std::string& name) {
return _shape_map[name];
}
- const std::vector<int>& get_lod(const std::string& name) {
+ std::vector<int>&& get_shape_by_name_with_rv(const std::string& name) {
+ return std::move(_shape_map[name]);
+ }
+ const std::vector<int>& get_lod_by_name(const std::string& name) {
return _lod_map[name];
}
+ std::vector<int>&& get_lod_by_name_with_rv(const std::string& name) {
+ return std::move(_lod_map[name]);
+ }
void set_engine_name(const std::string& engine_name) {
_engine_name = engine_name;
}
@@ -121,17 +133,33 @@ class PredictorRes {
const std::string& name) {
return _models[model_idx].get_int64_by_name(name);
}
+ std::vector<int64_t>&& get_int64_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_int64_by_name_with_rv(name));
+ }
const std::vector<float>& get_float_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_float_by_name(name);
}
- const std::vector<int>& get_shape(const int model_idx,
- const std::string& name) {
- return _models[model_idx].get_shape(name);
+ std::vector<float>&& get_float_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_float_by_name_with_rv(name));
+ }
+ const std::vector<int>& get_shape_by_name(const int model_idx,
+ const std::string& name) {
+ return _models[model_idx].get_shape_by_name(name);
+ }
+ std::vector<int>&& get_shape_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_shape_by_name_with_rv(name));
+ }
+ const std::vector<int>& get_lod_by_name(const int model_idx,
+ const std::string& name) {
+ return _models[model_idx].get_lod_by_name(name);
}
- const std::vector<int>& get_lod(const int model_idx,
- const std::string& name) {
- return _models[model_idx].get_lod(name);
+ std::vector<int>&& get_lod_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_lod_by_name_with_rv(name));
}
void add_model_res(ModelRes&& res) {
_engine_names.push_back(res.engine_name());
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 86f75bc1c1b401cd14f2c6651ea52ef08fdb8c40..d4e54c2ac04cf84b2a036f7abe0d426e6f186699 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -258,9 +258,10 @@ int PredictorClient::batch_predict(
ModelRes model;
model.set_engine_name(output.engine_name());
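+ // Keep the fetch-variable index across loop iterations; it was previously re-declared as 0 inside the loop, so every fetch variable read tensor 0.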
+ int idx = 0;
+
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
@@ -279,9 +280,9 @@ int PredictorClient::batch_predict(
idx += 1;
}
+ idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
@@ -345,7 +346,7 @@ int PredictorClient::numpy_predict(
PredictorRes &predict_res_batch,
const int &pid) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
-
+ VLOG(2) << "batch size: " << batch_size;
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
@@ -462,7 +463,7 @@ int PredictorClient::numpy_predict(
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
for (ssize_t l = 0; l < int_array.shape(3); l++) {
- tensor->add_float_data(int_array(i, j, k, l));
+ tensor->add_int64_data(int_array(i, j, k, l));
}
}
}
@@ -474,7 +475,7 @@ int PredictorClient::numpy_predict(
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
- tensor->add_float_data(int_array(i, j, k));
+ tensor->add_int64_data(int_array(i, j, k));
}
}
}
@@ -484,7 +485,7 @@ int PredictorClient::numpy_predict(
auto int_array = int_feed[vec_idx].unchecked<2>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
- tensor->add_float_data(int_array(i, j));
+ tensor->add_int64_data(int_array(i, j));
}
}
break;
@@ -492,7 +493,7 @@ int PredictorClient::numpy_predict(
case 1: {
auto int_array = int_feed[vec_idx].unchecked<1>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
- tensor->add_float_data(int_array(i));
+ tensor->add_int64_data(int_array(i));
}
break;
}
@@ -536,9 +537,9 @@ int PredictorClient::numpy_predict(
ModelRes model;
model.set_engine_name(output.engine_name());
+ int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
@@ -557,9 +558,10 @@ int PredictorClient::numpy_predict(
idx += 1;
}
+ idx = 0;
+
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index b0d1d2d624d616a1df3805364cf7802cc19fc46b..3e065e4de1ff3c01ff6bc05cb39a2607620915b4 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -32,24 +32,41 @@ PYBIND11_MODULE(serving_client, m) {
.def(py::init())
.def("get_int64_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
- return self.get_int64_by_name(model_idx, name);
- },
- py::return_value_policy::reference)
+ // see more: https://github.com/pybind/pybind11/issues/1042
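+ // Move the result into a heap-allocated vector and hand it to numpy via a capsule, so the array takes ownership without an extra copy.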
+ std::vector<int64_t> *ptr = new std::vector<int64_t>(
+ std::move(self.get_int64_by_name_with_rv(model_idx, name)));
+ auto capsule = py::capsule(ptr, [](void *p) {
+ delete reinterpret_cast<std::vector<int64_t> *>(p);
+ });
+ return py::array(ptr->size(), ptr->data(), capsule);
+ })
.def("get_float_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
- return self.get_float_by_name(model_idx, name);
- },
- py::return_value_policy::reference)
+ std::vector<float> *ptr = new std::vector<float>(
+ std::move(self.get_float_by_name_with_rv(model_idx, name)));
+ auto capsule = py::capsule(ptr, [](void *p) {
+ delete reinterpret_cast<std::vector<float> *>(p);
+ });
+ return py::array(ptr->size(), ptr->data(), capsule);
+ })
.def("get_shape",
[](PredictorRes &self, int model_idx, std::string &name) {
- return self.get_shape(model_idx, name);
- },
- py::return_value_policy::reference)
+ std::vector<int> *ptr = new std::vector<int>(
+ std::move(self.get_shape_by_name_with_rv(model_idx, name)));
+ auto capsule = py::capsule(ptr, [](void *p) {
+ delete reinterpret_cast<std::vector<int> *>(p);
+ });
+ return py::array(ptr->size(), ptr->data(), capsule);
+ })
.def("get_lod",
[](PredictorRes &self, int model_idx, std::string &name) {
- return self.get_lod(model_idx, name);
- },
- py::return_value_policy::reference)
+ std::vector<int> *ptr = new std::vector<int>(
+ std::move(self.get_lod_by_name_with_rv(model_idx, name)));
+ auto capsule = py::capsule(ptr, [](void *p) {
+ delete reinterpret_cast<std::vector<int> *>(p);
+ });
+ return py::array(ptr->size(), ptr->data(), capsule);
+ })
.def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
.def("get_engine_names",
[](PredictorRes &self) { return self.get_engine_names(); });
@@ -100,7 +117,8 @@ PYBIND11_MODULE(serving_client, m) {
fetch_name,
predict_res_batch,
pid);
- })
+ },
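+ // Release the GIL while the blocking predict call runs so other Python threads can proceed.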
+ py::call_guard<py::gil_scoped_release>())
.def("numpy_predict",
[](PredictorClient &self,
const std::vector<std::vector<py::array_t<float>>>
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 8695da2591a30725d5b2390ad287f9ceae40052b..7d48949b22d0ace289ab3b9214f092819f5476e0 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -131,7 +131,7 @@ int GeneralReaderOp::inference() {
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
- if (req->insts(0).tensor_array(i).shape(0) == -1) {
+ if (model_config->_is_lod_feed[i]) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "var[" << i << "] is lod_tensor";
@@ -153,6 +153,7 @@ int GeneralReaderOp::inference() {
// specify the memory needed for output tensor_vector
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
+ int tensor_size = 0;
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = 0;
@@ -162,15 +163,28 @@ int GeneralReaderOp::inference() {
data_len = tensor.float_data_size();
}
VLOG(2) << "tensor size for var[" << i << "]: " << data_len;
+ tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "current len: " << cur_len;
- out->at(i).lod[0].push_back(cur_len + data_len);
- VLOG(2) << "new len: " << cur_len + data_len;
+ int sample_len = 0;
+ if (tensor.shape_size() == 1) {
+ sample_len = data_len;
+ } else {
+ sample_len = tensor.shape(0);
+ }
+ out->at(i).lod[0].push_back(cur_len + sample_len);
+ VLOG(2) << "new len: " << cur_len + sample_len;
+ }
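+ // Allocate the buffer once for the whole batch, then rebuild the shape as {total LoD length, trailing dims...}, padding a trailing 1 for 1-D data.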
+ out->at(i).data.Resize(tensor_size * elem_size[i]);
+ out->at(i).shape = {out->at(i).lod[0].back()};
+ for (int j = 1; j < req->insts(0).tensor_array(i).shape_size(); ++j) {
+ out->at(i).shape.push_back(req->insts(0).tensor_array(i).shape(j));
+ }
+ if (out->at(i).shape.size() == 1) {
+ out->at(i).shape.push_back(1);
}
- out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
- out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index 4d853f88eef88716c498b2b95c1498f1abdeb3d0..5667a174d9bb6e134e58de72524c60839dc82356 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -15,8 +15,10 @@
#include "core/general-server/op/general_response_op.h"
#include
#include
+#include