Merge branch 'develop' into wheel-name

8d3acf3f · Jiawei Wang · GitHub · 5dd05c04 · ae32b728 · 8d3acf3f
52 changed file
--- a/README.md
+++ b/README.md
@@ -80,33 +80,38 @@ The tutorial provides
 We **highly recommend** you to **run Paddle Serving in Docker**, please visit [Run in Docker](doc/RUN_IN_DOCKER.md). See the [document](doc/DOCKER_IMAGES.md) for more docker images.
-**Attention:**: Currently, the default GPU environment of paddlepaddle 2.0 is Cuda 10.2, so the sample code of GPU Docker is based on Cuda 10.2. We also provides docker images and whl packages for other GPU environments. If users use other environments, they need to carefully check and select the appropriate version.
+**Attention:** Currently, the default GPU environment of paddlepaddle 2.1 is Cuda 10.2, so the sample code of GPU Docker is based on Cuda 10.2. We also provide docker images and whl packages for other GPU environments. If users use other environments, they need to carefully check and select the appropriate version.
+**Attention:** In the commands below, `python` and `pip` refer to the interpreter and pip of one of Python 3.6/3.7/3.8.
 ```
 # Run CPU Docker
-docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
+docker pull registry.baidubce.com/paddlepaddle/serving:0.6.0-devel
-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel bash
+docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.6.0-devel bash
 docker exec -it test bash
 git clone https://github.com/PaddlePaddle/Serving
 ```
 ```
 # Run GPU Docker
-nvidia-docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
+nvidia-docker pull registry.baidubce.com/paddlepaddle/serving:0.6.0-cuda10.2-cudnn8-devel
-nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel bash
+nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.6.0-cuda10.2-cudnn8-devel bash
 nvidia-docker exec -it test bash
 git clone https://github.com/PaddlePaddle/Serving
 ```
+Install the Python dependencies:
+```
+cd Serving
+pip install -r python/requirements.txt
+```
 ```shell
-pip install paddle-serving-client==0.5.0
+pip install paddle-serving-client==0.6.0
-pip install paddle-serving-server==0.5.0 # CPU
+pip install paddle-serving-server==0.6.0 # CPU
-pip install paddle-serving-app==0.3.0
+pip install paddle-serving-app==0.6.0
-pip install paddle-serving-server-gpu==0.5.0.post102 #GPU with CUDA10.2 + TensorRT7
+pip install paddle-serving-server-gpu==0.6.0.post102 #GPU with CUDA10.2 + TensorRT7
 # DO NOT RUN ALL COMMANDS! check your GPU env and select the right one
-pip install paddle-serving-server-gpu==0.5.0.post9 # GPU with CUDA9.0
+pip install paddle-serving-server-gpu==0.6.0.post101 # GPU with CUDA10.1 + TensorRT6
-pip install paddle-serving-server-gpu==0.5.0.post10 # GPU with CUDA10.0
+pip install paddle-serving-server-gpu==0.6.0.post11 # GPU with CUDA10.1 + TensorRT7
-pip install paddle-serving-server-gpu==0.5.0.post101 # GPU with CUDA10.1 + TensorRT6
-pip install paddle-serving-server-gpu==0.5.0.post11 # GPU with CUDA10.1 + TensorRT7
 ```
 You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source, add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to pip command) to speed up the download.
@@ -115,28 +120,31 @@ If you need install modules compiled with develop branch, please download packag
 Packages of paddle-serving-server and paddle-serving-server-gpu support Centos 6/7, Ubuntu 16/18, Windows 10.
-Packages of paddle-serving-client and paddle-serving-app support Linux and Windows, but paddle-serving-client only support python2.7/3.5/3.6/3.7/3.8.
+Packages of paddle-serving-client and paddle-serving-app support Linux and Windows, but paddle-serving-client only supports Python 3.6/3.7/3.8.
+**In the latest version, Cuda 9.0 and Cuda 10.0 are no longer supported, and Python 2.7/3.5 is no longer supported.**
+We recommend installing paddle >= 2.1.0.
-Recommended to install paddle >= 2.0.0
 ```
 # CPU users, please run
-pip install paddlepaddle==2.0.0
+pip install paddlepaddle==2.1.0
 # GPU Cuda10.2 please run
-pip install paddlepaddle-gpu==2.0.0 
+pip install paddlepaddle-gpu==2.1.0 
 ```
 **Note**: If your Cuda version is not 10.2, please do not execute the above commands directly, you need to refer to [Paddle official documentation-multi-version whl package list
 ](https://www.paddlepaddle.org.cn/documentation/docs/en/install/Tables_en.html#multi-version-whl-package-list-release)
-Select the url link of the corresponding GPU environment and install it. For example, for Python2.7 users of Cuda 9.0, please select `cp27-cp27mu` and
+Select the url link of the corresponding GPU environment and install it. For example, for Python3.6 users of Cuda 10.1, please select `cp36-cp36m` and
-The url corresponding to `cuda9.0_cudnn7-mkl`, copy it and run
+the url corresponding to `cuda10.1-cudnn7-mkl-gcc8.2-avx-trt6.0.1.5`, copy it and run
 ```
-pip install https://paddle-wheel.bj.bcebos.com/2.0.0-gpu-cuda9-cudnn7-mkl/paddlepaddle_gpu-2.0.0.post90-cp27-cp27mu-linux_x86_64.whl
+pip install https://paddle-wheel.bj.bcebos.com/with-trt/2.1.0-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddlepaddle_gpu-2.1.0.post101-cp36-cp36m-linux_x86_64.whl
 ```
-the default `paddlepaddle-gpu==2.0.0` is Cuda 10.2 with no TensorRT. If you want to install PaddlePaddle with TensorRT. please also check the documentation-multi-version whl package list and find key word `cuda10.2-cudnn8.0-trt7.1.3`. More info please check [Paddle Serving uses TensorRT](./doc/TENSOR_RT.md)
+The default `paddlepaddle-gpu==2.1.0` is built for Cuda 10.2 without TensorRT. If you want to install PaddlePaddle with TensorRT, please also check the multi-version whl package list and look for the keyword `cuda10.2-cudnn8.0-trt7.1.3`. For more information, please check [Paddle Serving uses TensorRT](./doc/TENSOR_RT.md)
 If it is other environment and Python version, please find the corresponding link in the table and install it with pip.
@@ -211,6 +219,34 @@ the response is
 ```
 {"result":{"price":[[18.901151657104492]]}}
 ```
+<h3 align="center">Pipeline Service</h3>
+Paddle Serving provides industry-leading multi-model tandem services, which strongly support the real-world business scenarios of major companies. For an example, please refer to [OCR word recognition](./python/examples/pipeline/ocr).
+First, we get the two models:
+```
+python -m paddle_serving_app.package --get_model ocr_rec
+tar -xzvf ocr_rec.tar.gz
+python -m paddle_serving_app.package --get_model ocr_det
+tar -xzvf ocr_det.tar.gz
+```
+Then we start the server side, launching the two models as one standalone web service:
+```
+python web_service.py
+```
+http request
+```
+python pipeline_http_client.py
+```
+grpc request
+```
+python pipeline_rpc_client.py
+```
+output
+```
+{'err_no': 0, 'err_msg': '', 'key': ['res'], 'value': ["['土地整治与土壤修复研究中心', '华南农业大学1素图']"]}
+```
 <h2 align="center">Document</h2>
@@ -218,8 +254,12 @@ the response is
 - [How to save a servable model?](doc/SAVE.md)
 - [Write Bert-as-Service in 10 minutes](doc/BERT_10_MINS.md)
 - [Paddle Serving Examples](python/examples)
+- [How to process natural data in Paddle Serving?(Chinese)](doc/PROCESS_DATA.md)
+- [How to process level of detail(LOD)?](doc/LOD.md)
 ### Developers
+- [How to deploy Paddle Serving on K8S?(Chinese)](doc/PADDLE_SERVING_ON_KUBERNETES.md)
+- [How to route Paddle Serving to secure endpoint?(Chinese)](doc/SERVIING_AUTH_DOCKER.md)
 - [How to develop a new Web Service?](doc/NEW_WEB_SERVICE.md)
 - [Compile from source code](doc/COMPILE.md)
 - [Develop Pipeline Serving](doc/PIPELINE_SERVING.md)
@@ -231,8 +271,7 @@ the response is
 - [How to profile Paddle Serving latency?](python/examples/util)
 - [How to optimize performance?](doc/PERFORMANCE_OPTIM.md)
 - [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
+- [GPU Benchmarks(Chinese)](doc/BENCHMARKING_GPU.md)
- [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
 ### Design
 - [Design Doc](doc/DESIGN_DOC.md)
@@ -253,6 +292,7 @@ If you want to contribute code to Paddle Serving, please reference [Contribution
 - Special Thanks to [@BeyondYourself](https://github.com/BeyondYourself) in complementing the gRPC tutorial, updating the FAQ doc and modifying the mkdir command
 - Special Thanks to [@mcl-stone](https://github.com/mcl-stone) in updating faster_rcnn benchmark
 - Special Thanks to [@cg82616424](https://github.com/cg82616424) in updating the unet benchmark and modifying resize comment error
+- Special Thanks to [@cuicheng01](https://github.com/cuicheng01) for providing 11 PaddleClas models
 ### Feedback

--- a/README_CN.md
+++ b/README_CN.md
@@ -81,33 +81,39 @@ Paddle Serving开发者为您提供了简单易用的[AIStudio教程-Paddle Serv
 **强烈建议**您在**Docker内构建**Paddle Serving，请查看[如何在Docker中运行PaddleServing](doc/RUN_IN_DOCKER_CN.md)。更多镜像请查看[Docker镜像列表](doc/DOCKER_IMAGES_CN.md)。
-**提示**：目前paddlepaddle 2.0版本的默认GPU环境是Cuda 10.2，因此GPU Docker的示例代码以Cuda 10.2为准。镜像和pip安装包也提供了其余GPU环境，用户如果使用其他环境，需要仔细甄别并选择合适的版本。
+**提示**：目前paddlepaddle 2.1版本的默认GPU环境是Cuda 10.2，因此GPU Docker的示例代码以Cuda 10.2为准。镜像和pip安装包也提供了其余GPU环境，用户如果使用其他环境，需要仔细甄别并选择合适的版本。
+**提示**：本项目仅支持Python3.6/3.7/3.8，接下来所有的与Python/Pip相关的操作都需要选择正确的Python版本。
 ```
 # 启动 CPU Docker
-docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
+docker pull registry.baidubce.com/paddlepaddle/serving:0.6.0-devel
-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel bash
+docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.6.0-devel bash
 docker exec -it test bash
 git clone https://github.com/PaddlePaddle/Serving
 ```
 ```
 # 启动 GPU Docker
-nvidia-docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
+nvidia-docker pull registry.baidubce.com/paddlepaddle/serving:0.6.0-cuda10.2-cudnn8-devel
-nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel bash
+nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.6.0-cuda10.2-cudnn8-devel bash
 nvidia-docker exec -it test bash
 git clone https://github.com/PaddlePaddle/Serving
 ```
+安装所需的pip依赖
+```
+cd Serving
+pip install -r python/requirements.txt
+```
 ```shell
-pip install paddle-serving-client==0.5.0
+pip install paddle-serving-client==0.6.0
-pip install paddle-serving-server==0.5.0 # CPU
+pip install paddle-serving-server==0.6.0 # CPU
-pip install paddle-serving-app==0.3.0
+pip install paddle-serving-app==0.6.0
-pip install paddle-serving-server-gpu==0.5.0.post102 #GPU with CUDA10.2 + TensorRT7
+pip install paddle-serving-server-gpu==0.6.0.post102 #GPU with CUDA10.2 + TensorRT7
 # 其他GPU环境需要确认环境再选择执行哪一条
-pip install paddle-serving-server-gpu==0.5.0.post9 # GPU with CUDA9.0 
+pip install paddle-serving-server-gpu==0.6.0.post101 # GPU with CUDA10.1 + TensorRT6
-pip install paddle-serving-server-gpu==0.5.0.post10 # GPU with CUDA10.0 
+pip install paddle-serving-server-gpu==0.6.0.post11 # GPU with CUDA10.1 + TensorRT7
-pip install paddle-serving-server-gpu==0.5.0.post101 # GPU with CUDA10.1 + TensorRT6
-pip install paddle-serving-server-gpu==0.5.0.post11 # GPU with CUDA10.1 + TensorRT7
 ```
 您可能需要使用国内镜像源（例如清华源, 在pip命令中添加`-i https://pypi.tuna.tsinghua.edu.cn/simple`）来加速下载。
@@ -116,25 +122,27 @@ pip install paddle-serving-server-gpu==0.5.0.post11 # GPU with CUDA10.1 + Tensor
 paddle-serving-server和paddle-serving-server-gpu安装包支持Centos 6/7, Ubuntu 16/18和Windows 10。
-paddle-serving-client和paddle-serving-app安装包支持Linux和Windows，其中paddle-serving-client仅支持python2.7/3.5/3.6/3.7/3.8。
+paddle-serving-client和paddle-serving-app安装包支持Linux和Windows，其中paddle-serving-client仅支持python3.6/3.7/3.8。
+**最新的0.6.0的版本，已经不支持Cuda 9.0和Cuda 10.0，Python已不支持2.7和3.5。**
-推荐安装2.0.0及以上版本的paddle
+推荐安装2.1.0及以上版本的paddle
 ```
 # CPU环境请执行
-pip install paddlepaddle==2.0.0
+pip install paddlepaddle==2.1.0
 # GPU Cuda10.2环境请执行
-pip install paddlepaddle-gpu==2.0.0
+pip install paddlepaddle-gpu==2.1.0
 ```
 **注意**： 如果您的Cuda版本不是10.2，请勿直接执行上述命令，需要参考[Paddle官方文档-多版本whl包列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/Tables.html#whl-release)
-选择相应的GPU环境的url链接并进行安装，例如Cuda 9.0的Python2.7用户，请选择表格当中的`cp27-cp27mu`和`cuda9.0_cudnn7-mkl`对应的url，复制下来并执行
+选择相应的GPU环境的url链接并进行安装，例如Cuda 10.1的Python3.6用户，请选择表格当中的`cp36-cp36m`和`cuda10.1-cudnn7-mkl-gcc8.2-avx-trt6.0.1.5`对应的url，复制下来并执行
 ```
-pip install https://paddle-wheel.bj.bcebos.com/2.0.0-gpu-cuda9-cudnn7-mkl/paddlepaddle_gpu-2.0.0.post90-cp27-cp27mu-linux_x86_64.whl
+pip install https://paddle-wheel.bj.bcebos.com/with-trt/2.1.0-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddlepaddle_gpu-2.1.0.post101-cp36-cp36m-linux_x86_64.whl
 ```
-由于默认的`paddlepaddle-gpu==2.0.0`是Cuda 10.2，并没有联编TensorRT，因此如果需要和在`paddlepaddle-gpu`上使用TensorRT，需要在上述多版本whl包列表当中，找到`cuda10.2-cudnn8.0-trt7.1.3`，下载对应的Python版本。更多信息请参考[如何使用TensorRT?](doc/TENSOR_RT_CN.md)。
+由于默认的`paddlepaddle-gpu==2.1.0`是Cuda 10.2，并没有联编TensorRT，因此如果需要和在`paddlepaddle-gpu`上使用TensorRT，需要在上述多版本whl包列表当中，找到`cuda10.2-cudnn8.0-trt7.1.3`，下载对应的Python版本。更多信息请参考[如何使用TensorRT?](doc/TENSOR_RT_CN.md)。
 如果是其他环境和Python版本，请在表格中找到对应的链接并用pip安装。
@@ -195,8 +203,10 @@ print(fetch_map)
 ```
 在这里，`client.predict`函数具有两个参数。 `feed`是带有模型输入变量别名和值的`python dict`。 `fetch`被要从服务器返回的预测变量赋值。 在该示例中，在训练过程中保存可服务模型时，被赋值的tensor名为`"x"`和`"price"`。
 <h3 align="center">HTTP服务</h3>
-用户也可以将数据格式处理逻辑放在服务器端进行，这样就可以直接用curl去访问服务，参考如下案例，在目录`python/examples/fit_a_line`
+用户也可以将数据格式处理逻辑放在服务器端进行，这样就可以直接用curl去访问服务，参考如下案例，在目录`python/examples/fit_a_line`。
 ```
 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci
@@ -210,17 +220,50 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1
 {"result":{"price":[[18.901151657104492]]}}
 ```
+<h3 align="center">Pipeline服务</h3>
+Paddle Serving提供业界领先的多模型串联服务，强力支持各大公司实际运行的业务场景，参考 [OCR文字识别案例](python/examples/pipeline/ocr)，在目录`python/examples/pipeline/ocr`
+我们先获取两个模型
+```
+python -m paddle_serving_app.package --get_model ocr_rec
+tar -xzvf ocr_rec.tar.gz
+python -m paddle_serving_app.package --get_model ocr_det
+tar -xzvf ocr_det.tar.gz
+```
+然后启动服务端程序，将两个串联的模型作为一个整体的服务。
+```
+python web_service.py
+```
+最终使用http的方式请求
+```
+python pipeline_http_client.py
+```
+也支持rpc的方式
+```
+python pipeline_rpc_client.py
+```
+输出
+```
+{'err_no': 0, 'err_msg': '', 'key': ['res'], 'value': ["['土地整治与土壤修复研究中心', '华南农业大学1素图']"]}
+```
 <h2 align="center">文档</h2>
 ### 新手教程
 - [怎样保存用于Paddle Serving的模型？](doc/SAVE_CN.md)
 - [十分钟构建Bert-As-Service](doc/BERT_10_MINS_CN.md)
 - [Paddle Serving示例合辑](python/examples)
+- [如何在Paddle Serving处理常见数据类型](doc/PROCESS_DATA.md)
+- [如何在Serving上处理level of detail(LOD)?](doc/LOD_CN.md)
 ### 开发者教程
 - [如何开发一个新的Web Service?](doc/NEW_WEB_SERVICE_CN.md)
 - [如何编译PaddleServing?](doc/COMPILE_CN.md)
 - [如何开发Pipeline?](doc/PIPELINE_SERVING_CN.md)
+- [如何在K8S集群上部署Paddle Serving?](doc/PADDLE_SERVING_ON_KUBERNETES.md)
+- [如何在Paddle Serving上部署安全网关?](doc/SERVIING_AUTH_DOCKER.md)
+- [如何开发Pipeline?](doc/PIPELINE_SERVING_CN.md)
 - [如何使用uWSGI部署Web Service](doc/UWSGI_DEPLOY_CN.md)
 - [如何实现模型文件热加载](doc/HOT_LOADING_IN_SERVING_CN.md)
 - [如何使用TensorRT?](doc/TENSOR_RT_CN.md)
@@ -229,8 +272,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1
 - [如何测试Paddle Serving性能？](python/examples/util/)
 - [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
 - [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU版Benchmarks](doc/BENCHMARKING.md)
+- [GPU版Benchmarks](doc/BENCHMARKING_GPU.md)
- [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)
 ### 设计文档
 - [Paddle Serving设计文档](doc/DESIGN_DOC_CN.md)
@@ -251,6 +293,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1
 - 特别感谢 [@BeyondYourself](https://github.com/BeyondYourself) 提供grpc教程，更新FAQ教程，整理文件目录。
 - 特别感谢 [@mcl-stone](https://github.com/mcl-stone) 提供faster rcnn benchmark脚本
 - 特别感谢 [@cg82616424](https://github.com/cg82616424) 提供unet benchmark脚本和修改部分注释错误
+- 特别感谢 [@cuicheng01](https://github.com/cuicheng01) 提供PaddleClas的11个模型
 ### 反馈

--- a/doc/BENCHMARKING_GPU.md
+++ b/doc/BENCHMARKING_GPU.md
 本次提测的Serving版本，支持GPU预测，希望以此任务为例，对Paddle Serving支持GPU预测的性能给出测试数据。
-# 1. 测试环境说明
+## 1. 测试环境说明
 |          | GPU | 显存 | CPU | 内存 |
 |----------|---------|----------|----------------------------------------------|------|
 | Serving端 | 4x Tesla P4-8GB | 7611MiB | Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz 48核 | 216G |
 | Client端  | 4x Tesla P4-8GB | 7611MiB | Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz 48核 | 216G |
+使用单卡GPU，未开启TensorRT。
 模型：ResNet_v2_50
-# 2. PaddleServing-PipeLine方式
+## 2. PaddleServing-PipeLine(python)
 |model_name |thread_num |batch_size |CPU_util(%) |GPU_memory(mb) |GPU_util(%) |qps(samples/s) |total count |mean(ms) |median(ms) |80 percent(ms) |90 percent(ms) |99 percent(ms) |total cost(s) |each cost(s)|
 |:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--
 |ResNet_v2_50 |1 |1 |2.2 |3327 |17.25 |17.633658869240787 |355 |56.428481238996476 |38.646728515625 |39.496826171875 |39.98369140625 |1273.1911083984373 |20.131953477859497 |20.033540725708008|
@@ -43,8 +44,8 @@
 |ResNet_v2_50 |16 |16 |3.5 |6567 |77.8706 |186.56600081516 |248 |1332.1007946383568 |1365.2745361328125 |1399.212255859375 |1432.4037353515625 |1771.4374853515626 |21.26861262321472 |20.64799252152443|
 |ResNet_v2_50 |16 |32 |4.3 |6567 |83.6371 |201.1293408638195 |140 |2419.3400198800223 |2561.09228515625 |2616.081103515625 |2642.0835205078124 |2883.8197412109366 |22.274224042892456 |21.169659316539764|
-# 3. 竞品TensorFlow-Serving方式
+## 3. 竞品TensorFlow-Serving(C++)
-me|thread_num|batch_size|CPU_util(%)|GPU_memory(mb)|GPU_util(%)|qps(samples/s)|total count|mean(ms)|median(ms)|80 percent(ms)|90 percent(ms)|99 percent(ms)|total cost(s)|each cost(s)|
+model_name|thread_num|batch_size|CPU_util(%)|GPU_memory(mb)|GPU_util(%)|qps(samples/s)|total count|mean(ms)|median(ms)|80 percent(ms)|90 percent(ms)|99 percent(ms)|total cost(s)|each cost(s)|
 |:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|
 |ResNet_v2_50|1|1|2.1|6315|54|43.75570770301271|NaN|15.5063232421875|15.239013671875|15.387646484374999|15.971313476562498|19.846301269531253|0.22854161262512207|0.15510153770446777|
 |ResNet_v2_50|1|4|0.9|6315|89|48.117446702088664|NaN|73.0424560546875|35.518310546875|37.1490234375|74.91518554687487|379.05396972656257|0.8312993049621582|0.7305266857147217|

--- a/doc/C++DESIGN.md
+++ b/doc/C++DESIGN.md
-# Paddle Serving Design
+# C++ Serving Design
 ([简体中文](./C++DESIGN_CN.md)|English)

--- a/doc/C++DESIGN_CN.md
+++ b/doc/C++DESIGN_CN.md
-# Paddle Serving设计方案
+# C++ Serving设计方案
 (简体中文|[English](./C++DESIGN.md))

--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -16,7 +16,7 @@
 |         glibc-static         |               2.17                |
 |        openssl-devel         |              1.0.2k               |
 |         bzip2-devel          |          1.0.6 and later          |
-| python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later |
+|        python3-devel         |          3.6.0 and later          |
 |         sqlite-devel         |         3.7.17 and later          |
 |           patchelf           |                0.9                |
 |           libXext            |               1.3.3               |
@@ -123,6 +123,7 @@ If not in Docker environment, users can refer to the following execution methods
 export CUDA_PATH='/usr/local/cuda'
 export CUDNN_LIBRARY='/usr/local/cuda/lib64/'
 export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/"
 export TENSORRT_LIBRARY_PATH="/usr/local/TensorRT6-cuda10.1-cudnn7/targets/x86_64-linux-gnu/"
 mkdir server-build-gpu && cd server-build-gpu

--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -16,7 +16,7 @@
 |         glibc-static         |               2.17                |
 |        openssl-devel         |              1.0.2k               |
 |         bzip2-devel          |          1.0.6 and later          |
-| python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later |
+| python-devel / python3-devel |          3.6.0 and later          |
 |         sqlite-devel         |         3.7.17 and later          |
 |           patchelf           |                0.9                |
 |           libXext            |               1.3.3               |

--- a/doc/DOCKER_IMAGES.md
+++ b/doc/DOCKER_IMAGES.md
@@ -44,7 +44,8 @@ registry.baidubce.com/paddlepaddle/serving:latest-java
 **XPU:**
 ```
-registry.baidubce.com/paddlepaddle/serving:xpu-beta
+registry.baidubce.com/paddlepaddle/serving:xpu-arm # for arm xpu user
+registry.baidubce.com/paddlepaddle/serving:xpu-x86 # for x86 xpu user
 ```
 ## Requirements for running CUDA containers
@@ -64,9 +65,9 @@ Develop Images:
 |    CPU   | >=0.5.0 | 0.6.0-devel                 | Ubuntu 16 |  8.2.0       |
 |          | <=0.4.0 | 0.4.0-devel                  | CentOS 7  | 4.8.5       |
 | Cuda10.1 | >=0.5.0 | 0.6.0-cuda10.1-cudnn7-devel  | Ubuntu 16 |   8.2.0       |
-|          | 0.6.0   | 0.5.0-cuda10.1-cudnn7-gcc54-devel  | Ubuntu 16 |  5.4.0 |
+|          | 0.6.0   | 0.6.0-cuda10.1-cudnn7-gcc54-devel  | Ubuntu 16 |  5.4.0 |
 |          | <=0.4.0 | 0.6.0-cuda10.1-cudnn7-devel    | CentOS 7  | 4.8.5     |
-| Cuda10.2 | >=0.5.0 | 0.5.0-cuda10.2-cudnn8-devel  | Ubuntu 16 |   8.2.0       |
+| Cuda10.2 | >=0.5.0 | 0.6.0-cuda10.2-cudnn8-devel  | Ubuntu 16 |   8.2.0       |
 |          | <=0.4.0 | Nan                          | Nan       | Nan         |
 | Cuda11.0 | >=0.5.0 | 0.6.0-cuda11.0-cudnn8-devel | Ubuntu 18 |    8.2.0       |
 |          | <=0.4.0 | Nan                          | Nan       | Nan         |

--- a/doc/DOCKER_IMAGES_CN.md
+++ b/doc/DOCKER_IMAGES_CN.md
@@ -47,7 +47,8 @@ registry.baidubce.com/paddlepaddle/serving:latest-java
 **XPU镜像：**
 ```
-registry.baidubce.com/paddlepaddle/serving:xpu-beta
+registry.baidubce.com/paddlepaddle/serving:xpu-arm # for arm xpu user
+registry.baidubce.com/paddlepaddle/serving:xpu-x86 # for x86 xpu user
 ```
@@ -70,9 +71,9 @@ registry.baidubce.com/paddlepaddle/serving:xpu-beta
 |    CPU   | >=0.5.0 | 0.6.0-devel                 | Ubuntu 16 |  8.2.0       |
 |          | <=0.4.0 | 0.4.0-devel                  | CentOS 7  | 4.8.5       |
 | Cuda10.1 | >=0.5.0 | 0.6.0-cuda10.1-cudnn7-devel  | Ubuntu 16 |   8.2.0       |
-|          | 0.6.0   | 0.5.0-cuda10.1-cudnn7-gcc54-devel  | Ubuntu 16 |  5.4.0 |
+|          | 0.6.0   | 0.6.0-cuda10.1-cudnn7-gcc54-devel  | Ubuntu 16 |  5.4.0 |
 |          | <=0.4.0 | 0.6.0-cuda10.1-cudnn7-devel    | CentOS 7  | 4.8.5     |
-| Cuda10.2 | >=0.5.0 | 0.5.0-cuda10.2-cudnn8-devel  | Ubuntu 16 |   8.2.0       |
+| Cuda10.2 | >=0.5.0 | 0.6.0-cuda10.2-cudnn8-devel  | Ubuntu 16 |   8.2.0       |
 |          | <=0.4.0 | Nan                          | Nan       | Nan         |
 | Cuda11.0 | >=0.5.0 | 0.6.0-cuda11.0-cudnn8-devel | Ubuntu 18 |    8.2.0       |
 |          | <=0.4.0 | Nan                          | Nan       | Nan         |

--- a/doc/LATEST_PACKAGES.md
+++ b/doc/LATEST_PACKAGES.md
@@ -4,41 +4,18 @@
 ### Python 3
 ```
 # Compile by gcc8.2
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-0.0.0-py3-none-any.whl
-```
-### Python 2
-```
-# Compile by gcc8.2
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py2-none-any.whl
 ```
 ## GPU server
 ### Python 3
 ```
-#cuda 9.0, Compile by gcc4.8
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py3-none-any.whl
-#cuda 10.0, Compile by gcc4.8
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
-#cuda10.1 with TensorRT 6, Compile by gcc8.2
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
-#cuda10.2 with TensorRT 7, Compile by gcc8.2
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post102-py3-none-any.whl
-#cuda11.0 with TensorRT 7 (beta), Compile by gcc8.2
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post11-py3-none-any.whl
-```
-### Python 2
-```
-#cuda 9.0, Compile by gcc4.8
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
-#cuda 10.0, Compile by gcc4.8
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
 #cuda10.1 with TensorRT 6, Compile by gcc8.2
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post101-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
 #cuda10.2 with TensorRT 7, Compile by gcc8.2
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post102-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.0.0.post102-py3-none-any.whl
 #cuda11.0 with TensorRT 7 (beta), Compile by gcc8.2
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post11-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.0.0.post11-py3-none-any.whl
 ```
 **Tips:** If you want to use the CPU server and the GPU server at the same time, you should check the gcc version: only the Cuda10.1/10.2/11 GPU servers can run alongside the CPU server, because they are built with the same gcc version (8.2).
@@ -46,49 +23,41 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post11-
 ### Python 3.6
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp36-none-any.whl
+https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.0.0-cp36-none-any.whl
 ```
 ### Python 3.8
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp38-none-any.whl
+https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.0.0-cp38-none-any.whl
 ```
 ### Python 3.7
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp37-none-any.whl
+https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.0.0-cp37-none-any.whl
-```
-### Python 3.5
-```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp35-none-any.whl
-```
-### Python 2.7
-```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp27-none-any.whl
 ```
 ## App
 ### Python 3
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.0.0-py3-none-any.whl
 ```
-### Python 2
+## Baidu Kunlun user
-```
+Kunlun users on arm-xpu or x86-xpu can download the wheel packages as follows. Users should use the matching xpu-arm or xpu-x86 docker image, see [DOCKER IMAGES](./DOCKER_IMAGES.md).
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py2-none-any.whl
+**We only support Python 3.6 for Kunlun Users.**
-```
-## ARM user
-for ARM user who uses [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) can download the wheel packages as follows. And ARM user should use the xpu-beta docker [DOCKER IMAGES](./DOCKER_IMAGES.md) 
-**We only support Python 3.6 for Arm Users.**
 ### Wheel Package Links
+For ARM Kunlun users:
+```
+https://paddle-serving.bj.bcebos.com/whl/xpu/0.6.0/paddle_serving_server_xpu-0.6.0.post2-cp36-cp36m-linux_aarch64.whl
+https://paddle-serving.bj.bcebos.com/whl/xpu/0.6.0/paddle_serving_client-0.6.0-cp36-cp36m-linux_aarch64.whl
+https://paddle-serving.bj.bcebos.com/whl/xpu/0.6.0/paddle_serving_app-0.6.0-cp36-cp36m-linux_aarch64.whl
 ```
-# Server 
-https://paddle-serving.bj.bcebos.com/whl/xpu/paddle_serving_server_gpu-0.0.0.postarm_xpu-py3-none-any.whl
+For x86 Kunlun users:
-# Client
+``` 
-https://paddle-serving.bj.bcebos.com/whl/xpu/paddle_serving_client-0.0.0-cp36-none-any.whl 
+https://paddle-serving.bj.bcebos.com/whl/xpu/0.6.0/paddle_serving_server_xpu-0.6.0.post2-cp36-cp36m-linux_x86_64.whl
-# App
+https://paddle-serving.bj.bcebos.com/whl/xpu/0.6.0/paddle_serving_client-0.6.0-cp36-cp36m-linux_x86_64.whl
-https://paddle-serving.bj.bcebos.com/whl/xpu/paddle_serving_app-0.0.0-py3-none-any.whl 
+https://paddle-serving.bj.bcebos.com/whl/xpu/0.6.0/paddle_serving_app-0.6.0-cp36-cp36m-linux_x86_64.whl
 ```
@@ -98,26 +67,22 @@ for most users, we do not need to read this section. But if you deploy your Padd
 #### Bin links
 ```
 # CPU AVX MKL
-https://paddle-serving.bj.bcebos.com/bin/serving-cpu-avx-mkl-0.0.0.tar.gz
+https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-mkl-0.0.0.tar.gz
 # CPU AVX OPENBLAS
-https://paddle-serving.bj.bcebos.com/bin/serving-cpu-avx-openblas-0.0.0.tar.gz
+https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-openblas-0.0.0.tar.gz
 # CPU NOAVX OPENBLAS
-https://paddle-serving.bj.bcebos.com/bin/serving-cpu-noavx-openblas-0.0.0.tar.gz
+https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-noavx-openblas-0.0.0.tar.gz
-# Cuda 9
-https://paddle-serving.bj.bcebos.com/bin/serving-gpu-cuda9-0.0.0.tar.gz
-# Cuda 10
-https://paddle-serving.bj.bcebos.com/bin/serving-gpu-cuda10-0.0.0.tar.gz
 # Cuda 10.1
-https://paddle-serving.bj.bcebos.com/bin/serving-gpu-101-0.0.0.tar.gz
+https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-101-0.0.0.tar.gz
 # Cuda 10.2
-https://paddle-serving.bj.bcebos.com/bin/serving-gpu-102-0.0.0.tar.gz
+https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-0.0.0.tar.gz
 # Cuda 11
-https://paddle-serving.bj.bcebos.com/bin/serving-gpu-cuda11-0.0.0.tar.gz
+https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-cuda11-0.0.0.tar.gz
 ```
 #### How to setup SERVING_BIN offline?
 - download the serving server whl package and bin package, and make sure they are for the same environment
 - download the serving client whl and serving app whl, pay attention to the Python version.
- `pip install ` the serving and `tar xf ` the binary package, then `export SERVING_BIN=$PWD/serving-gpu-cuda10-0.0.0/serving` (take Cuda 10.0 as the example)
+- `pip install ` the serving and `tar xf ` the binary package, then `export SERVING_BIN=$PWD/serving-gpu-cuda11-0.0.0/serving` (take Cuda 11 as the example)
--- a/doc/PIPELINE_SERVING_CN.md
+++ b/doc/PIPELINE_SERVING_CN.md
@@ -467,7 +467,7 @@ op:
 ### 3.4 实现Server并启动服务
-代码示例中，重点留意3个自定义Op的proprocess、postprocess处理，以及Combin Op初始化列表input_ops=[bow_op, cnn_op]，设置Combin Op的前置OP列表。
+代码示例中，重点留意3个自定义Op的preprocess、postprocess处理，以及Combin Op初始化列表input_ops=[bow_op, cnn_op]，设置Combin Op的前置OP列表。
 ```python
 from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp

--- a/doc/PROCESS_DATA.md
+++ b/doc/PROCESS_DATA.md
+## Paddle Serving 数据处理
+### 综述
+Paddle Serving提供了非常灵活的pipeline web/rpc服务，因此需要一个统一的教程来指导在数据流的各个阶段，我们的自然数据（文字/图片/稀疏参数表）会以何种形式存在并且传递。本文将以pipeline web service为例。
+### pipeline客户端
+pipeline客户端只做很简单的处理，只需把自然输入转化成可以序列化的JSON字典或者是对应的protobuf bytes字段即可。
+#### 1）字符串/数字
+字符串和数字在这个阶段都以字符串的形式存在。我们以[房价预测](../python/examples/pipeline/simple_web_service)作为例子。房价预测的输入是13个维度的浮点数去描述一个住房的特征。在客户端阶段就可以直接如下所示
+```
+curl -X POST -k http://localhost:18082/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
+```
+我们直接把13个数字当成一整个字符串，中间用逗号`,` 隔开。在这里 key所跟随的列表长度需要和 value所跟随的列表长度相等。
+同理，如果是字符串文字输入，在这个阶段不妨直接明文输入，例如Bert在这个阶段可以直接写成
+```
+curl -X POST -k http://localhost:18082/bert/prediction -d '{"key": ["x"], "value": ["hello world"]}'
+```
+当然，复杂的处理也可以把这个curl转换成python语言，详情参见[Bert Pipeline示例](../python/examples/pipeline/bert). 
+#### 2）图片
+图片在Paddle的输入通常需要转换成numpy array，但是在客户端阶段，不需要转换成numpy array，因为那样比较耗费空间，在这个阶段我们用base64 string来传输就可以了，到了服务端的前处理再去解读base64转换成numpy array。详情参见[图像分类pipeline示例](../python/examples/pipeline/PaddleClas/DarkNet53/pipeline_http_client.py)，我们也贴出部分代码
+```python
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+if __name__ == "__main__":
+    url = "http://127.0.0.1:18080/imagenet/prediction"
+    with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
+        image_data1 = file.read()
+    image = cv2_to_base64(image_data1)
+    data = {"key": ["image"], "value": [image]}
+    for i in range(100):
+        r = requests.post(url=url, data=json.dumps(data))
+        print(r.json())
+```
+可以看出经过这样的操作，图片就可以像string一样，成为JSON或者GRPC Protobuf请求的一部分，发送到了服务端。
+### pipeline服务端前处理
+这些数据到了服务端之后，由于有一个auto batch的阶段，所以服务端程序接收到的是一个由python dict组成的列表，列表里面的每一个dict，对应着我们从客户端发出去的请求。
+#### 1）字符串/数字
+刚才提到的房价预测示例，[服务端程序](../python/examples/pipeline/simple_web_service/web_service.py)在这里。
+```python
+    def init_op(self):
+        self.separator = ","
+        self.batch_separator = ";"
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items() 
+        _LOGGER.error("UciOp::preprocess >>> log_id:{}, input:{}".format(
+            log_id, input_dict))
+        x_value = input_dict["x"].split(self.batch_separator)
+        x_lst = []
+        for x_val in x_value:
+            x_lst.append(
+                np.array([
+                    float(x.strip()) for x in x_val.split(self.separator)
+                ]).reshape(1, 13))
+        input_dict["x"] = np.concatenate(x_lst, axis=0)
+        proc_dict = {}
+        return input_dict, False, None, ""
+```
+可以看到我们在接收到客户端的请求（请求字典如下）
+```json
+{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}
+```
+之后，服务端以逗号`,`为分隔符对字符串做了切分，并转换成了 numpy array，shape是[1, 13]。最终需要确保 return的input_dict就是能够和Paddle Predictor直接做交互的字典。
+对于bert服务，由于发送的已经是明文，服务端处理程序如下
+```python
+    def init_op(self):
+        self.reader = ChineseBertReader({
+            "vocab_file": "vocab.txt",
+            "max_seq_len": 128
+        })
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items()
+        print("input dict", input_dict)
+        batch_size = len(input_dict.keys())
+        feed_res = []
+        for i in range(batch_size):
+            feed_dict = self.reader.process(input_dict[str(i)].encode("utf-8"))
+            for key in feed_dict.keys():
+                feed_dict[key] = np.array(feed_dict[key]).reshape(
+                    (1, len(feed_dict[key]), 1))
+            feed_res.append(feed_dict)
+        feed_dict = {}
+        for key in feed_res[0].keys():
+            feed_dict[key] = np.concatenate([x[key] for x in feed_res], axis=0)
+            print(key, feed_dict[key].shape)
+        return feed_dict, False, None, ""
+```
+也就是由一个bert字典来处理输入的明文数据，每一句话都会生成长度与bert seq len一致的浮点数序列。最终需要确保 return的feed_dict就是能够和Paddle Predictor直接做交互的字典。
+#### 2）图片处理
+图像的前处理阶段，前面提到的图像处理程序，[服务端程序](../python/examples/pipeline/PaddleClas/DarkNet53/resnet50_web_service.py)如下。
+```python
+    def init_op(self):
+        self.seq = Sequential([
+            Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
+            Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225],
+                                True)
+        ])
+        self.label_dict = {}
+        label_idx = 0
+        with open("imagenet.label") as fin:
+            for line in fin:
+                self.label_dict[label_idx] = line.strip()
+                label_idx += 1
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items()
+        batch_size = len(input_dict.keys())
+        imgs = []
+        for key in input_dict.keys():
+            data = base64.b64decode(input_dict[key].encode('utf8'))
+            data = np.fromstring(data, np.uint8)
+            im = cv2.imdecode(data, cv2.IMREAD_COLOR)
+            img = self.seq(im)
+            imgs.append(img[np.newaxis, :].copy())
+        input_imgs = np.concatenate(imgs, axis=0)
+        return {"image": input_imgs}, False, None, ""
+```
+可以看到我们在收到请求后，先要做base64的decode，然后再做np.fromstring，最后用opencv库的imdecode，才能完成图片到numpy array的转换，这个时候的数据就可以直接用于Paddle的图像前处理。
+我们最后再经过Sequential的 Resize（调整大小），CenterCrop（中央部分裁剪），RGB2BGR（颜色通道转换），Transpose（转置矩阵），Div（除以255），Normalize（归一化），最终形成和Paddle模型输入需求相一致的numpy array。
+### pipeline服务端预测
+预测阶段和Paddle预测一样，我们在preprocess函数给到了所需的输入，就可以不需要额外添加代码，到postprocess端等待输出即可。
+### pipeline服务端后处理
+后处理阶段函数原型是`def postprocess(self, input_dicts, fetch_dict, log_id):`
+我们会获取Paddle预测返回的fetch dict，后处理通常需要这个字典信息。
+后处理的方式多种多样，例如前面的房价预测就不需要后处理，预测的结果就已经给出了对房价的预测。
+图像分类需要做后处理，代码如下
+```python
+def postprocess(self, input_dicts, fetch_dict, log_id):
+        score_list = fetch_dict["prediction"]
+        result = {"label": [], "prob": []}
+        for score in score_list:
+            score = score.tolist()
+            max_score = max(score)
+            result["label"].append(self.label_dict[score.index(max_score)]
+                                   .strip().replace(",", ""))
+            result["prob"].append(max_score)
+        result["label"] = str(result["label"])
+        result["prob"] = str(result["prob"])
+        return result, None, ""
+```
+我们可以看到输出的字典只有 `prediction`的矩阵，只有通过后处理，才能得到这幅图模型判定的label（物体种类），和prob（对该物体的可信度）。
+如果是数字和字符串信息，确保return的result可被JSON序列化即可。
+通常后处理返回不再需要传输图片，如果需要传输图片，一样需要处理成base64的样子，交给客户端。
--- a/doc/WINDOWS_TUTORIAL.md
+++ b/doc/WINDOWS_TUTORIAL.md
@@ -8,7 +8,7 @@ This document guides users how to build Paddle Serving service on the Windows pl
 ### Running Paddle Serving on Native Windows System
-**Configure Python environment variables to PATH**: **We only support Python 3.5+ on Native Windows System.**. First, you need to add the directory where the Python executable program is located to the PATH. Usually in **System Properties/My Computer Properties**-**Advanced**-**Environment Variables**, click Path and add the path at the beginning. For example, `C:\Users\$USER\AppData\Local\Programs\Python\Python36`, and finally click **OK** continuously. If you enter python on Powershell, you can enter the python interactive interface, indicating that the environment variable configuration is successful.
+**Configure Python environment variables to PATH**: **We only support Python 3.6+ on Native Windows System.** First, you need to add the directory where the Python executable program is located to the PATH. Usually in **System Properties/My Computer Properties**-**Advanced**-**Environment Variables**, click Path and add the path at the beginning. For example, `C:\Users\$USER\AppData\Local\Programs\Python\Python36`, and finally click **OK** continuously. If you enter python on Powershell, you can enter the python interactive interface, indicating that the environment variable configuration is successful.
 **Install wget**: Because all the downloads in the tutorial and the built-in model download function in `paddle_serving_app` all use the wget tool, download the binary package at the [link](http://gnuwin32.sourceforge.net/packages/wget.htm), unzip and copy it to `C:\Windows\System32`, if there is a security prompt, you need to pass it.

--- a/doc/WINDOWS_TUTORIAL_CN.md
+++ b/doc/WINDOWS_TUTORIAL_CN.md
@@ -8,7 +8,7 @@
 ### 原生Windows系统运行Paddle Serving
-**配置Python环境变量到PATH**：**目前原生Windows仅支持Python 3.5或更高版本**。首先需要将Python的可执行程序所在目录加入到PATH当中。通常在**系统属性/我的电脑属性**-**高级**-**环境变量** ，点选Path，并在开头加上路径。例如`C:\Users\$USER\AppData\Local\Programs\Python\Python36`，最后连续点击**确定** 。在Powershell上如果输入python可以进入python交互界面，说明环境变量配置成功。
+**配置Python环境变量到PATH**：**目前原生Windows仅支持Python 3.6或更高版本**。首先需要将Python的可执行程序所在目录加入到PATH当中。通常在**系统属性/我的电脑属性**-**高级**-**环境变量** ，点选Path，并在开头加上路径。例如`C:\Users\$USER\AppData\Local\Programs\Python\Python36`，最后连续点击**确定** 。在Powershell上如果输入python可以进入python交互界面，说明环境变量配置成功。
 **安装wget工具**：由于教程当中所有的下载，以及`paddle_serving_app`当中内嵌的模型下载功能，都是用到wget工具，在链接[下载wget](http://gnuwin32.sourceforge.net/packages/wget.htm)，解压后复制到`C:\Windows\System32`下，如有安全提示需要通过。

--- a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "DarkNet53"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "HRNet_W18_C"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "MobileNetV1"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "MobileNetV2"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-     ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "MobileNetV3_large_x1_0"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "ResNeXt101_vd_64x4d"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "ResNet50_vd"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "ResNet50_vd_FPGM"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_FPGM_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "ResNet50_vd_KL"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_KL_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "ResNet50_vd_PACT"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_PACT_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_config.yaml
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "ShuffleNetV2_x1_0"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_0_pretrained.tar"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_config.yaml.template
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/paddle_serving_app/reader/image_reader.py
+++ b/python/paddle_serving_app/reader/image_reader.py
@@ -142,10 +142,10 @@ class DBPostProcess(object):
    def box_score_fast(self, bitmap, _box):
        h, w = bitmap.shape[:2]
        box = _box.copy()
-        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
+        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
-        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
+        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
-        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
+        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
-        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1)
+        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)
        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin

--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -12,6 +12,7 @@ flask>=1.1.2
 click==7.1.2
 itsdangerous==1.1.0
 Jinja2==2.11.3
+pyclipper==1.2.1
 MarkupSafe==1.1.1
 Werkzeug==1.0.1
 ujson>=2.0.3

--- a/tools/dockerfiles/build_scripts/install_whl.sh
+++ b/tools/dockerfiles/build_scripts/install_whl.sh
@@ -24,41 +24,52 @@ app_release="paddle-serving-app==0.3.1"
 if [[ $PYTHON_VERSION == "3.6" ]];then
    CPYTHON="36"
+    CPYTHON_PADDLE="36m"
 elif [[ $PYTHON_VERSION == "3.7" ]];then
    CPYTHON="37"
+    CPYTHON_PADDLE="37m"
 elif [[ $PYTHON_VERSION == "3.8" ]];then
    CPYTHON="38"
+    CPYTHON_PADDLE="38"
 fi
 if [[ $SERVING_VERSION == "0.5.0" ]]; then
    if [[ "$RUN_ENV" == "cpu" ]];then
        server_release="paddle-serving-server==$SERVING_VERSION"
-        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz"
+        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-cpu-avx-mkl-${SERVING_VERSION}.tar.gz"
+        paddle_whl="https://paddle-wheel.bj.bcebos.com/$PADDLE_VERSION-cpu-avx-mkl/paddlepaddle-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
    elif [[ "$RUN_ENV" == "cuda10.1" ]];then
        server_release="paddle-serving-server-gpu==$SERVING_VERSION.post101"
        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-101-${SERVING_VERSION}.tar.gz"
+        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post101-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
    elif [[ "$RUN_ENV" == "cuda10.2" ]];then
        server_release="paddle-serving-server-gpu==$SERVING_VERSION.post102"
        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-102-${SERVING_VERSION}.tar.gz"
+        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.2-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
    elif [[ "$RUN_ENV" == "cuda11" ]];then
        server_release="paddle-serving-server-gpu==$SERVING_VERSION.post11"
        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-cuda11-${SERVING_VERSION}.tar.gz"
+        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda11.0-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post110-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
    fi
    client_release="paddle-serving-client==$SERVING_VERSION"
    app_release="paddle-serving-app==0.3.1"
 elif [[ $SERVING_VERSION == "0.6.0" ]]; then 
    if [[ "$RUN_ENV" == "cpu" ]];then
        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-$SERVING_VERSION-py3-none-any.whl"
-        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-noavx-openblas-$SERVING_VERSION.tar.gz"
+        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-mkl-$SERVING_VERSION.tar.gz"
+        paddle_whl="https://paddle-wheel.bj.bcebos.com/$PADDLE_VERSION-cpu-avx-mkl/paddlepaddle-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
    elif [[ "$RUN_ENV" == "cuda10.1" ]];then
        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post101-py3-none-any.whl"
        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-101-$SERVING_VERSION.tar.gz"
+        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post101-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
    elif [[ "$RUN_ENV" == "cuda10.2" ]];then
        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post102-py3-none-any.whl"
        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-$SERVING_VERSION.tar.gz"
+        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.2-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
    elif [[ "$RUN_ENV" == "cuda11" ]];then
        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post11-py3-none-any.whl"
        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-cuda11-$SERVING_VERSION.tar.gz"
+        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda11.0-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post110-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
    fi
    client_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-$SERVING_VERSION-cp$CPYTHON-none-any.whl"
    app_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-$SERVING_VERSION-py3-none-any.whl"
@@ -66,7 +77,7 @@ fi
 if [[ "$RUN_ENV" == "cpu" ]];then
    python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
-    python$PYTHON_VERSION -m pip install paddlepaddle==${PADDLE_VERSION}
+    python$PYTHON_VERSION -m pip install $paddle_whl
    cd /usr/local/
    wget $serving_bin 
    tar xf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz
@@ -76,7 +87,7 @@ if [[ "$RUN_ENV" == "cpu" ]];then
    cd -
 elif [[ "$RUN_ENV" == "cuda10.1" ]];then
    python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
-    python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION}
+    python$PYTHON_VERSION -m pip install $paddle_whl
    cd /usr/local/
    wget $serving_bin
    tar xf serving-gpu-101-${SERVING_VERSION}.tar.gz
@@ -86,7 +97,7 @@ elif [[ "$RUN_ENV" == "cuda10.1" ]];then
    cd -
 elif [[ "$RUN_ENV" == "cuda10.2" ]];then
    python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
-    python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION}
+    python$PYTHON_VERSION -m pip install $paddle_whl
    cd /usr/local/
    wget $serving_bin
    tar xf serving-gpu-102-${SERVING_VERSION}.tar.gz
@@ -96,7 +107,7 @@ elif [[ "$RUN_ENV" == "cuda10.2" ]];then
    cd -
 elif [[ "$RUN_ENV" == "cuda11" ]];then
    python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
-    python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION}
+    python$PYTHON_VERSION -m pip install $paddle_whl
    cd /usr/local/
    wget $serving_bin
    tar xf serving-gpu-cuda11-${SERVING_VERSION}.tar.gz

--- a/tools/dockerfiles/build_scripts/soft_link.sh
+++ b/tools/dockerfiles/build_scripts/soft_link.sh
@@ -6,14 +6,14 @@ if [[ "$RUN_ENV" == "cuda10.1" ]];then
    ln -sf /usr/lib/x86_64-linux-gnu/libcublas.so.10 /usr/lib/libcublas.so && \
    ln -sf /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so
-elif [[ "$RUN_ENV" == "cuda10.1" ]];then
+elif [[ "$RUN_ENV" == "cuda10.2" ]];then
    ln -sf /usr/local/cuda-10.2/targets/x86_64-linux/lib/libcudart.so.10.2 /usr/lib/libcudart.so && \
    ln -sf /usr/local/cuda-10.2/targets/x86_64-linux/lib/libcusolver.so.10 /usr/lib/libcusolver.so && \
    ln -sf /usr/lib/x86_64-linux-gnu/libcuda.so /usr/lib/libcuda.so && \
    ln -sf /usr/lib/x86_64-linux-gnu/libcublas.so.10 /usr/lib/libcublas.so && \
    ln -sf /usr/lib/x86_64-linux-gnu/libcudnn.so.8 /usr/lib/libcudnn.so
-elif [[ "$RUN_ENV" == "cuda10.1" ]];then
+elif [[ "$RUN_ENV" == "cuda11" ]];then
    ln -sf /usr/local/cuda-11.0/targets/x86_64-linux/lib/libcudart.so.11.0 /usr/lib/libcudart.so && \
    ln -sf /usr/local/cuda-11.0/targets/x86_64-linux/lib/libcusolver.so.10 /usr/lib/libcusolver.so && \
    ln -sf /usr/lib/x86_64-linux-gnu/libcuda.so /usr/lib/libcuda.so && \

--- a/tools/generate_runtime_docker.sh
+++ b/tools/generate_runtime_docker.sh
@@ -9,8 +9,8 @@ function usage
    echo "   ";
    echo "   --env                 : running env, cpu/cuda10.1/cuda10.2/cuda11";
    echo "   --python              : python version, 3.6/3.7/3.8 ";
-    echo "   --serving             : serving version(0.5.0)";
+    echo "   --serving             : serving version(0.6.0)";
-    echo "   --paddle              : paddle version(2.0.1)"
+    echo "   --paddle              : paddle version(2.1.0)"
    echo "   --image_name          : image name(default serving_runtime:env-python)"
    echo "  -h | --help            : helper";
 }