diff --git a/CMakeLists.txt b/CMakeLists.txt
index af065158699199af61aca02f563dda1b1cddf2b1..7c497e3e048c4dd8d5c1291286de2ab9d218b914 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,6 +85,17 @@ include(generic)
include(flags)
endif()
+if (APP)
+include(external/zlib)
+include(external/boost)
+include(external/protobuf)
+include(external/gflags)
+include(external/glog)
+include(external/pybind11)
+include(external/python)
+include(generic)
+endif()
+
if (SERVER)
include(external/cudnn)
include(paddlepaddle)
diff --git a/README.md b/README.md
index 747c140ded49f279c289b0bc8a3b4b1963243040..1818ddd61cc5423c4a590815930d007303f18e81 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,12 @@
+([简体中文](./README_CN.md)|English)
+
+
@@ -23,28 +26,20 @@ We consider deploying deep learning inference service online to be a user-facing
-Some Key Features
-
-- Integrate with Paddle training pipeline seamlessly, most paddle models can be deployed **with one line command**.
-- **Industrial serving features** supported, such as models management, online loading, online A/B testing etc.
-- **Distributed Key-Value indexing** supported which is especially useful for large scale sparse features as model inputs.
-- **Highly concurrent and efficient communication** between clients and servers supported.
-- **Multiple programming languages** supported on client side, such as Golang, C++ and python.
-- **Extensible framework design** which can support model serving beyond Paddle.
Installation
We **highly recommend** you **run Paddle Serving in Docker**; please visit [Run in Docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md)
```
# Run CPU Docker
-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker pull hub.baidubce.com/paddlepaddle/serving:latest
+docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it test bash
```
```
# Run GPU Docker
-nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
-nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker exec -it test bash
```
@@ -56,10 +51,44 @@ pip install paddle-serving-server-gpu # GPU
You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source, add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to pip command) to speed up the download.
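+
+For example, to install the client package through the Tsinghua mirror:
+
+``` shell
+pip install paddle-serving-client -i https://pypi.tuna.tsinghua.edu.cn/simple
+```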
+If you need to install modules compiled with the develop branch, please download the packages from the [latest packages list](./doc/LATEST_PACKAGES.md) and install them with the `pip install` command.
+
The client package supports CentOS 7 and Ubuntu 18, or you can use the HTTP service without installing the client.
+
+ Pre-built services with Paddle Serving
+
+Chinese Word Segmentation
+
+``` shell
+> python -m paddle_serving_app.package -get_model lac
+> tar -xzf lac.tar.gz
+> python lac_web_service.py 9292 &
+> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
+{"result":[{"word_seg":"我|爱|北京|天安门"}]}
+```
+
+Image Classification
+
+``` shell
+> python -m paddle_serving_app.package -get_model resnet_v2_50_imagenet
+> tar -xzf resnet_v2_50_imagenet.tar.gz
+> python resnet50_imagenet_classify.py resnet50_serving_model &
+> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
+{"result":{"label":["daisy"],"prob":[0.9341403245925903]}}
+```
+
+
Quick Start Example
+This quick start example is only for users who already have a model to deploy; we provide a ready-to-deploy model here. If you want to learn how to use Paddle Serving all the way from offline training to online serving, please refer to [Train_To_Service](https://github.com/PaddlePaddle/Serving/blob/develop/doc/TRAIN_TO_SERVICE.md)
+
### Boston House Price Prediction model
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
@@ -82,7 +111,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `port` | int | `9292` | Exposed port of current service to users|
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
-| `mem_optim` | bool | `False` | Enable memory optimization |
+| `mem_optim` | bool | `False` | Enable memory / graphics memory optimization |
+| `ir_optim` | bool | `False` | Enable analysis and optimization of the computation graph |
+| `use_mkl` (CPU version only) | bool | `False` | Run inference with MKL |
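+
+For example, a minimal sketch of starting the server with these optimizations enabled (assuming the three boolean options are exposed as plain command-line switches of `paddle_serving_server.serve`):
+
+``` shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --mem_optim --ir_optim --use_mkl
+```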
Here, we use `curl` to send an HTTP POST request to the service we just started. Users can use any Python library to send HTTP POST requests as well, e.g., [requests](https://requests.readthedocs.io/en/master/).
@@ -113,138 +144,13 @@ print(fetch_map)
```
Here, the `client.predict` function takes two arguments. `feed` is a `python dict` mapping model input variable alias names to values. `fetch` specifies the prediction variables to be returned from the server. In the example, the alias names `"x"` and `"price"` were assigned when the servable model was saved during training.
- Pre-built services with Paddle Serving
-
-Chinese Word Segmentation
-
-- **Description**:
-``` shell
-Chinese word segmentation HTTP service that can be deployed with one line command.
-```
-
-- **Download Servable Package**:
-``` shell
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
-```
-- **Host web service**:
-``` shell
-tar -xzf lac_model_jieba_web.tar.gz
-python lac_web_service.py jieba_server_model/ lac_workdir 9292
-```
-- **Request sample**:
-``` shell
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
-```
-- **Request result**:
-``` shell
-{"word_seg":"我|爱|北京|天安门"}
-```
-
-Image Classification
-
-- **Description**:
-``` shell
-Image classification trained with Imagenet dataset. A label and corresponding probability will be returned.
-Note: This demo needs paddle-serving-server-gpu.
-```
-
-- **Download Servable Package**:
-``` shell
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/imagenet_demo.tar.gz
-```
-- **Host web service**:
-``` shell
-tar -xzf imagenet_demo.tar.gz
-python image_classification_service_demo.py resnet50_serving_model
-```
-- **Request sample**:
-
-
-``` shell
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
-```
-- **Request result**:
-``` shell
-{"label":"daisy","prob":0.9341403245925903}
-```
-
-
More Demos
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | Bert-Base-Baike |
-| URL | [https://paddle-serving.bj.bcebos.com/bert_example/bert_seq128.tar.gz](https://paddle-serving.bj.bcebos.com/bert_example%2Fbert_seq128.tar.gz) |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert |
-| Description | Get semantic representation from a Chinese Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | Resnet50-Imagenet |
-| URL | [https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz](https://paddle-serving.bj.bcebos.com/imagenet-example%2FResNet50_vd.tar.gz) |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
-| Description | Get image semantic representation from an image |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | Resnet101-Imagenet |
-| URL | https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet101_vd.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
-| Description | Get image semantic representation from an image |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | CNN-IMDB |
-| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| Description | Get category probability from an English Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | LSTM-IMDB |
-| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| Description | Get category probability from an English Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | BOW-IMDB |
-| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| Description | Get category probability from an English Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | Jieba-LAC |
-| URL | https://paddle-serving.bj.bcebos.com/lac/lac_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/lac |
-| Description | Get word segmentation from a Chinese Sentence |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| Model Name | DNN-CTR |
-| URL | https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz |
-| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/criteo_ctr |
-| Description | Get click probability from a feature vector of item |
+Some Key Features of Paddle Serving
+- Integrate with the Paddle training pipeline seamlessly; most Paddle models can be deployed **with a one-line command**.
+- **Industrial serving features** supported, such as model management, online loading, online A/B testing, etc.
+- **Distributed Key-Value indexing** supported, which is especially useful for large-scale sparse features as model inputs.
+- **Highly concurrent and efficient communication** between clients and servers supported.
+- **Multiple programming languages** supported on the client side, such as Golang, C++ and Python.
Document
@@ -259,11 +165,13 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
- [How to develop a new Web Service?](doc/NEW_WEB_SERVICE.md)
- [Golang client](doc/IMDB_GO_CLIENT.md)
- [Compile from source code](doc/COMPILE.md)
+- [Deploy Web Service with uWSGI](doc/UWSGI_DEPLOY.md)
+- [Hot loading for model file](doc/HOT_LOADING_IN_SERVING.md)
### About Efficiency
- [How to profile Paddle Serving latency?](python/examples/util)
-- [How to optimize performance?(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
-- [Deploy multi-services on one GPU(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
- [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
diff --git a/README_CN.md b/README_CN.md
index 266fca330d7597d6188fa0022e6376bc23149c74..29cf095248f4c125b3dba7146e67efe8b7abae6c 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -1,9 +1,12 @@
+(简体中文|[English](./README.md))
+
+
@@ -24,14 +27,7 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务
-核心功能
-- 与Paddle训练紧密连接,绝大部分Paddle模型可以 **一键部署**.
-- 支持 **工业级的服务能力** 例如模型管理,在线加载,在线A/B测试等.
-- 支持 **分布式键值对索引** 助力于大规模稀疏特征作为模型输入.
-- 支持客户端和服务端之间 **高并发和高效通信**.
-- 支持 **多种编程语言** 开发客户端,例如Golang,C++和Python.
-- **可伸缩框架设计** 可支持不限于Paddle的模型服务.
安装
@@ -39,14 +35,14 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务
```
# 启动 CPU Docker
-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker pull hub.baidubce.com/paddlepaddle/serving:latest
+docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it test bash
```
```
# 启动 GPU Docker
-nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
-nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker exec -it test bash
```
```shell
@@ -57,9 +53,42 @@ pip install paddle-serving-server-gpu # GPU
您可能需要使用国内镜像源(例如清华源, 在pip命令中添加`-i https://pypi.tuna.tsinghua.edu.cn/simple`)来加速下载。
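+
+例如,通过清华镜像源安装客户端安装包:
+
+``` shell
+pip install paddle-serving-client -i https://pypi.tuna.tsinghua.edu.cn/simple
+```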
+如果需要使用develop分支编译的安装包,请从[最新安装包列表](./doc/LATEST_PACKAGES.md)中获取下载地址进行下载,使用`pip install`命令进行安装。
+
客户端安装包支持Centos 7和Ubuntu 18,或者您可以使用HTTP服务,这种情况下不需要安装客户端。
-快速启动示例
+ Paddle Serving预装的服务
+
+中文分词
+
+``` shell
+> python -m paddle_serving_app.package -get_model lac
+> tar -xzf lac.tar.gz
+> python lac_web_service.py 9292 &
+> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
+{"result":[{"word_seg":"我|爱|北京|天安门"}]}
+```
+
+图像分类
+
+``` shell
+> python -m paddle_serving_app.package -get_model resnet_v2_50_imagenet
+> tar -xzf resnet_v2_50_imagenet.tar.gz
+> python resnet50_imagenet_classify.py resnet50_serving_model &
+> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
+{"result":{"label":["daisy"],"prob":[0.9341403245925903]}}
+```
+
快速开始示例
+
+这个快速开始示例主要是为了给那些已经有一个要部署的模型的用户准备的,而且我们也提供了一个可以用来部署的模型。如果您想知道如何从离线训练到在线服务走完全流程,请参考[从训练到部署](https://github.com/PaddlePaddle/Serving/blob/develop/doc/TRAIN_TO_SERVICE_CN.md)
波士顿房价预测
@@ -87,6 +116,8 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
| `mem_optim` | bool | `False` | Enable memory optimization |
+| `ir_optim` | bool | `False` | Enable analysis and optimization of the computation graph |
+| `use_mkl` (CPU version only) | bool | `False` | Run inference with MKL |
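+
+例如,下面是开启这些优化选项来启动服务的一个简单示例(假设这三个布尔选项均以 `paddle_serving_server.serve` 的命令行开关形式提供):
+
+``` shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --mem_optim --ir_optim --use_mkl
+```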
我们使用 `curl` 命令来发送HTTP POST请求给刚刚启动的服务。用户也可以调用python库来发送HTTP POST请求,请参考英文文档 [requests](https://requests.readthedocs.io/en/master/)。
@@ -118,139 +149,13 @@ print(fetch_map)
```
在这里,`client.predict`函数具有两个参数。`feed`是带有模型输入变量别名和值的`python dict`。`fetch`指定要从服务器返回的预测变量。在该示例中,tensor的别名`"x"`和`"price"`是在训练过程中保存可服务模型时指定的。
-Paddle Serving预装的服务
-
-中文分词模型
-
-- **介绍**:
-``` shell
-本示例为中文分词HTTP服务一键部署
-```
-
-- **下载服务包**:
-``` shell
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
-```
-- **启动web服务**:
-``` shell
-tar -xzf lac_model_jieba_web.tar.gz
-python lac_web_service.py jieba_server_model/ lac_workdir 9292
-```
-- **客户端请求示例**:
-``` shell
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
-```
-- **返回结果示例**:
-``` shell
-{"word_seg":"我|爱|北京|天安门"}
-```
-
-图像分类模型
-
-- **介绍**:
-``` shell
-图像分类模型由Imagenet数据集训练而成,该服务会返回一个标签及其概率
-注意:本示例需要安装paddle-serving-server-gpu
-```
-
-- **下载服务包**:
-``` shell
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/imagenet_demo.tar.gz
-```
-- **启动web服务**:
-``` shell
-tar -xzf imagenet_demo.tar.gz
-python image_classification_service_demo.py resnet50_serving_model
-```
-- **客户端请求示例**:
-
-``` shell
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
-```
-- **返回结果示例**:
-``` shell
-{"label":"daisy","prob":0.9341403245925903}
-```
-
-
更多示例
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | Bert-Base-Baike |
-| 下载链接 | [https://paddle-serving.bj.bcebos.com/bert_example/bert_seq128.tar.gz](https://paddle-serving.bj.bcebos.com/bert_example%2Fbert_seq128.tar.gz) |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert |
-| 介绍 | 获得一个中文语句的语义表示 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | Resnet50-Imagenet |
-| 下载链接 | [https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz](https://paddle-serving.bj.bcebos.com/imagenet-example%2FResNet50_vd.tar.gz) |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
-| 介绍 | 获得一张图片的图像语义表示 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | Resnet101-Imagenet |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet101_vd.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
-| 介绍 | 获得一张图片的图像语义表示 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | CNN-IMDB |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| 介绍 | 从一个中文语句获得类别及其概率 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | LSTM-IMDB |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| 介绍 | 从一个英文语句获得类别及其概率 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | BOW-IMDB |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
-| 介绍 | 从一个英文语句获得类别及其概率 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | Jieba-LAC |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/lac/lac_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/lac |
-| 介绍 | 获取中文语句的分词 |
-
-
-
-| Key | Value |
-| :----------------- | :----------------------------------------------------------- |
-| 模型名 | DNN-CTR |
-| 下载链接 | https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz |
-| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/criteo_ctr |
-| 介绍 | 从项目的特征向量中获得点击概率 |
-
+Paddle Serving的核心功能
+- 与Paddle训练紧密连接,绝大部分Paddle模型可以 **一键部署**.
+- 支持 **工业级的服务能力** 例如模型管理,在线加载,在线A/B测试等.
+- 支持 **分布式键值对索引** 助力于大规模稀疏特征作为模型输入.
+- 支持客户端和服务端之间 **高并发和高效通信**.
+- 支持 **多种编程语言** 开发客户端,例如Golang,C++和Python.
文档
@@ -265,11 +170,13 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
- [如何开发一个新的Web Service?](doc/NEW_WEB_SERVICE_CN.md)
- [如何在Paddle Serving使用Go Client?](doc/IMDB_GO_CLIENT_CN.md)
- [如何编译PaddleServing?](doc/COMPILE_CN.md)
+- [如何使用uWSGI部署Web Service](doc/UWSGI_DEPLOY_CN.md)
+- [如何实现模型文件热加载](doc/HOT_LOADING_IN_SERVING_CN.md)
### 关于Paddle Serving性能
- [如何测试Paddle Serving性能?](python/examples/util/)
-- [如何优化性能?](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
-- [在一张GPU上启动多个预测服务](doc/PERFORMANCE_OPTIM_CN.md)
+- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
+- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU版Benchmarks](doc/BENCHMARKING.md)
- [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index c9ac3d2f04db833f34211af3cc7aaac2d5184bf9..7670444ed1e021376fa44491973bb748cf611ecf 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
-SET(PADDLE_VERSION "1.7.1")
+SET(PADDLE_VERSION "1.7.2")
if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index ce2e5e3814ae1e585976c5d9c8848b506293ee67..56296b53319fb185c772ffa10e8b31c8203862fb 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -23,6 +23,11 @@ add_subdirectory(pdcodegen)
add_subdirectory(sdk-cpp)
endif()
+if (APP)
+add_subdirectory(configure)
+endif()
+
+
if(CLIENT)
add_subdirectory(general-client)
endif()
diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt
index b6384fc99ea3df6d71a61865e3aabf5b39b510dd..d3e5b75da96ad7a0789866a4a2c474fad988c21b 100644
--- a/core/configure/CMakeLists.txt
+++ b/core/configure/CMakeLists.txt
@@ -1,3 +1,4 @@
+if (SERVER OR CLIENT)
LIST(APPEND protofiles
${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto
@@ -28,6 +29,7 @@ FILE(GLOB inc ${CMAKE_CURRENT_BINARY_DIR}/*.pb.h)
install(FILES ${inc}
DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
+endif()
py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
@@ -51,6 +53,14 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
endif()
+if (APP)
+add_custom_command(TARGET general_model_config_py_proto POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
+ COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
+ COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto."
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif()
+
if (SERVER)
py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index 4bdc233099cffbc7949a6b5cf8627fe6461f565c..8956022685090c94be2037445c646e9fbffd1a5c 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -43,6 +43,7 @@ message EngineDesc {
optional bool enable_memory_optimization = 13;
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
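+  // Toggle for IR graph optimization passes in the inference engine (backs the new ir_optim serving argument).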
+ optional bool enable_ir_optimization = 16;
};
// model_toolkit conf
diff --git a/core/cube/cube-agent/src/agent/util.go b/core/cube/cube-agent/src/agent/util.go
index 29d27682a3c2e1c46d7ca8cb71de53c2e95df71f..1a0917d9810fb17cdaa4b2b1177d1e7414344a3e 100644
--- a/core/cube/cube-agent/src/agent/util.go
+++ b/core/cube/cube-agent/src/agent/util.go
@@ -83,9 +83,6 @@ func JsonReq(method, requrl string, timeout int, kv *map[string]string,
}
func GetHdfsMeta(src string) (master, ugi, path string, err error) {
- //src = "hdfs://root:rootpasst@st1-inf-platform0.st01.baidu.com:54310/user/mis_user/news_dnn_ctr_cube_1/1501836820/news_dnn_ctr_cube_1_part54.tar"
- //src = "hdfs://st1-inf-platform0.st01.baidu.com:54310/user/mis_user/news_dnn_ctr_cube_1/1501836820/news_dnn_ctr_cube_1_part54.tar"
-
ugiBegin := strings.Index(src, "//")
ugiPos := strings.LastIndex(src, "@")
if ugiPos != -1 && ugiBegin != -1 {
diff --git a/core/general-client/CMakeLists.txt b/core/general-client/CMakeLists.txt
index 88abcbcb776ae999cbf9123d1dad0864a987ecf4..d6079317a75d3f45b61920836e6695bd6b31d951 100644
--- a/core/general-client/CMakeLists.txt
+++ b/core/general-client/CMakeLists.txt
@@ -1,5 +1,5 @@
if(CLIENT)
add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
-target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
endif()
diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 7e04ae11f2106bc8e03fb9045976abc2460e1864..b379188854c30587d24962bc827aa099c3a39183 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -69,15 +69,27 @@ class ModelRes {
const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
return _int64_value_map[name];
}
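+ // The *_with_rv variants below move the stored vector out instead of copying it; the map entry is left valid but empty afterwards.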
+ std::vector<int64_t>&& get_int64_by_name_with_rv(const std::string& name) {
+ return std::move(_int64_value_map[name]);
+ }
const std::vector<float>& get_float_by_name(const std::string& name) {
return _float_value_map[name];
}
- const std::vector<int>& get_shape(const std::string& name) {
+ std::vector<float>&& get_float_by_name_with_rv(const std::string& name) {
+ return std::move(_float_value_map[name]);
+ }
+ const std::vector<int>& get_shape_by_name(const std::string& name) {
return _shape_map[name];
}
- const std::vector<int>& get_lod(const std::string& name) {
+ std::vector<int>&& get_shape_by_name_with_rv(const std::string& name) {
+ return std::move(_shape_map[name]);
+ }
+ const std::vector<int>& get_lod_by_name(const std::string& name) {
return _lod_map[name];
}
+ std::vector<int>&& get_lod_by_name_with_rv(const std::string& name) {
+ return std::move(_lod_map[name]);
+ }
void set_engine_name(const std::string& engine_name) {
_engine_name = engine_name;
}
@@ -121,17 +133,33 @@ class PredictorRes {
const std::string& name) {
return _models[model_idx].get_int64_by_name(name);
}
+ std::vector<int64_t>&& get_int64_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_int64_by_name_with_rv(name));
+ }
const std::vector<float>& get_float_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_float_by_name(name);
}
- const std::vector<int>& get_shape(const int model_idx,
- const std::string& name) {
- return _models[model_idx].get_shape(name);
+ std::vector<float>&& get_float_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_float_by_name_with_rv(name));
+ }
+ const std::vector<int>& get_shape_by_name(const int model_idx,
+ const std::string& name) {
+ return _models[model_idx].get_shape_by_name(name);
+ }
+ std::vector<int>&& get_shape_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_shape_by_name_with_rv(name));
+ }
+ const std::vector<int>& get_lod_by_name(const int model_idx,
+ const std::string& name) {
+ return _models[model_idx].get_lod_by_name(name);
}
- const std::vector<int>& get_lod(const int model_idx,
- const std::string& name) {
- return _models[model_idx].get_lod(name);
+ std::vector<int>&& get_lod_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_lod_by_name_with_rv(name));
}
void add_model_res(ModelRes&& res) {
_engine_names.push_back(res.engine_name());
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 86f75bc1c1b401cd14f2c6651ea52ef08fdb8c40..d4e54c2ac04cf84b2a036f7abe0d426e6f186699 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -258,9 +258,10 @@ int PredictorClient::batch_predict(
ModelRes model;
model.set_engine_name(output.engine_name());
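+ // Keep the fetch-variable index across loop iterations; it was previously re-declared as 0 inside the loop, so every fetch variable read tensor 0.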
+ int idx = 0;
+
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
@@ -279,9 +280,9 @@ int PredictorClient::batch_predict(
idx += 1;
}
+ idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
@@ -345,7 +346,7 @@ int PredictorClient::numpy_predict(
PredictorRes &predict_res_batch,
const int &pid) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
-
+ VLOG(2) << "batch size: " << batch_size;
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
@@ -462,7 +463,7 @@ int PredictorClient::numpy_predict(
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
for (ssize_t l = 0; l < int_array.shape(3); l++) {
- tensor->add_float_data(int_array(i, j, k, l));
+ tensor->add_int64_data(int_array(i, j, k, l));
}
}
}
@@ -474,7 +475,7 @@ int PredictorClient::numpy_predict(
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
- tensor->add_float_data(int_array(i, j, k));
+ tensor->add_int64_data(int_array(i, j, k));
}
}
}
@@ -484,7 +485,7 @@ int PredictorClient::numpy_predict(
auto int_array = int_feed[vec_idx].unchecked<2>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
- tensor->add_float_data(int_array(i, j));
+ tensor->add_int64_data(int_array(i, j));
}
}
break;
@@ -492,7 +493,7 @@ int PredictorClient::numpy_predict(
case 1: {
auto int_array = int_feed[vec_idx].unchecked<1>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
- tensor->add_float_data(int_array(i));
+ tensor->add_int64_data(int_array(i));
}
break;
}
@@ -536,9 +537,9 @@ int PredictorClient::numpy_predict(
ModelRes model;
model.set_engine_name(output.engine_name());
+ int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
@@ -557,9 +558,10 @@ int PredictorClient::numpy_predict(
idx += 1;
}
+ idx = 0;
+
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index b0d1d2d624d616a1df3805364cf7802cc19fc46b..3e065e4de1ff3c01ff6bc05cb39a2607620915b4 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -32,24 +32,41 @@ PYBIND11_MODULE(serving_client, m) {
.def(py::init())
.def("get_int64_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
- return self.get_int64_by_name(model_idx, name);
- },
- py::return_value_policy::reference)
+ // see more: https://github.com/pybind/pybind11/issues/1042
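+ // Move the result into a heap-allocated vector and hand it to numpy via a capsule, so the array takes ownership without an extra copy.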
+ std::vector<int64_t> *ptr = new std::vector<int64_t>(
+ std::move(self.get_int64_by_name_with_rv(model_idx, name)));
+ auto capsule = py::capsule(ptr, [](void *p) {
+ delete reinterpret_cast<std::vector<int64_t> *>(p);
+ });
+ return py::array(ptr->size(), ptr->data(), capsule);
+ })
.def("get_float_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
- return self.get_float_by_name(model_idx, name);
- },
- py::return_value_policy::reference)
+ std::vector<float> *ptr = new std::vector<float>(
+ std::move(self.get_float_by_name_with_rv(model_idx, name)));
+ auto capsule = py::capsule(ptr, [](void *p) {
+ delete reinterpret_cast<std::vector<float> *>(p);
+ });
+ return py::array(ptr->size(), ptr->data(), capsule);
+ })
.def("get_shape",
[](PredictorRes &self, int model_idx, std::string &name) {
- return self.get_shape(model_idx, name);
- },
- py::return_value_policy::reference)
+ std::vector<int> *ptr = new std::vector<int>(
+ std::move(self.get_shape_by_name_with_rv(model_idx, name)));
+ auto capsule = py::capsule(ptr, [](void *p) {
+ delete reinterpret_cast<std::vector<int> *>(p);
+ });
+ return py::array(ptr->size(), ptr->data(), capsule);
+ })
.def("get_lod",
[](PredictorRes &self, int model_idx, std::string &name) {
- return self.get_lod(model_idx, name);
- },
- py::return_value_policy::reference)
+ std::vector<int> *ptr = new std::vector<int>(
+ std::move(self.get_lod_by_name_with_rv(model_idx, name)));
+ auto capsule = py::capsule(ptr, [](void *p) {
+ delete reinterpret_cast<std::vector<int> *>(p);
+ });
+ return py::array(ptr->size(), ptr->data(), capsule);
+ })
.def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
.def("get_engine_names",
[](PredictorRes &self) { return self.get_engine_names(); });
@@ -100,7 +117,8 @@ PYBIND11_MODULE(serving_client, m) {
fetch_name,
predict_res_batch,
pid);
- })
+ },
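+ // Release the GIL while the blocking predict call runs so other Python threads can proceed.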
+ py::call_guard<py::gil_scoped_release>())
.def("numpy_predict",
[](PredictorClient &self,
const std::vector<std::vector<py::array_t<float>>>
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 8695da2591a30725d5b2390ad287f9ceae40052b..7d48949b22d0ace289ab3b9214f092819f5476e0 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -131,7 +131,7 @@ int GeneralReaderOp::inference() {
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
- if (req->insts(0).tensor_array(i).shape(0) == -1) {
+ if (model_config->_is_lod_feed[i]) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "var[" << i << "] is lod_tensor";
@@ -153,6 +153,7 @@ int GeneralReaderOp::inference() {
// specify the memory needed for output tensor_vector
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
+ int tensor_size = 0;
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = 0;
@@ -162,15 +163,28 @@ int GeneralReaderOp::inference() {
data_len = tensor.float_data_size();
}
VLOG(2) << "tensor size for var[" << i << "]: " << data_len;
+ tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "current len: " << cur_len;
- out->at(i).lod[0].push_back(cur_len + data_len);
- VLOG(2) << "new len: " << cur_len + data_len;
+ int sample_len = 0;
+ if (tensor.shape_size() == 1) {
+ sample_len = data_len;
+ } else {
+ sample_len = tensor.shape(0);
+ }
+ out->at(i).lod[0].push_back(cur_len + sample_len);
+ VLOG(2) << "new len: " << cur_len + sample_len;
+ }
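+ // Allocate the buffer once for the whole batch, then rebuild the shape as {total LoD length, trailing dims...}, padding a trailing 1 for 1-D data.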
+ out->at(i).data.Resize(tensor_size * elem_size[i]);
+ out->at(i).shape = {out->at(i).lod[0].back()};
+ for (int j = 1; j < req->insts(0).tensor_array(i).shape_size(); ++j) {
+ out->at(i).shape.push_back(req->insts(0).tensor_array(i).shape(j));
+ }
+ if (out->at(i).shape.size() == 1) {
+ out->at(i).shape.push_back(1);
}
- out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
- out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index 4d853f88eef88716c498b2b95c1498f1abdeb3d0..5667a174d9bb6e134e58de72524c60839dc82356 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -15,8 +15,10 @@
#include "core/general-server/op/general_response_op.h"
#include
#include
+#include