Commit c74f2671 authored by barrierye

merge core/general-client/src/general_model.cpp

...@@ -35,13 +35,28 @@ We consider deploying deep learning inference service online to be a user-facing
<h2 align="center">Installation</h2>

We highly recommend running Paddle Serving in Docker; please see [Run in Docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md)
```
# Run CPU Docker
docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
docker exec -it test bash
```
```
# Run GPU Docker
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker exec -it test bash
```
```shell
pip install paddle-serving-client
pip install paddle-serving-server # CPU
pip install paddle-serving-server-gpu # GPU
```

You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.

The client package supports CentOS 7 and Ubuntu 18; alternatively, you can use the HTTP service without installing the client.
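For example, installing from the Tsinghua mirror only adds the `-i` flag to the commands above; a minimal sketch:

```shell
# Install the client and the CPU server from the Tsinghua PyPI mirror.
pip install paddle-serving-client -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple
```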
<h2 align="center">Quick Start Example</h2>

...@@ -130,6 +145,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天
- **Description**:
``` shell
Image classification trained with Imagenet dataset. A label and corresponding probability will be returned.
Note: This demo needs paddle-serving-server-gpu.
```
- **Download Servable Package**:
...@@ -245,6 +261,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv
### About Efficiency
- [How to profile Paddle Serving latency?](python/examples/util)
- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
- [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
......
...@@ -37,12 +37,27 @@ Paddle Serving aims to help deep learning developers easily deploy online prediction services
We strongly recommend building Paddle Serving inside Docker; please see [How to run PaddleServing in Docker](doc/RUN_IN_DOCKER_CN.md)

```
# Run CPU Docker
docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
docker exec -it test bash
```
```
# Run GPU Docker
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker exec -it test bash
```
```shell
pip install paddle-serving-client
pip install paddle-serving-server # CPU
pip install paddle-serving-server-gpu # GPU
```

You may need to use a domestic mirror source (e.g. the Tsinghua mirror; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.

The client package supports CentOS 7 and Ubuntu 18; alternatively, you can use the HTTP service without installing the client.
<h2 align="center">Quick Start Example</h2>

...@@ -135,6 +150,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天
- **Description**:
``` shell
The image classification model is trained on the ImageNet dataset; the service returns a label and its probability
Note: this demo requires paddle-serving-server-gpu
```
- **Download Servable Package**:
...@@ -251,6 +267,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv
### About Paddle Serving performance
- [How to profile Paddle Serving performance?](python/examples/util/)
- [How to optimize performance?](doc/PERFORMANCE_OPTIM_CN.md)
- [Deploy multiple prediction services on one GPU](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU Benchmarks](doc/BENCHMARKING.md)
- [GPU Benchmarks](doc/GPU_BENCHMARKING.md)
......
...@@ -18,9 +18,11 @@ project(cube-transfer Go)
include(cmake/golang.cmake)

ExternalGoProject_Add(rfw github.com/mipearson/rfw)
ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)
add_custom_target(logex
    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get github.com/Badangel/logex
    DEPENDS rfw)

add_subdirectory(src)
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/conf DESTINATION ${PADDLE_SERVING_INSTALL_DIR})
...@@ -314,7 +314,7 @@ int PredictorClient::batch_predict(
tensor_vec.push_back(inst->add_tensor_array());
}
VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
        << "prepared";
int vec_idx = 0;
for (auto &name : float_feed_name) {
...@@ -376,6 +376,7 @@ int PredictorClient::batch_predict(
} else {
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
uint32_t model_num = res.outputs_size();
predict_res_batch._models.resize(model_num);
for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
...@@ -385,9 +386,11 @@ int PredictorClient::batch_predict(
predict_res_batch._models[m_idx]._int64_map[name].resize(batch_size);
predict_res_batch._models[m_idx]._float_map[name].resize(batch_size);
}
VLOG(2) << "response batch size " << output.insts_size();
VLOG(2) << "response var name " << output.insts(0).tensor_array_size();
for (int bi = 0; bi < batch_size; bi++) {
int idx = 0;
for (auto &name : fetch_name) {
int len = output.insts(bi).tensor_array(idx).data_size();
if (_fetch_name_to_type[name] == 0) {
int len = output.insts(bi).tensor_array(idx).int64_data_size();
...@@ -412,6 +415,7 @@ int PredictorClient::batch_predict(
}
}
}
idx += 1;
}
}
postprocess_end = timeline.TimeStampUS();
......
...@@ -19,6 +19,7 @@ sh get_data.sh
The following Python code will process the data `test_data/part-0` and write to the `processed.data` file.

[//file]:#process.py
``` python
from imdb_reader import IMDBDataset
imdb_dataset = IMDBDataset()
...@@ -59,7 +60,8 @@ exit
Run the following Python code on the host computer to start the client. Make sure the `paddle-serving-client` package is installed on the host.

[//file]:#ab_client.py
``` python
from paddle_serving_client import Client
client = Client()
...@@ -94,3 +96,24 @@ When making prediction on the client side, if the parameter `need_variant_tag=Tr
[lstm](total: 1867) acc: 0.490091055169
[bow](total: 217) acc: 0.73732718894
```
<!--
cp ../Serving/python/examples/imdb/get_data.sh .
cp ../Serving/python/examples/imdb/imdb_reader.py .
pip install -U paddle_serving_server
pip install -U paddle_serving_client
pip install -U paddlepaddle
sh get_data.sh
python process.py
python -m paddle_serving_server.serve --model imdb_bow_model --port 8000 --workdir workdir1 &
sleep 5
python -m paddle_serving_server.serve --model imdb_lstm_model --port 9000 --workdir workdir2 &
sleep 5
python ab_client.py >log.txt
if [[ $? -eq 0 ]]; then
echo "test success"
else
echo "test fail"
fi
ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
-->
...@@ -102,4 +102,5 @@ if [[ $? -eq 0 ]]; then
else
echo "test fail"
fi
ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
-->
...@@ -4,14 +4,19 @@
## Compilation environment requirements

- OS: CentOS 7
- GCC: 4.8.2 and later
- Golang: 1.9.2 and later
- Git: 2.17.1 and later
- CMake: 3.2.2 and later
- Python: 2.7.2 and later

It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you:

- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`, dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`, dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)

This document takes Python 2 as an example to show how to compile Paddle Serving. To compile with Python 3, simply adjust the Python options of CMake.
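For illustration, a possible Python 3 configuration is sketched below. The three paths are placeholders for your own environment, and the variable names are the standard CMake Python hints, assumed here to be what the build reads:

```shell
# Hypothetical example: point CMake at a Python 3 installation.
# Adjust the paths to match your system before running.
cmake -DPYTHON_INCLUDE_DIR=/usr/include/python3.6m \
      -DPYTHON_LIBRARIES=/usr/lib64/libpython3.6m.so \
      -DPYTHON_EXECUTABLE=/usr/bin/python3.6 \
      ..
```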
## Get Code
......
...@@ -4,14 +4,19 @@
## Compilation environment requirements

- OS: CentOS 7
- GCC: 4.8.2 and later
- Golang: 1.9.2 and later
- Git: 2.17.1 and later
- CMake: 3.2.2 and later
- Python: 2.7.2 and later

It is recommended to compile in Docker. We have prepared the Paddle Serving compilation environment for you:

- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`, dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`, dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)

This document takes Python 2 as an example to show how to compile Paddle Serving. To compile with Python 3, simply adjust the Python options of CMake.

## Get Code
......
# Multi-model prediction service on a single GPU card

When client requests arrive infrequently, the server's compute resources, especially GPU resources, sit idle and are wasted. In this case, multiple prediction services can be started on the server to improve resource utilization. Paddle Serving supports deploying multiple prediction services on one GPU card: when starting each service, bind it to the card with the `--gpu_ids` argument, so that several services share the same card.

For example:

```shell
python -m paddle_serving_server_gpu.serve --model bert_seq20_model --port 9292 --gpu_ids 0
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0
```

This deploys both the bert example and the imagenet example on card 0.

**Note:** Inference on a single GPU card is still executed serially; this approach only reduces the idle time of the server-side GPU.
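To confirm that both services ended up on the same card, `nvidia-smi` lists the processes attached to each GPU:

```shell
# Both serving processes from the example above should appear under GPU 0.
nvidia-smi
```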
# Performance optimization

Because model structures differ, different predictions consume different amounts of compute resources. For an online prediction service, a model that needs little computation spends a relatively large share of its time on communication and is called a communication-intensive service; a model that needs heavy computation spends most of its time on inference and is called a computation-intensive service. The two types can be optimized in different ways according to actual needs.

The simplest way to tell which type a prediction service belongs to is to look at the time breakdown. Paddle Serving provides a [Timeline tool](../python/examples/util/README_CN.md) that visualizes the time spent in each stage of the prediction service.

For communication-intensive services, requests can be aggregated: within an acceptable latency limit, merge multiple prediction requests into one batch before predicting.

For computation-intensive services, use a GPU prediction service instead of a CPU one, or increase the number of GPU cards for the prediction service.

Under the same conditions, the communication time of Paddle Serving's HTTP prediction service is higher than that of the RPC prediction service, so for communication-intensive services please prefer RPC communication.

If the model is large and the prediction service uses a lot of memory or GPU memory, you can enable memory/GPU-memory optimization by setting the `--mem_optim` option to True.
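As a sketch of the last point, a hypothetical launch command with memory optimization enabled; the model directory and port are placeholders, and on some versions the option may need an explicit value (`--mem_optim True`):

```shell
# Start a GPU prediction service with memory / GPU-memory optimization enabled.
# "your_model" and the port are placeholders for your own deployment.
python -m paddle_serving_server_gpu.serve --model your_model --port 9292 --gpu_ids 0 --mem_optim
```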
...@@ -6,6 +6,8 @@
Docker (GPU version requires nvidia-docker to be installed on the GPU machine)

This document takes Python 2 as an example to show how to run Paddle Serving in Docker. You can also run the commands with Python 3 by replacing `python` with `python3`.
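For example, under that substitution a hypothetical Python 3 session looks like this; `uci_housing_model` is simply the example model used elsewhere in these docs, and `pip3` is assumed to be the matching package manager:

```shell
# Python 2 (as written in this document):
python -m paddle_serving_server.serve --model uci_housing_model --port 9292
# Python 3: replace python with python3 (and pip with pip3 when installing packages):
python3 -m paddle_serving_server.serve --model uci_housing_model --port 9292
```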
## CPU
### Get docker image
......
...@@ -6,6 +6,8 @@
Docker (the GPU version requires nvidia-docker to be installed on the GPU machine)

This document takes Python 2 as an example to show how to run Paddle Serving in Docker. You can also run the commands with Python 3 by replacing `python` with `python3`.

## CPU version
### Get docker image
......
...@@ -288,7 +288,7 @@ The script receives data from standard input and prints out the probability that
The client implemented in the previous step runs the prediction service as an example. The usage method is as follows:

```shell
cat test_data/part-0 | python test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
```

Using 2084 samples in the test_data/part-0 file for testing, the model prediction accuracy is 88.19%.

...@@ -350,7 +350,7 @@ In the above command, the first parameter is the saved server-side model and con
After starting the HTTP prediction service, you can make prediction with a single command:

```
curl -H "Content-Type: application/json" -X POST -d '{"words": "i am very sad | 0", "fetch": ["prediction"]}' http://127.0.0.1:9292/imdb/prediction
```

When the inference process is normal, the prediction probability is returned, as shown below.
......
BERT_10_MINS.md
ABTEST_IN_PADDLE_SERVING.md
# Fit a line prediction example

([简体中文](./README_CN.md)|English)

## Get data

```shell
sh get_data.sh
```

## RPC service

### Start server

``` shell
python test_server.py uci_housing_model/
```

You can also start the default RPC service with the following line of code:

```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
```

### Client prediction

The `paddlepaddle` package is used in `test_client.py`, and you may need to install it first (`pip install paddlepaddle`).

``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt
```

## HTTP service

### Start server

Start a web service with the default web service hosting module:

``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
```

### Client prediction

``` shell
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
# Linear regression prediction service example

(简体中文|[English](./README.md))

## Get data

```shell
sh get_data.sh
```

## RPC service

### Start server

``` shell
python test_server.py uci_housing_model/
```

You can also start the default RPC service with the following line of code:

```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
```

### Client prediction

The `paddlepaddle` package is used in `test_client.py`; you need to install it first (`pip install paddlepaddle`).

``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt
```

## HTTP service

### Start server

Start the default web service with the following line of code:

``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
```

### Client prediction

``` shell
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
...@@ -10,7 +10,7 @@ time_dict = collections.OrderedDict()
def prase(line):
    profile_list = line.split(" ")
    num = len(profile_list)
    for idx in range(int(num / 2)):
        profile_0_list = profile_list[idx * 2].split(":")
        profile_1_list = profile_list[idx * 2 + 1].split(":")
        if len(profile_0_list[0].split("_")) == 2:
...@@ -18,7 +18,7 @@ def prase(line):
        else:
            name = profile_0_list[0].split("_")[0] + "_" + profile_0_list[
                0].split("_")[1]
        cost = int(profile_1_list[1]) - int(profile_0_list[1])
        if name not in time_dict:
            time_dict[name] = cost
        else:
......
...@@ -175,7 +175,6 @@ class Client(object):
        return self.fetch_names_

    def shape_check(self, feed, key):
        if key in self.lod_tensor_set:
            return
        if len(feed[key]) != self.feed_tensor_len[key]:
...@@ -192,7 +191,7 @@ class Client(object):
        elif isinstance(fetch, list):
            fetch_list = fetch
        else:
            raise ValueError("Fetch only accepts string and list of string")
        feed_batch = []
        if isinstance(feed, dict):
...@@ -200,7 +199,7 @@ class Client(object):
        elif isinstance(feed, list):
            feed_batch = feed
        else:
            raise ValueError("Feed only accepts dict and list of dict")
        int_slot_batch = []
        float_slot_batch = []
...@@ -216,7 +215,7 @@ class Client(object):
        if len(fetch_names) == 0:
            raise ValueError(
                "Fetch names should not be empty or out of saved fetch list.")
            return {}
        for i, feed_i in enumerate(feed_batch):
...@@ -224,7 +223,8 @@ class Client(object):
            float_slot = []
            for key in feed_i:
                if key not in self.feed_names_:
                    raise ValueError("Wrong feed name: {}.".format(key))
                self.shape_check(feed_i, key)
                if self.feed_types_[key] == int_type:
                    if i == 0:
                        int_feed_names.append(key)
...@@ -233,6 +233,8 @@ class Client(object):
                    if i == 0:
                        float_feed_names.append(key)
                    float_slot.append(feed_i[key])
            if len(int_slot) + len(float_slot) == 0:
                raise ValueError("No feed data for predict.")
            int_slot_batch.append(int_slot)
            float_slot_batch.append(float_slot)
......
...@@ -144,7 +144,7 @@ class Server(object):
            self.bin_path = os.environ["SERVING_BIN"]

    def check_cuda(self):
        r = os.system("cat /usr/local/cuda/version.txt")
        if r != 0:
            raise SystemExit(
                "CUDA not found, please check your environment or use cpu version by \"pip install paddle_serving_server\""
......
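The check above can be reproduced by hand; a small sketch based on the same path and fallback suggestion used in that code:

```shell
# Exit code 0 only if the CUDA toolkit is installed under /usr/local/cuda;
# otherwise fall back to the CPU package, as the error message suggests.
cat /usr/local/cuda/version.txt || echo "CUDA not found, use: pip install paddle_serving_server"
```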
...@@ -3,6 +3,7 @@ FROM centos:7.3.1611
RUN yum -y install wget && \
    yum -y install epel-release && yum -y install patchelf && \
    yum -y install gcc make python-devel && \
    yum -y install python3 python3-devel && \
    yum clean all && \
    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python get-pip.py && rm get-pip.py
...@@ -26,6 +26,8 @@ RUN yum -y install wget >/dev/null \
    && make >/dev/null && make install >/dev/null \
    && cd .. \
    && rm -rf patchelf-0.10* \
    && yum install -y python3 python3-devel \
    && pip3 install google protobuf setuptools wheel flask \
    && yum -y update >/dev/null \
    && yum -y install dnf >/dev/null \
    && yum -y install dnf-plugins-core >/dev/null \
......
...@@ -18,5 +18,7 @@ RUN yum -y install wget >/dev/null \
    && python get-pip.py >/dev/null \
    && pip install google protobuf setuptools wheel flask >/dev/null \
    && rm get-pip.py \
    && yum install -y python3 python3-devel \
    && pip3 install google protobuf setuptools wheel flask \
    && yum -y install epel-release && yum -y install patchelf \
    && yum clean all
...@@ -6,6 +6,7 @@ RUN yum -y install wget && \
    yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false && \
    yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \
    yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \
    yum -y install python3 python3-devel && \
    yum clean all && \
    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python get-pip.py && rm get-pip.py && \
......
...@@ -19,5 +19,7 @@ RUN yum -y install wget >/dev/null \
    && python get-pip.py >/dev/null \
    && pip install google protobuf setuptools wheel flask >/dev/null \
    && rm get-pip.py \
    && yum install -y python3 python3-devel \
    && pip3 install google protobuf setuptools wheel flask \
    && yum -y install epel-release && yum -y install patchelf \
    && yum clean all
...@@ -266,12 +266,119 @@ function python_run_criteo_ctr_with_cube() {
    cd .. # pwd: /Serving/python/examples
}
function python_test_bert() {
# pwd: /Serving/python/examples
local TYPE=$1
yum install -y libXext libSM libXrender >/dev/null
pip install ujson
export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
cd bert # pwd: /Serving/python/examples/bert
case $TYPE in
CPU)
pip install paddlehub
python prepare_model.py 20
sh get_data.sh
check_cmd "python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9292 &"
sleep 5
pip install paddle_serving_app
check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
kill_server_process
ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
ps -ef | grep "serving" | grep -v grep | awk '{print $2}' | xargs kill
echo "bert RPC inference pass"
;;
GPU)
pip install paddlehub
python prepare_model.py 20
sh get_data.sh
check_cmd "python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --gpu_ids 0 &"
sleep 5
pip install paddle_serving_app
check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
kill_server_process
ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
echo "bert RPC inference pass"
;;
*)
esac
echo "test bert $TYPE finished as expected."
unset SERVING_BIN
cd ..
}
function python_test_imdb() {
# pwd: /Serving/python/examples
local TYPE=$1
export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
cd imdb # pwd: /Serving/python/examples/imdb
case $TYPE in
CPU)
sh get_data.sh
sleep 5
check_cmd "python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 &"
check_cmd "head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab"
echo "imdb CPU RPC inference pass"
ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
rm -rf work_dir1
sleep 5
check_cmd "python text_classify_service.py imdb_cnn_model/workdir/9292 imdb.vocab &"
sleep 5
check_cmd "curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction"
ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill
echo "imdb CPU HTTP inference pass"
;;
GPU)
echo "imdb ignore GPU test"
;;
*)
esac
echo "test imdb $TYPE finished as expected."
unset SERVING_BIN
cd ..
}
function python_test_lac() {
# pwd: /Serving/python/examples
local TYPE=$1
export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
cd lac # pwd: /Serving/python/examples/lac
case $TYPE in
CPU)
sh get_data.sh
check_cmd "python -m paddle_serving_server.serve --model jieba_server_model/ --port 9292 &"
sleep 5
check_cmd "echo "我爱北京天安门" | python lac_client.py jieba_client_conf/serving_client_conf.prototxt lac_dict/"
echo "lac CPU RPC inference pass"
ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
check_cmd "python lac_web_service.py jieba_server_model/ lac_workdir 9292 &"
sleep 5
check_cmd "curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天安门", "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction"
ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
ps -ef | grep "lac_web_service" | grep -v grep | awk '{print $2}' | xargs kill
echo "lac CPU HTTP inference pass"
;;
GPU)
echo "lac ignore GPU test"
;;
*)
esac
echo "test lac $TYPE finished as expected."
unset SERVING_BIN
cd ..
}
function python_run_test() {
    # Using the compiled binary
    local TYPE=$1 # pwd: /Serving
    cd python/examples # pwd: /Serving/python/examples
    python_test_fit_a_line $TYPE # pwd: /Serving/python/examples
    python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
    python_test_bert $TYPE # pwd: /Serving/python/examples
    python_test_imdb $TYPE
    python_test_lac $TYPE
    echo "test python $TYPE part finished as expected."
    cd ../.. # pwd: /Serving
}
......