diff --git a/README.md b/README.md index 17730e2a071facf7c939cb7fb686596b2b752aa6..d018db0a1f0a358bde750da0075b1736b15a7d39 100644 --- a/README.md +++ b/README.md @@ -37,14 +37,14 @@ We consider deploying deep learning inference service online to be a user-facing We **highly recommend** you to **run Paddle Serving in Docker**, please visit [Run in Docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md) ``` # Run CPU Docker -docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0 -docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0 +docker pull hub.baidubce.com/paddlepaddle/serving:latest +docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest docker exec -it test bash ``` ``` # Run GPU Docker -nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu -nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu +nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu +nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu nvidia-docker exec -it test bash ``` @@ -261,6 +261,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa - [How to develop a new Web Service?](doc/NEW_WEB_SERVICE.md) - [Golang client](doc/IMDB_GO_CLIENT.md) - [Compile from source code](doc/COMPILE.md) +- [Deploy Web Service with uWSGI](doc/UWSGI_DEPLOY.md) +- [Hot loading for model file](doc/HOT_LOADING_IN_SERVING.md) ### About Efficiency - [How to profile Paddle Serving latency?](python/examples/util) diff --git a/README_CN.md b/README_CN.md index 3302d4850e8255e8d2d6460c201892fd6035b260..3e39e1854c9fda8545172dfb7679fde881827741 100644 --- a/README_CN.md +++ b/README_CN.md @@ -39,14 +39,14 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务 ``` # 启动 CPU Docker -docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0 -docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0 +docker pull hub.baidubce.com/paddlepaddle/serving:latest +docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest docker exec -it test bash ``` ``` # 启动 GPU Docker -nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu -nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu +nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu +nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu nvidia-docker exec -it test bash ``` ```shell @@ -267,6 +267,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa - [如何开发一个新的Web Service?](doc/NEW_WEB_SERVICE_CN.md) - [如何在Paddle Serving使用Go Client?](doc/IMDB_GO_CLIENT_CN.md) - [如何编译PaddleServing?](doc/COMPILE_CN.md) +- [如何使用uWSGI部署Web Service](doc/UWSGI_DEPLOY_CN.md) +- [如何实现模型文件热加载](doc/HOT_LOADING_IN_SERVING_CN.md) ### 关于Paddle Serving性能 - [如何测试Paddle Serving性能?](python/examples/util/) diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp index 126accfd0a406f420f57eef4e04268e9081c744f..5667a174d9bb6e134e58de72524c60839dc82356 100644 --- a/core/general-server/op/general_response_op.cpp +++ b/core/general-server/op/general_response_op.cpp @@ -89,50 +89,41 @@ int GeneralResponseOp::inference() { output->set_engine_name(pre_name); FetchInst *fetch_inst = output->add_insts(); - std::map<std::string, int> fetch_index_map; - for (int i = 0; i < in->size(); ++i) { - VLOG(2) << "index "
<< i << " var " << in->at(i).name; - fetch_index_map.insert(std::pair<std::string, int>(in->at(i).name, i)); - } - for (auto &idx : fetch_index) { Tensor *tensor = fetch_inst->add_tensor_array(); tensor->set_elem_type(1); - int true_idx = fetch_index_map[model_config->_fetch_name[idx]]; if (model_config->_is_lod_fetch[idx]) { VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx] << " is lod_tensor"; - for (int k = 0; k < in->at(true_idx).shape.size(); ++k) { + for (int k = 0; k < in->at(idx).shape.size(); ++k) { VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k]; - tensor->add_shape(in->at(true_idx).shape[k]); + tensor->add_shape(in->at(idx).shape[k]); } } else { VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx] << " is tensor"; - for (int k = 0; k < in->at(true_idx).shape.size(); ++k) { - VLOG(2) << "shape[" << k << "]: " << in->at(true_idx).shape[k]; - tensor->add_shape(in->at(true_idx).shape[k]); + for (int k = 0; k < in->at(idx).shape.size(); ++k) { + VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k]; + tensor->add_shape(in->at(idx).shape[k]); } } } int var_idx = 0; for (auto &idx : fetch_index) { - int true_idx = fetch_index_map[model_config->_fetch_name[idx]]; int cap = 1; - for (int j = 0; j < in->at(true_idx).shape.size(); ++j) { - cap *= in->at(true_idx).shape[j]; + for (int j = 0; j < in->at(idx).shape.size(); ++j) { + cap *= in->at(idx).shape[j]; } - if (in->at(true_idx).dtype == paddle::PaddleDType::INT64) { + if (in->at(idx).dtype == paddle::PaddleDType::INT64) { VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx] << "]."; - int64_t *data_ptr = - static_cast<int64_t *>(in->at(true_idx).data.data()); + int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data()); if (model_config->_is_lod_fetch[idx]) { FetchInst *fetch_p = output->mutable_insts(0); - for (int j = 0; j < in->at(true_idx).lod[0].size(); ++j) { + for (int j = 0; j < in->at(idx).lod[0].size(); ++j) { fetch_p->mutable_tensor_array(var_idx)->add_lod( - in->at(true_idx).lod[0][j]); + in->at(idx).lod[0][j]); } for (int j = 0; j < cap; ++j) { fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]); @@ -145,15 +136,15 @@ int GeneralResponseOp::inference() { } VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready"; var_idx++; - } else if (in->at(true_idx).dtype == paddle::PaddleDType::FLOAT32) { + } else if (in->at(idx).dtype == paddle::PaddleDType::FLOAT32) { VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx] << "]."; - float *data_ptr = static_cast<float *>(in->at(true_idx).data.data()); + float *data_ptr = static_cast<float *>(in->at(idx).data.data()); if (model_config->_is_lod_fetch[idx]) { FetchInst *fetch_p = output->mutable_insts(0); - for (int j = 0; j < in->at(true_idx).lod[0].size(); ++j) { + for (int j = 0; j < in->at(idx).lod[0].size(); ++j) { fetch_p->mutable_tensor_array(var_idx)->add_lod( - in->at(true_idx).lod[0][j]); + in->at(idx).lod[0][j]); } for (int j = 0; j < cap; ++j) { fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]); diff --git a/doc/ABTEST_IN_PADDLE_SERVING.md b/doc/ABTEST_IN_PADDLE_SERVING.md index 69e5ff4b6fdf11d3764f94cba83beee82f959c85..f2302e611bc68607ed68f45f81cd833a91938ae6 100644 --- a/doc/ABTEST_IN_PADDLE_SERVING.md +++ b/doc/ABTEST_IN_PADDLE_SERVING.md @@ -39,7 +39,7 @@ Here, we [use docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/R First, start the BOW server, which enables the `8000` port: ``` shell -docker run -dit -v $PWD/imdb_bow_model:/model -p 8000:8000 --name bow-server
hub.baidubce.com/paddlepaddle/serving:0.2.0 +docker run -dit -v $PWD/imdb_bow_model:/model -p 8000:8000 --name bow-server hub.baidubce.com/paddlepaddle/serving:latest docker exec -it bow-server bash pip install paddle-serving-server python -m paddle_serving_server.serve --model model --port 8000 >std.log 2>err.log & @@ -49,7 +49,7 @@ exit Similarly, start the LSTM server, which enables the `9000` port: ```bash -docker run -dit -v $PWD/imdb_lstm_model:/model -p 9000:9000 --name lstm-server hub.baidubce.com/paddlepaddle/serving:0.2.0 +docker run -dit -v $PWD/imdb_lstm_model:/model -p 9000:9000 --name lstm-server hub.baidubce.com/paddlepaddle/serving:latest docker exec -it lstm-server bash pip install paddle-serving-server python -m paddle_serving_server.serve --model model --port 9000 >std.log 2>err.log & diff --git a/doc/ABTEST_IN_PADDLE_SERVING_CN.md b/doc/ABTEST_IN_PADDLE_SERVING_CN.md index 1991c7e665aae97e36a690fcd4f96c4f85450cea..7ba4e5d7dbe643d87fc15e783afea2955b98fa1e 100644 --- a/doc/ABTEST_IN_PADDLE_SERVING_CN.md +++ b/doc/ABTEST_IN_PADDLE_SERVING_CN.md @@ -38,7 +38,7 @@ with open('test_data/part-0') as fin: 首先启动BOW Server,该服务启用`8000`端口: ```bash -docker run -dit -v $PWD/imdb_bow_model:/model -p 8000:8000 --name bow-server hub.baidubce.com/paddlepaddle/serving:0.2.0 +docker run -dit -v $PWD/imdb_bow_model:/model -p 8000:8000 --name bow-server hub.baidubce.com/paddlepaddle/serving:latest docker exec -it bow-server bash pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple python -m paddle_serving_server.serve --model model --port 8000 >std.log 2>err.log & @@ -48,7 +48,7 @@ exit 同理启动LSTM Server,该服务启用`9000`端口: ```bash -docker run -dit -v $PWD/imdb_lstm_model:/model -p 9000:9000 --name lstm-server hub.baidubce.com/paddlepaddle/serving:0.2.0 +docker run -dit -v $PWD/imdb_lstm_model:/model -p 9000:9000 --name lstm-server hub.baidubce.com/paddlepaddle/serving:latest docker exec -it lstm-server bash pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple python -m paddle_serving_server.serve --model model --port 9000 >std.log 2>err.log & diff --git a/doc/COMPILE.md b/doc/COMPILE.md index f61ac061883581090087a2202e694c9a07468c5f..411620af2ee10a769384c36cebc3aa3ecb93ea49 100644 --- a/doc/COMPILE.md +++ b/doc/COMPILE.md @@ -13,8 +13,8 @@ It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you: -- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel) -- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel) +- CPU: `hub.baidubce.com/paddlepaddle/serving:latest-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel) +- GPU: `hub.baidubce.com/paddlepaddle/serving:latest-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel) This document will take Python2 as an example to show how to compile Paddle Serving. 
If you want to compile with Python3, just adjust the Python options of cmake: diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md index c6e5426f02335598277ceb40fafc5215c7f03b2b..44802260719d37a3140ca15f6a2ccc15479e32d6 100644 --- a/doc/COMPILE_CN.md +++ b/doc/COMPILE_CN.md @@ -13,8 +13,8 @@ 推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境: -- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel) -- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel) +- CPU: `hub.baidubce.com/paddlepaddle/serving:latest-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel) +- GPU: `hub.baidubce.com/paddlepaddle/serving:latest-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel) 本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译,只需要调整cmake的Python相关选项即可: diff --git a/doc/DESIGN_DOC_CN.md b/doc/DESIGN_DOC_CN.md index c068ac35bb6beebe70a6f873318c6d5059fc51e7..7b6e237f787c12a7201898ee9403a6467473ef8c 100644 --- a/doc/DESIGN_DOC_CN.md +++ b/doc/DESIGN_DOC_CN.md @@ -26,7 +26,7 @@ serving_io.save_model("serving_model", "client_conf", {"words": data}, {"prediction": prediction}, fluid.default_main_program()) ``` -代码示例中,`{"words": data}`和`{"prediction": prediction}`分别指定了模型的输入和输出,`"words"`和`"prediction"`是输出和输出变量的别名,设计别名的目的是为了使开发者能够记忆自己训练模型的输入输出对应的字段。`data`和`prediction`则是Paddle训练过程中的`[Variable](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/Variable_cn.html#variable)`,通常代表张量([Tensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/Tensor_cn.html#tensor))或变长张量([LodTensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor))。调用保存命令后,会按照用户指定的`"serving_model"`和`"client_conf"`生成两个目录,内容如下: +代码示例中,`{"words": data}`和`{"prediction": prediction}`分别指定了模型的输入和输出,`"words"`和`"prediction"`是输入和输出变量的别名,设计别名的目的是为了使开发者能够记忆自己训练模型的输入输出对应的字段。`data`和`prediction`则是Paddle训练过程中的`[Variable](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/Variable_cn.html#variable)`,通常代表张量([Tensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/Tensor_cn.html#tensor))或变长张量([LodTensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor))。调用保存命令后,会按照用户指定的`"serving_model"`和`"client_conf"`生成两个目录,内容如下: ``` shell . ├── client_conf diff --git a/doc/PERFORMANCE_OPTIM.md b/doc/PERFORMANCE_OPTIM.md index 4b025e94d6f8d3ed69fb76898eb6afada9ca6613..0de06c16988d14d8f92eced491db7dc423831afe 100644 --- a/doc/PERFORMANCE_OPTIM.md +++ b/doc/PERFORMANCE_OPTIM.md @@ -1,8 +1,10 @@ -# Performance optimization +# Performance Optimization + +([简体中文](./PERFORMANCE_OPTIM_CN.md)|English) Due to different model structures, different prediction services consume different computing resources when performing predictions. For online prediction services, models that require less computing resources will have a higher proportion of communication time cost, which is called communication-intensive service. Models that require more computing resources have a higher time cost for inference calculations, which is called computationa-intensive services. -For a prediction service, the easiest way to determine what type it is is to look at the time ratio. Paddle Serving provides [Timeline tool] (../python/examples/util/README_CN.md), which can intuitively display the time spent in each stage of the prediction service. 
+For a prediction service, the easiest way to determine what type it is is to look at the time ratio. Paddle Serving provides [Timeline tool](../python/examples/util/README_CN.md), which can intuitively display the time spent in each stage of the prediction service. For communication-intensive prediction services, requests can be aggregated, and within a limit that can tolerate delay, multiple prediction requests can be combined into a batch for prediction. diff --git a/doc/PERFORMANCE_OPTIM_CN.md b/doc/PERFORMANCE_OPTIM_CN.md index 7bd64d3e2d645c9328ead55e867d0b97946840ad..1a2c3840942930060a1805bcb999f01b5780cbae 100644 --- a/doc/PERFORMANCE_OPTIM_CN.md +++ b/doc/PERFORMANCE_OPTIM_CN.md @@ -1,5 +1,7 @@ # 性能优化 +(简体中文|[English](./PERFORMANCE_OPTIM.md)) + 由于模型结构的不同,在执行预测时不同的预测服务对计算资源的消耗也不相同。对于在线的预测服务来说,对计算资源要求较少的模型,通信的时间成本占比就会较高,称为通信密集型服务,对计算资源要求较多的模型,推理计算的时间成本较高,称为计算密集型服务。对于这两种服务类型,可以根据实际需求采取不同的方式进行优化 对于一个预测服务来说,想要判断属于哪种类型,最简单的方法就是看时间占比,Paddle Serving提供了[Timeline工具](../python/examples/util/README_CN.md),可以直观的展现预测服务中各阶段的耗时。 diff --git a/doc/RUN_IN_DOCKER.md b/doc/RUN_IN_DOCKER.md index 327176297518ff65d788e3e59b23db27f1e7178c..32a4aae1fb2bf866fe250de0b4ed055a707c8fd0 100644 --- a/doc/RUN_IN_DOCKER.md +++ b/doc/RUN_IN_DOCKER.md @@ -17,7 +17,7 @@ You can get images in two ways: 1. Pull image directly ```bash - docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0 + docker pull hub.baidubce.com/paddlepaddle/serving:latest ``` 2. Building image based on dockerfile @@ -25,13 +25,13 @@ You can get images in two ways: Create a new folder and copy [Dockerfile](../tools/Dockerfile) to this folder, and run the following command: ```bash - docker build -t hub.baidubce.com/paddlepaddle/serving:0.2.0 . + docker build -t hub.baidubce.com/paddlepaddle/serving:latest . ``` ### Create container ```bash -docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0 +docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest docker exec -it test bash ``` @@ -109,7 +109,7 @@ You can also get images in two ways: 1. Pull image directly ```bash - nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu + nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu ``` 2. Building image based on dockerfile @@ -117,13 +117,13 @@ You can also get images in two ways: Create a new folder and copy [Dockerfile.gpu](../tools/Dockerfile.gpu) to this folder, and run the following command: ```bash - nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu . + nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:latest-gpu . ``` ### Create container ```bash -nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu +nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu nvidia-docker exec -it test bash ``` diff --git a/doc/RUN_IN_DOCKER_CN.md b/doc/RUN_IN_DOCKER_CN.md index 4a995f9acf611c550e866ed12502734220a2e71c..b95344923605ade590b8bed509a2dd6f59640433 100644 --- a/doc/RUN_IN_DOCKER_CN.md +++ b/doc/RUN_IN_DOCKER_CN.md @@ -17,7 +17,7 @@ Docker(GPU版本需要在GPU机器上安装nvidia-docker) 1. 直接拉取镜像 ```bash - docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0 + docker pull hub.baidubce.com/paddlepaddle/serving:latest ``` 2. 基于Dockerfile构建镜像 @@ -25,13 +25,13 @@ Docker(GPU版本需要在GPU机器上安装nvidia-docker) 建立新目录,复制[Dockerfile](../tools/Dockerfile)内容到该目录下Dockerfile文件。执行 ```bash - docker build -t hub.baidubce.com/paddlepaddle/serving:0.2.0 . 
+ docker build -t hub.baidubce.com/paddlepaddle/serving:latest . ``` ### 创建容器并进入 ```bash -docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0 +docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest docker exec -it test bash ``` @@ -107,7 +107,7 @@ GPU版本与CPU版本基本一致,只有部分接口命名的差别(GPU版 1. 直接拉取镜像 ```bash - nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu + nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu ``` 2. 基于Dockerfile构建镜像 @@ -115,13 +115,13 @@ GPU版本与CPU版本基本一致,只有部分接口命名的差别(GPU版 建立新目录,复制[Dockerfile.gpu](../tools/Dockerfile.gpu)内容到该目录下Dockerfile文件。执行 ```bash - nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu . + nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:latest-gpu . ``` ### 创建容器并进入 ```bash -nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu +nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu nvidia-docker exec -it test bash ``` diff --git a/python/examples/bert/README.md b/python/examples/bert/README.md index d598fc3b057c85d80e8d10549f7c5b0cf1e725fb..4cfa5590ffb4501c78e9e6ff886f5f82c94dd2db 100644 --- a/python/examples/bert/README.md +++ b/python/examples/bert/README.md @@ -71,28 +71,3 @@ set environmental variable to specify which gpus are used, the command above mea ``` curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction ``` - -### Benchmark - -Model:bert_chinese_L-12_H-768_A-12 - -GPU:GPU V100 * 1 - -CUDA/cudnn Version:CUDA 9.2,cudnn 7.1.4 - - -In the test, 10 thousand samples in the sample data are copied into 100 thousand samples. Each client thread sends a sample of the number of threads. The batch size is 1, the max_seq_len is 20(not 128 as described above), and the time unit is seconds. - -When the number of client threads is 4, the prediction speed can reach 432 samples per second. -Because a single GPU can only perform serial calculations internally, increasing the number of client threads can only reduce the idle time of the GPU. Therefore, after the number of threads reaches 4, the increase in the number of threads does not improve the prediction speed. 
- -| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total | -| ------------------ | ------ | ------------ | ----- | ------ | ---- | ------- | ------ | -| 1 | 3.05 | 290.54 | 0.37 | 239.15 | 6.43 | 0.71 | 365.63 | -| 4 | 0.85 | 213.66 | 0.091 | 200.39 | 1.62 | 0.2 | 231.45 | -| 8 | 0.42 | 223.12 | 0.043 | 110.99 | 0.8 | 0.098 | 232.05 | -| 12 | 0.32 | 225.26 | 0.029 | 73.87 | 0.53 | 0.078 | 231.45 | -| 16 | 0.23 | 227.26 | 0.022 | 55.61 | 0.4 | 0.056 | 231.9 | - -the following is the client thread num - latency bar chart: -![bert benchmark](../../../doc/bert-benchmark-batch-size-1.png) diff --git a/python/examples/bert/README_CN.md b/python/examples/bert/README_CN.md index 7f1d2911ba4a5017137e659fe1f1367e64026de4..93ec8f2adbd9ae31489011900472a0077cb33783 100644 --- a/python/examples/bert/README_CN.md +++ b/python/examples/bert/README_CN.md @@ -67,27 +67,3 @@ head data-c.txt | python bert_client.py --model bert_seq128_client/serving_clien ``` curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction ``` - -### Benchmark - -模型:bert_chinese_L-12_H-768_A-12 - -设备:GPU V100 * 1 - -环境:CUDA 9.2,cudnn 7.1.4 - -测试中将样例数据中的1W个样本复制为10W个样本,每个client线程发送线程数分之一个样本,batch size为1,max_seq_len为20(而不是上面的128),时间单位为秒. - -在client线程数为4时,预测速度可以达到432样本每秒。 -由于单张GPU内部只能串行计算,client线程增多只能减少GPU的空闲时间,因此在线程数达到4之后,线程数增多对预测速度没有提升。 - -| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total | -| ------------------ | ------ | ------------ | ----- | ------ | ---- | ------- | ------ | -| 1 | 3.05 | 290.54 | 0.37 | 239.15 | 6.43 | 0.71 | 365.63 | -| 4 | 0.85 | 213.66 | 0.091 | 200.39 | 1.62 | 0.2 | 231.45 | -| 8 | 0.42 | 223.12 | 0.043 | 110.99 | 0.8 | 0.098 | 232.05 | -| 12 | 0.32 | 225.26 | 0.029 | 73.87 | 0.53 | 0.078 | 231.45 | -| 16 | 0.23 | 227.26 | 0.022 | 55.61 | 0.4 | 0.056 | 231.9 | - -总耗时变化规律如下: -![bert benchmark](../../../doc/bert-benchmark-batch-size-1.png) diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py index e14c02fe1231c1ab04bcf1fda67046ea6b3806bb..af75b718b78b2bc130c2411d05d190fc0d298006 100644 --- a/python/examples/bert/benchmark.py +++ b/python/examples/bert/benchmark.py @@ -26,7 +26,7 @@ from batching import pad_batch_data import tokenization import requests import json -from bert_reader import BertReader +from paddle_serving_app.reader import ChineseBertReader args = benchmark_args() @@ -37,7 +37,7 @@ def single_func(idx, resource): for line in fin: dataset.append(line.strip()) if args.request == "rpc": - reader = BertReader(vocab_file="vocab.txt", max_seq_len=20) + reader = ChineseBertReader(vocab_file="vocab.txt", max_seq_len=20) fetch = ["pooled_output"] client = Client() client.load_client_config(args.model) diff --git a/python/examples/bert/bert_client.py b/python/examples/bert/bert_client.py index b33a80d88fcc28200a61bc6125afcea0a0352dab..b72d17f142c65bafe8ef13e1a963aacce6b3e821 100644 --- a/python/examples/bert/bert_client.py +++ b/python/examples/bert/bert_client.py @@ -25,7 +25,7 @@ from paddlehub.common.logger import logger import socket from paddle_serving_client import Client from paddle_serving_client.utils import benchmark_args -from paddle_serving_app import ChineseBertReader +from paddle_serving_app.reader import ChineseBertReader args = benchmark_args() diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py index 
6a5830ea179b033f9f761010d8cf9213d9b1e40b..d72150878c51d4f95bbc5d2263ad00fb1ed2c387 100644 --- a/python/examples/bert/bert_web_service.py +++ b/python/examples/bert/bert_web_service.py @@ -14,14 +14,14 @@ # limitations under the License. # pylint: disable=doc-string-missing from paddle_serving_server_gpu.web_service import WebService -from bert_reader import BertReader +from paddle_serving_app.reader import ChineseBertReader import sys import os class BertService(WebService): def load(self): - self.reader = BertReader(vocab_file="vocab.txt", max_seq_len=128) + self.reader = ChineseBertReader(vocab_file="vocab.txt", max_seq_len=128) def preprocess(self, feed=[], fetch=[]): feed_res = [ @@ -37,5 +37,5 @@ gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"] bert_service.set_gpus(gpu_ids) bert_service.prepare_server( workdir="workdir", port=int(sys.argv[2]), device="gpu") -bert_service.run_server() -bert_service.run_flask() +bert_service.run_rpc_service() +bert_service.run_web_service() diff --git a/python/examples/faster_rcnn_model/new_test_client.py b/python/examples/faster_rcnn_model/test_client.py similarity index 86% rename from python/examples/faster_rcnn_model/new_test_client.py rename to python/examples/faster_rcnn_model/test_client.py index 0c6c615f8f3dff10626256de59101c401457509f..ce577a3c4396d33af33e45694a573f8b1cbcb52b 100755 --- a/python/examples/faster_rcnn_model/new_test_client.py +++ b/python/examples/faster_rcnn_model/test_client.py @@ -14,6 +14,8 @@ from paddle_serving_client import Client from paddle_serving_app.reader import * +import sys +import numpy as np preprocess = Sequential([ File2Image(), BGR2RGB(), Div(255.0), @@ -24,11 +26,10 @@ preprocess = Sequential([ postprocess = RCNNPostprocess("label_list.txt", "output") client = Client() -client.load_client_config( - "faster_rcnn_client_conf/serving_client_conf.prototxt") -client.connect(['127.0.0.1:9393']) +client.load_client_config(sys.argv[1]) +client.connect(['127.0.0.1:9494']) -im = preprocess(sys.argv[2]) +im = preprocess(sys.argv[3]) fetch_map = client.predict( feed={ "image": im, @@ -36,5 +37,5 @@ fetch_map = client.predict( "im_shape": np.array(list(im.shape[1:]) + [1.0]) }, fetch=["multiclass_nms"]) -fetch_map["image"] = sys.argv[1] +fetch_map["image"] = sys.argv[3] postprocess(fetch_map) diff --git a/python/examples/imagenet/benchmark.py b/python/examples/imagenet/benchmark.py index 6b21719e7b665906e7abd02a7a3b8aef50136685..caa952f121fbd8725c2a6bfe36f0dd84b6a82707 100644 --- a/python/examples/imagenet/benchmark.py +++ b/python/examples/imagenet/benchmark.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +# # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,16 +15,26 @@ # limitations under the License. 
# pylint: disable=doc-string-missing +from __future__ import unicode_literals, absolute_import +import os import sys -from image_reader import ImageReader +import time +import requests +import json +import base64 from paddle_serving_client import Client from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args -import time -import os +from paddle_serving_app.reader import Sequential, URL2Image, Resize +from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize args = benchmark_args() +seq_preprocess = Sequential([ + URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)), + Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True) +]) + def single_func(idx, resource): file_list = [] @@ -31,30 +43,61 @@ def single_func(idx, resource): img_list = [] for i in range(1000): img_list.append(open("./image_data/n01440764/" + file_list[i]).read()) + profile_flags = False + if "FLAGS_profile_client" in os.environ and os.environ[ + "FLAGS_profile_client"]: + profile_flags = True if args.request == "rpc": reader = ImageReader() fetch = ["score"] client = Client() client.load_client_config(args.model) client.connect([resource["endpoint"][idx % len(resource["endpoint"])]]) + start = time.time() + for i in range(1000): + if args.batch_size >= 1: + feed_batch = [] + i_start = time.time() + for bi in range(args.batch_size): + img = seq_preprocess(img_list[i]) + feed_batch.append({"image": img}) + i_end = time.time() + if profile_flags: + print("PROFILE\tpid:{}\timage_pre_0:{} image_pre_1:{}". + format(os.getpid(), + int(round(i_start * 1000000)), + int(round(i_end * 1000000)))) + + result = client.predict(feed=feed_batch, fetch=fetch) + else: + print("unsupport batch size {}".format(args.batch_size)) + elif args.request == "http": + py_version = 2 + server = "http://" + resource["endpoint"][idx % len(resource[ + "endpoint"])] + "/image/prediction" start = time.time() - for i in range(100): - img = reader.process_image(img_list[i]) - fetch_map = client.predict(feed={"image": img}, fetch=["score"]) - end = time.time() - return [[end - start]] + for i in range(1000): + if py_version == 2: + image = base64.b64encode( + open("./image_data/n01440764/" + file_list[i]).read()) + else: + image = base64.b64encode(open(image_path, "rb").read()).decode( + "utf-8") + req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]}) + r = requests.post( + server, data=req, headers={"Content-Type": "application/json"}) + end = time.time() return [[end - start]] -if __name__ == "__main__": +if __name__ == '__main__': multi_thread_runner = MultiThreadRunner() - endpoint_list = ["127.0.0.1:9292"] - #card_num = 4 - #for i in range(args.thread): - # endpoint_list.append("127.0.0.1:{}".format(9295 + i % card_num)) + endpoint_list = ["127.0.0.1:9696"] + #endpoint_list = endpoint_list + endpoint_list + endpoint_list result = multi_thread_runner.run(single_func, args.thread, {"endpoint": endpoint_list}) + #result = single_func(0, {"endpoint": endpoint_list}) avg_cost = 0 for i in range(args.thread): avg_cost += result[0][i] diff --git a/python/examples/imagenet/benchmark.sh b/python/examples/imagenet/benchmark.sh index 16fadbbac6cd7e616d11135653cfbcfeebe6d4f2..618a62c063c0bc4955baf8516bc5bc93e4832394 100644 --- a/python/examples/imagenet/benchmark.sh +++ b/python/examples/imagenet/benchmark.sh @@ -1,9 +1,12 @@ rm profile_log -for thread_num in 1 2 4 8 16 +for thread_num in 1 2 4 8 do - $PYTHONROOT/bin/python 
benchmark.py --thread $thread_num --model ResNet101_vd_client_config/serving_client_conf.prototxt --request rpc > profile 2>&1 +for batch_size in 1 2 4 8 16 32 64 128 +do + $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model ResNet50_vd_client_config/serving_client_conf.prototxt --request rpc > profile 2>&1 echo "========================================" echo "batch size : $batch_size" >> profile_log $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log tail -n 1 profile >> profile_log done +done diff --git a/python/examples/imagenet/benchmark_batch.py b/python/examples/imagenet/benchmark_batch.py deleted file mode 100644 index 1646fb9a94d6953f90f9f4907aa74940f13c2730..0000000000000000000000000000000000000000 --- a/python/examples/imagenet/benchmark_batch.py +++ /dev/null @@ -1,99 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# pylint: disable=doc-string-missing - -from __future__ import unicode_literals, absolute_import -import os -import sys -import time -from paddle_serving_client import Client -from paddle_serving_client.utils import MultiThreadRunner -from paddle_serving_client.utils import benchmark_args -import requests -import json -import base64 -from image_reader import ImageReader - -args = benchmark_args() - - -def single_func(idx, resource): - file_list = [] - for file_name in os.listdir("./image_data/n01440764"): - file_list.append(file_name) - img_list = [] - for i in range(1000): - img_list.append(open("./image_data/n01440764/" + file_list[i]).read()) - profile_flags = False - if "FLAGS_profile_client" in os.environ and os.environ[ - "FLAGS_profile_client"]: - profile_flags = True - if args.request == "rpc": - reader = ImageReader() - fetch = ["score"] - client = Client() - client.load_client_config(args.model) - client.connect([resource["endpoint"][idx % len(resource["endpoint"])]]) - start = time.time() - for i in range(1000): - if args.batch_size >= 1: - feed_batch = [] - i_start = time.time() - for bi in range(args.batch_size): - img = reader.process_image(img_list[i]) - feed_batch.append({"image": img}) - i_end = time.time() - if profile_flags: - print("PROFILE\tpid:{}\timage_pre_0:{} image_pre_1:{}". 
- format(os.getpid(), - int(round(i_start * 1000000)), - int(round(i_end * 1000000)))) - - result = client.predict(feed=feed_batch, fetch=fetch) - else: - print("unsupport batch size {}".format(args.batch_size)) - - elif args.request == "http": - py_version = 2 - server = "http://" + resource["endpoint"][idx % len(resource[ - "endpoint"])] + "/image/prediction" - start = time.time() - for i in range(1000): - if py_version == 2: - image = base64.b64encode( - open("./image_data/n01440764/" + file_list[i]).read()) - else: - image = base64.b64encode(open(image_path, "rb").read()).decode( - "utf-8") - req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]}) - r = requests.post( - server, data=req, headers={"Content-Type": "application/json"}) - end = time.time() - return [[end - start]] - - -if __name__ == '__main__': - multi_thread_runner = MultiThreadRunner() - endpoint_list = ["127.0.0.1:9292"] - #endpoint_list = endpoint_list + endpoint_list + endpoint_list - result = multi_thread_runner.run(single_func, args.thread, - {"endpoint": endpoint_list}) - #result = single_func(0, {"endpoint": endpoint_list}) - avg_cost = 0 - for i in range(args.thread): - avg_cost += result[0][i] - avg_cost = avg_cost / args.thread - print("average total cost {} s.".format(avg_cost)) diff --git a/python/examples/imagenet/benchmark_batch.py.lprof b/python/examples/imagenet/benchmark_batch.py.lprof new file mode 100644 index 0000000000000000000000000000000000000000..7ff4f1411ded79aba3390e606193ec4fedacf06f Binary files /dev/null and b/python/examples/imagenet/benchmark_batch.py.lprof differ diff --git a/python/examples/imagenet/benchmark_batch.sh b/python/examples/imagenet/benchmark_batch.sh deleted file mode 100644 index 4118ffcc755e6d47c69924efbb1b7d5474db8b00..0000000000000000000000000000000000000000 --- a/python/examples/imagenet/benchmark_batch.sh +++ /dev/null @@ -1,12 +0,0 @@ -rm profile_log -for thread_num in 1 2 4 8 16 -do -for batch_size in 1 2 4 8 16 32 64 128 256 512 -do - $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model ResNet101_vd_client_config/serving_client_conf.prototxt --request rpc > profile 2>&1 - echo "========================================" - echo "batch size : $batch_size" >> profile_log - $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log - tail -n 1 profile >> profile_log -done -done diff --git a/python/examples/imagenet/resnet50_web_service.py b/python/examples/imagenet/resnet50_web_service.py index ba40b41bbd9b773910ba0265b3604edd650570ff..3966d31c951d83d8f984e5a265504035ed273125 100644 --- a/python/examples/imagenet/resnet50_web_service.py +++ b/python/examples/imagenet/resnet50_web_service.py @@ -68,5 +68,5 @@ if device == "gpu": image_service.set_gpus("0,1") image_service.prepare_server( workdir="workdir", port=int(sys.argv[3]), device=device) -image_service.run_server() -image_service.run_flask() +image_service.run_rpc_service() +image_service.run_web_service() diff --git a/python/examples/imdb/README.md b/python/examples/imdb/README.md index 5f4d204d368a98cb47d4dac2ff3d481e519adb9d..e2b9a74c98e8993f19b14888f3e21343f526b81d 100644 --- a/python/examples/imdb/README.md +++ b/python/examples/imdb/README.md @@ -30,27 +30,3 @@ python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab ``` curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction ``` - -### Benchmark - -CPU :Intel(R) 
Xeon(R) Gold 6271 CPU @ 2.60GHz * 48 - -Model :[CNN](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/imdb/nets.py) - -server thread num : 16 - -In this test, client sends 25000 test samples totally, the bar chart given later is the latency of single thread, the unit is second, from which we know the predict efficiency is improved greatly by multi-thread compared to single-thread. 8.7 times improvement is made by 16 threads prediction. - -| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total | -| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- | -| 1 | 1.09 | 28.79 | 0.094 | 20.59 | 0.047 | 0.034 | 31.41 | -| 4 | 0.22 | 7.41 | 0.023 | 5.01 | 0.011 | 0.0098 | 8.01 | -| 8 | 0.11 | 4.7 | 0.012 | 2.61 | 0.0062 | 0.0049 | 5.01 | -| 12 | 0.081 | 4.69 | 0.0078 | 1.72 | 0.0042 | 0.0035 | 4.91 | -| 16 | 0.058 | 3.46 | 0.0061 | 1.32 | 0.0033 | 0.003 | 3.63 | -| 20 | 0.049 | 3.77 | 0.0047 | 1.03 | 0.0025 | 0.0022 | 3.91 | -| 24 | 0.041 | 3.86 | 0.0039 | 0.85 | 0.002 | 0.0017 | 3.98 | - -The thread-latency bar chart is as follow: - -![total cost](../../../doc/imdb-benchmark-server-16.png) diff --git a/python/examples/imdb/README_CN.md b/python/examples/imdb/README_CN.md index 2b79938bbf0625786033d13ec2960ad2bc73acda..a669e29e94f6c6cce238473a8fc33405e29e8471 100644 --- a/python/examples/imdb/README_CN.md +++ b/python/examples/imdb/README_CN.md @@ -29,27 +29,3 @@ python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab ``` curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction ``` - -### Benchmark - -设备 :Intel(R) Xeon(R) Gold 6271 CPU @ 2.60GHz * 48 - -模型 :[CNN](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/imdb/nets.py) - -server thread num : 16 - -测试中,client共发送25000条测试样本,图中数据为单个线程的耗时,时间单位为秒。可以看出,client端多线程的预测速度相比单线程有明显提升,在16线程时预测速度是单线程的8.7倍。 - -| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total | -| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- | -| 1 | 1.09 | 28.79 | 0.094 | 20.59 | 0.047 | 0.034 | 31.41 | -| 4 | 0.22 | 7.41 | 0.023 | 5.01 | 0.011 | 0.0098 | 8.01 | -| 8 | 0.11 | 4.7 | 0.012 | 2.61 | 0.0062 | 0.0049 | 5.01 | -| 12 | 0.081 | 4.69 | 0.0078 | 1.72 | 0.0042 | 0.0035 | 4.91 | -| 16 | 0.058 | 3.46 | 0.0061 | 1.32 | 0.0033 | 0.003 | 3.63 | -| 20 | 0.049 | 3.77 | 0.0047 | 1.03 | 0.0025 | 0.0022 | 3.91 | -| 24 | 0.041 | 3.86 | 0.0039 | 0.85 | 0.002 | 0.0017 | 3.98 | - -预测总耗时变化规律如下: - -![total cost](../../../doc/imdb-benchmark-server-16.png) diff --git a/python/examples/imdb/benchmark.py b/python/examples/imdb/benchmark.py index b8d7a70f30c5cf2d0ee985a8c30fada8fa9481b3..632d336ebf20363e257e6e60f08d773cea659a74 100644 --- a/python/examples/imdb/benchmark.py +++ b/python/examples/imdb/benchmark.py @@ -16,7 +16,7 @@ import sys import time import requests -from paddle_serving_app import IMDBDataset +from paddle_serving_app.reader import IMDBDataset from paddle_serving_client import Client from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py index 74364e5854d223e380cb386f9a8bc68b8517305a..cbdc6fe56e0f1078ad32c0d15f4e30a1a59f581b 100644 --- a/python/examples/imdb/test_client.py +++ b/python/examples/imdb/test_client.py @@ -13,7 +13,7 @@ # limitations under the 
License. # pylint: disable=doc-string-missing from paddle_serving_client import Client -from paddle_serving_app import IMDBDataset +from paddle_serving_app.reader import IMDBDataset import sys client = Client() diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py index ae54b99030ee777ad127242d26c13cdbc05645e9..fe6ab0319deb0de5875781cf0890aa39a45c2415 100755 --- a/python/examples/imdb/text_classify_service.py +++ b/python/examples/imdb/text_classify_service.py @@ -14,7 +14,7 @@ # pylint: disable=doc-string-missing from paddle_serving_server.web_service import WebService -from paddle_serving_app import IMDBDataset +from paddle_serving_app.reader import IMDBDataset import sys @@ -37,5 +37,5 @@ imdb_service.load_model_config(sys.argv[1]) imdb_service.prepare_server( workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") imdb_service.prepare_dict({"dict_file_path": sys.argv[4]}) -imdb_service.run_server() -imdb_service.run_flask() +imdb_service.run_rpc_service() +imdb_service.run_web_service() diff --git a/python/examples/lac/lac_web_service.py b/python/examples/lac/lac_web_service.py index c9bd00986c62abde3ee24ddddbf08dda45bbed05..62a7148b230029bc781fa550597df25471a7fc8d 100644 --- a/python/examples/lac/lac_web_service.py +++ b/python/examples/lac/lac_web_service.py @@ -47,5 +47,5 @@ lac_service.load_model_config(sys.argv[1]) lac_service.load_reader() lac_service.prepare_server( workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") -lac_service.run_server() -lac_service.run_flask() +lac_service.run_rpc_service() +lac_service.run_web_service() diff --git a/python/examples/resnet_v2_50/resnet50_debug.py b/python/examples/resnet_v2_50/resnet50_debug.py index 62cb1812c5718ae1f9e10e9e9a57d7c1ae6736b7..768893c20bc3f6bfcb6e21f446d053391825c5fa 100644 --- a/python/examples/resnet_v2_50/resnet50_debug.py +++ b/python/examples/resnet_v2_50/resnet50_debug.py @@ -14,7 +14,7 @@ from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize -from paddle_serving_app import Debugger +from paddle_serving_app.local_predict import Debugger import sys debugger = Debugger() diff --git a/python/examples/senta/get_data.sh b/python/examples/senta/get_data.sh index f1fb3844a703503177906a029bd42810e5fa3f33..fcd060f42aa2386e841f122c851394fc472d7f5b 100644 --- a/python/examples/senta/get_data.sh +++ b/python/examples/senta/get_data.sh @@ -1,6 +1,6 @@ wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/senta_bilstm.tar.gz --no-check-certificate tar -xzvf senta_bilstm.tar.gz -wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/lac_model.tar.gz --no-check-certificate +wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/lac.tar.gz --no-check-certificate tar -xzvf lac_model.tar.gz wget https://paddle-serving.bj.bcebos.com/reader/lac/lac_dict.tar.gz --no-check-certificate tar -xzvf lac_dict.tar.gz diff --git a/python/examples/senta/senta_web_service.py b/python/examples/senta/senta_web_service.py index 5d20020c46d3b5ed23914cb9813ac889e232a2b3..0621ece74173596a1820f1b09258ecf5bb727f29 100644 --- a/python/examples/senta/senta_web_service.py +++ b/python/examples/senta/senta_web_service.py @@ -14,13 +14,10 @@ from paddle_serving_server_gpu.web_service import WebService from paddle_serving_client import Client -from paddle_serving_app import LACReader, SentaReader -import numpy as np +from 
paddle_serving_app.reader import LACReader, SentaReader import os -import io import sys -import subprocess -from multiprocessing import Process, Queue +from multiprocessing import Process class SentaService(WebService): @@ -33,10 +30,6 @@ class SentaService(WebService): self.lac_client_config_path = lac_model_path + "/serving_server_conf.prototxt" self.lac_dict_path = lac_dict_path self.senta_dict_path = senta_dict_path - self.show = False - - def show_detail(self, show=False): - self.show = show def start_lac_service(self): if not os.path.exists('./lac_serving'): @@ -64,34 +57,29 @@ class SentaService(WebService): self.lac_client.connect(["127.0.0.1:{}".format(self.lac_port)]) def init_lac_reader(self): - self.lac_reader = LACReader(self.lac_dict_path) + self.lac_reader = LACReader() def init_senta_reader(self): - self.senta_reader = SentaReader(vocab_path=self.senta_dict_path) + self.senta_reader = SentaReader() def preprocess(self, feed=[], fetch=[]): - feed_data = self.lac_reader.process(feed[0]["words"]) - if self.show: - print("---- lac reader ----") - print(feed_data) - lac_result = self.lac_predict(feed_data) - if self.show: - print("---- lac out ----") - print(lac_result) - segs = self.lac_reader.parse_result(feed[0]["words"], - lac_result["crf_decode"]) - if self.show: - print("---- lac parse ----") - print(segs) - feed_data = self.senta_reader.process(segs) - if self.show: - print("---- senta reader ----") - print("feed_data", feed_data) - return [{"words": feed_data}], fetch + feed_data = [{ + "words": self.lac_reader.process(x["words"]) + } for x in feed] + lac_result = self.lac_client.predict( + feed=feed_data, fetch=["crf_decode"]) + feed_batch = [] + result_lod = lac_result["crf_decode.lod"] + for i in range(len(feed)): + segs = self.lac_reader.parse_result( + feed[i]["words"], + lac_result["crf_decode"][result_lod[i]:result_lod[i + 1]]) + feed_data = self.senta_reader.process(segs) + feed_batch.append({"words": feed_data}) + return feed_batch, fetch senta_service = SentaService(name="senta") -#senta_service.show_detail(True) senta_service.set_config( lac_model_path="./lac_model", lac_dict_path="./lac_dict", @@ -102,5 +90,5 @@ senta_service.prepare_server( senta_service.init_lac_reader() senta_service.init_senta_reader() senta_service.init_lac_service() -senta_service.run_server() -senta_service.run_flask() +senta_service.run_rpc_service() +senta_service.run_web_service() diff --git a/python/paddle_serving_app/README.md b/python/paddle_serving_app/README.md index 1756b83993e67dcbc66b6809631c5e953eef08d7..a0fd35b7f02ce165f878238a757613c62d2fea26 100644 --- a/python/paddle_serving_app/README.md +++ b/python/paddle_serving_app/README.md @@ -158,7 +158,7 @@ Therefore, a local prediction tool is built into the paddle_serving_app, which i Taking [fit_a_line prediction service](../examples/fit_a_line) as an example, the following code can be used to run local prediction. 
```python -from paddle_serving_app import Debugger +from paddle_serving_app.local_predict import Debugger import numpy as np debugger = Debugger() diff --git a/python/paddle_serving_app/README_CN.md b/python/paddle_serving_app/README_CN.md index 75dcf9ae78bec0c00b7662f7427d3816feaeca3d..2624c238e2dc212f1d10a251ee742891cae6a08c 100644 --- a/python/paddle_serving_app/README_CN.md +++ b/python/paddle_serving_app/README_CN.md @@ -147,7 +147,7 @@ Paddle Serving框架的server预测op使用了Paddle 的预测框架,在部署 以[fit_a_line预测服务](../examples/fit_a_line)为例,使用以下代码即可执行本地预测。 ```python -from paddle_serving_app import Debugger +from paddle_serving_app.local_predict import Debugger import numpy as np debugger = Debugger() diff --git a/python/paddle_serving_app/__init__.py b/python/paddle_serving_app/__init__.py index 2a6225570c3de61ba6e0a0587f81175816cd0f8d..11ad09a1d880a8b235e5cf1b99f6be91ec9cccbf 100644 --- a/python/paddle_serving_app/__init__.py +++ b/python/paddle_serving_app/__init__.py @@ -11,10 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .reader.chinese_bert_reader import ChineseBertReader -from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, PadStride -from .reader.lac_reader import LACReader -from .reader.senta_reader import SentaReader -from .reader.imdb_reader import IMDBDataset from .models import ServingModels -from .local_predict import Debugger diff --git a/python/paddle_serving_app/models/model_list.py b/python/paddle_serving_app/models/model_list.py index b22bbe8934816e9ced881d352b9e2a54ed3c9234..3d08f2fea95cc07e0cb1b57b005f72b95c6a4bcd 100644 --- a/python/paddle_serving_app/models/model_list.py +++ b/python/paddle_serving_app/models/model_list.py @@ -37,7 +37,7 @@ class ServingModels(object): object_detection_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ObjectDetection/" senta_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/" semantic_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticRepresentation/" - wordseg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/ChineseWordSegmentation/" + wordseg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/" self.url_dict = {} diff --git a/python/paddle_serving_app/reader/__init__.py b/python/paddle_serving_app/reader/__init__.py index 9b556a119d47ec693a667cf7c5ab10c0e56ace53..0eee878284e2028657a660acd38a21934bb5ccd7 100644 --- a/python/paddle_serving_app/reader/__init__.py +++ b/python/paddle_serving_app/reader/__init__.py @@ -11,4 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from .chinese_bert_reader import ChineseBertReader from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess, PadStride +from .lac_reader import LACReader +from .senta_reader import SentaReader +from .imdb_reader import IMDBDataset diff --git a/python/paddle_serving_app/reader/lac_reader.py b/python/paddle_serving_app/reader/lac_reader.py index 720bbf9c61051dcdc877f0a1f4933718be32263d..7e804ff371e2d90d79f7f663e83a854b1b0c9647 100644 --- a/python/paddle_serving_app/reader/lac_reader.py +++ b/python/paddle_serving_app/reader/lac_reader.py @@ -48,10 +48,16 @@ def load_kv_dict(dict_path, class LACReader(object): """data reader""" - def __init__(self, dict_folder): + def __init__(self, dict_folder=""): # read dict #basepath = os.path.abspath(__file__) #folder = os.path.dirname(basepath) + if dict_folder == "": + dict_folder = "lac_dict" + if not os.path.exists(dict_folder): + r = os.system( + "wget https://paddle-serving.bj.bcebos.com/reader/lac/lac_dict.tar.gz --no-check-certificate && tar -xzvf lac_dict.tar.gz" + ) word_dict_path = os.path.join(dict_folder, "word.dic") label_dict_path = os.path.join(dict_folder, "tag.dic") replace_dict_path = os.path.join(dict_folder, "q2b.dic") diff --git a/python/paddle_serving_app/reader/senta_reader.py b/python/paddle_serving_app/reader/senta_reader.py index 6e608b822fbb66f11288ea0080c8e264d8e5c34a..e0c93c00d1a6acb0c3d30294d40fb63b4929a639 100644 --- a/python/paddle_serving_app/reader/senta_reader.py +++ b/python/paddle_serving_app/reader/senta_reader.py @@ -14,10 +14,11 @@ import sys import io +import os class SentaReader(): - def __init__(self, vocab_path, max_seq_len=20): + def __init__(self, vocab_path="", max_seq_len=20): self.max_seq_len = max_seq_len self.word_dict = self.load_vocab(vocab_path) @@ -25,6 +26,13 @@ class SentaReader(): """ load the given vocabulary """ + if vocab_path == "": + vocab_path = "senta_vocab.txt" + if not os.path.exists(vocab_path): + r = os.system( + " wget https://paddle-serving.bj.bcebos.com/reader/senta/senta_vocab.txt --no-check-certificate" + ) + vocab = {} with io.open(vocab_path, 'r', encoding='utf8') as f: for line in f: diff --git a/python/paddle_serving_app/version.py b/python/paddle_serving_app/version.py index 766bf4e397e46153193b1e3cac6fed5323241c45..c91808f95e7a5b62729eb630a3203ad42f7a5889 100644 --- a/python/paddle_serving_app/version.py +++ b/python/paddle_serving_app/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" Paddle Serving App version string """ -serving_app_version = "0.0.3" +serving_app_version = "0.1.0" diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index 8c189d415b5718788da2ff0e6757ba3af259e750..e3302c14239c8bfc37a6bafb39b112cfed5230fd 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -207,8 +207,9 @@ class Client(object): key)) if type(feed[key]).__module__ == np.__name__ and np.size(feed[ key]) != self.feed_tensor_len[key]: - raise SystemExit("The shape of feed tensor {} not match.".format( - key)) + #raise SystemExit("The shape of feed tensor {} not match.".format( + # key)) + pass def predict(self, feed=None, fetch=None, need_variant_tag=False): self.profile_.record('py_prepro_0') diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py index 93ae37056320c2c7d779c5bbfc4d004a1be4f639..20d29e2bdfe0d2753d2f23cda028d76a3b13c699 100644 --- a/python/paddle_serving_client/io/__init__.py +++ b/python/paddle_serving_client/io/__init__.py @@ -33,7 +33,11 @@ def save_model(server_model_folder, executor = Executor(place=CPUPlace()) feed_var_names = [feed_var_dict[x].name for x in feed_var_dict] - target_vars = list(fetch_var_dict.values()) + target_vars = [] + target_var_names = [] + for key in sorted(fetch_var_dict.keys()): + target_vars.append(fetch_var_dict[key]) + target_var_names.append(key) save_inference_model( server_model_folder, @@ -64,7 +68,7 @@ def save_model(server_model_folder, feed_var.shape.extend(tmp_shape) config.feed_var.extend([feed_var]) - for key in fetch_var_dict: + for key in target_var_names: fetch_var = model_conf.FetchVar() fetch_var.alias_name = key fetch_var.name = fetch_var_dict[key].name diff --git a/python/paddle_serving_client/version.py b/python/paddle_serving_client/version.py index 4870767dfcb95f9502dfa5880a85b1c11c62964f..5a1f35c598f044e80cff12ce661ff80a61647543 100644 --- a/python/paddle_serving_client/version.py +++ b/python/paddle_serving_client/version.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Paddle Serving Client version string """ -serving_client_version = "0.2.2" -serving_server_version = "0.2.2" -module_proto_version = "0.2.2" +serving_client_version = "0.3.0" +serving_server_version = "0.3.0" +module_proto_version = "0.3.0" diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 70aafbf5c3da4d1a2a8ec50ce5a2258383863057..894b0c5b132845cbde589982e1fb471f028e820b 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -103,7 +103,7 @@ if __name__ == "__main__": service.load_model_config(args.model) service.prepare_server( workdir=args.workdir, port=args.port, device=args.device) - service.run_server() + service.run_rpc_service() app_instance = Flask(__name__) diff --git a/python/paddle_serving_server/version.py b/python/paddle_serving_server/version.py index 4870767dfcb95f9502dfa5880a85b1c11c62964f..5a1f35c598f044e80cff12ce661ff80a61647543 100644 --- a/python/paddle_serving_server/version.py +++ b/python/paddle_serving_server/version.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" Paddle Serving Client version string """ -serving_client_version = "0.2.2" -serving_server_version = "0.2.2" -module_proto_version = "0.2.2" +serving_client_version = "0.3.0" +serving_server_version = "0.3.0" +module_proto_version = "0.3.0" diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index f8c43707660e08e1bc44fdd62e40e20523f6cb6d..7f37b10be05e84e29cf6cda3cd3cc3d939910027 100755 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -92,7 +92,7 @@ class WebService(object): result = {"result": "Request Value Error"} return result - def run_server(self): + def run_rpc_service(self): import socket localIP = socket.gethostbyname(socket.gethostname()) print("web service address:") @@ -115,7 +115,7 @@ class WebService(object): self.app_instance = app_instance - def run_flask(self): + def run_web_service(self): self.app_instance.run(host="0.0.0.0", port=self.port, threaded=False, diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index 297ff25d2084bead186fa4b9037e5de8282df0fe..309896a876bda5fc9b1baceb089242baa6d77dc5 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -118,7 +118,7 @@ if __name__ == "__main__": web_service.set_gpus(gpu_ids) web_service.prepare_server( workdir=args.workdir, port=args.port, device=args.device) - web_service.run_server() + web_service.run_rpc_service() app_instance = Flask(__name__) diff --git a/python/paddle_serving_server_gpu/version.py b/python/paddle_serving_server_gpu/version.py index 4870767dfcb95f9502dfa5880a85b1c11c62964f..5a1f35c598f044e80cff12ce661ff80a61647543 100644 --- a/python/paddle_serving_server_gpu/version.py +++ b/python/paddle_serving_server_gpu/version.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Paddle Serving Client version string """ -serving_client_version = "0.2.2" -serving_server_version = "0.2.2" -module_proto_version = "0.2.2" +serving_client_version = "0.3.0" +serving_server_version = "0.3.0" +module_proto_version = "0.3.0" diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py index e64e73197d02a80e43bbc77a7589ab43efe2f244..2328453268f6cefa9c5bddb818677cc3962ea7ea 100644 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -133,12 +133,11 @@ class WebService(object): result = self.postprocess( feed=feed, fetch=fetch, fetch_map=fetch_map) result = {"result": result} - result = {"result": fetch_map} except ValueError: result = {"result": "Request Value Error"} return result - def run_server(self): + def run_rpc_service(self): import socket localIP = socket.gethostbyname(socket.gethostname()) print("web service address:") @@ -165,7 +164,7 @@ class WebService(object): self.app_instance = app_instance - def run_flask(self): + def run_web_service(self): self.app_instance.run(host="0.0.0.0", port=self.port, threaded=False,