Commit e1c47de4, authored by zhangjun

rename paddle_serving_server_gpu

Parent 98ab4b0d
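This commit folds the GPU-only `paddle_serving_server_gpu` package into `paddle_serving_server`: every launch command, benchmark script, and Python import below switches to the unified module path. A minimal sketch of what the rename means for launch commands, assuming the unified module accepts the same GPU flags the changed lines below use:

```
# Before this commit: GPU serving had its own entry point.
python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_ids 0
# After this commit: one entry point serves CPU and GPU; GPU is selected with --gpu_ids.
python -m paddle_serving_server.serve --model serving_server --port 9292               # CPU
python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_ids 0   # GPU 0
```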
@@ -48,7 +48,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #c
```
Or, start the GPU inference service by running
```
-python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
+python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
```
### RPC Inference
...
@@ -45,7 +45,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #
```
Or, start the GPU inference service by running
```
-python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch the gpu inference service on GPU 0
+python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch the gpu inference service on GPU 0
```
...
@@ -12,7 +12,7 @@ else
mkdir utilization
fi
#start server
-$PYTHONROOT/bin/python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
+$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
sleep 5
#warm up
...
export CUDA_VISIBLE_DEVICES=0,1,2,3
-python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
export FLAGS_profile_client=1
export FLAGS_profile_server=1
sleep 5
...
@@ -14,9 +14,9 @@
import os
import sys
-from paddle_serving_server_gpu import OpMaker
-from paddle_serving_server_gpu import OpSeqMaker
-from paddle_serving_server_gpu import Server
+from paddle_serving_server import OpMaker
+from paddle_serving_server import OpSeqMaker
+from paddle_serving_server import Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
...
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
-from paddle_serving_server_gpu.web_service import WebService
+from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
...
@@ -10,7 +10,7 @@ If you want to have more detection models, please refer to [Paddle Detection Mod
### Start the service
```
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
+python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
```
### Perform prediction
...
@@ -10,7 +10,7 @@ sh get_data.sh
### Start the service
```
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
+python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
```
### Perform prediction
...
@@ -20,7 +20,7 @@ the directories like `ctr_serving_model` and `ctr_client_conf` will appear.
```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service
-python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0
+python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0
```
### RPC Infer
...
@@ -20,7 +20,7 @@ mv models/ctr_serving_model .
```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #start the CPU inference service
-python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #start the inference service on GPU 0
+python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #start the inference service on GPU 0
```
### Perform prediction
...
@@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz
### Start Service
```
-python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494
+python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494
```
### Client Prediction
...
@@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz
### Start the server
```
-python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494
+python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494
```
### Client prediction
...
@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf faster_rcnn_r50_fpn_1x_coco.tar
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
+python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT; if you want faster inference, please use `--use_trt` (see the sketch after this hunk).
...
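A minimal sketch of that flag, assuming `--use_trt` simply combines with the launch flags already shown in this hunk:

```
# Hypothetical invocation: the serving command from above with TensorRT enabled via --use_trt.
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 --use_trt
```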
@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf faster_rcnn_r50_fpn_1x_coco.tar
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
+python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT; if you want faster inference, enable the `--use_trt` option.
...
@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf ppyolo_r50vd_dcn_1x_coco.tar
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
+python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT; if you want faster inference, please use `--use_trt`.
...
@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf ppyolo_r50vd_dcn_1x_coco.tar
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
+python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT; if you want faster inference, enable the `--use_trt` option.
...
@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf ttfnet_darknet53_1x_coco.tar
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
+python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT; if you want faster inference, please use `--use_trt`.
...
@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf ttfnet_darknet53_1x_coco.tar
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
+python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT; if you want faster inference, enable the `--use_trt` option.
...
@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf yolov3_darknet53_270e_coco.tar
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
+python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT; if you want faster inference, please use `--use_trt`.
...
@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf yolov3_darknet53_270e_coco.tar
-python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
+python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT; if you want faster inference, enable the `--use_trt` option.
...
@@ -26,7 +26,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_
```
GPU Service
```
-python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
+python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
```
## Prediction
...
@@ -24,7 +24,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_
```
GPU inference service
```
-python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
+python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
```
## Prediction
...
@@ -15,9 +15,9 @@
import os
import sys
-from paddle_serving_server_gpu import OpMaker
-from paddle_serving_server_gpu import OpSeqMaker
-from paddle_serving_server_gpu import MultiLangServer as Server
+from paddle_serving_server import OpMaker
+from paddle_serving_server import OpSeqMaker
+from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
...
@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start RPC Service
```
-python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
+python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
...
@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start the RPC service
```
-python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
+python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
...
@@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu
```
```
-python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
+python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
```
The client sends an inference request
...
@@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu
```
```
-python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
+python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
```
Run prediction on the client side
...
@@ -2,7 +2,7 @@ rm profile_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_profile_server=1
export FLAGS_profile_client=1
-python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog &
+python -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog &
sleep 5
gpu_id=0
...
@@ -25,7 +25,7 @@ device = sys.argv[2]
if device == "cpu":
    from paddle_serving_server.web_service import WebService
else:
-    from paddle_serving_server_gpu.web_service import WebService
+    from paddle_serving_server.web_service import WebService
class ImageService(WebService):
...
@@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz
### Start Service
```
-python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
+python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
```
### Client Prediction
...
@@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz
### Start the server
```
-python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
+python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
```
### Client prediction
...
@@ -26,7 +26,7 @@ tar xf test_imgs.tar
python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu
#for gpu user
-python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
+python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py gpu
```
...
@@ -25,7 +25,7 @@ tar xf test_imgs.tar
python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu
#for gpu user
-python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
+python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py gpu
```
...
@@ -22,7 +22,7 @@ from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes
if sys.argv[1] == 'gpu':
-    from paddle_serving_server_gpu.web_service import WebService
+    from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
    from paddle_serving_server.web_service import WebService
import time
...
@@ -22,7 +22,7 @@ from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes
if sys.argv[1] == 'gpu':
-    from paddle_serving_server_gpu.web_service import WebService
+    from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
    from paddle_serving_server.web_service import WebService
import time
...
@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
-    from paddle_serving_server_gpu.web_service import WebService
+    from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
    from paddle_serving_server.web_service import WebService
from paddle_serving_app.local_predict import LocalPredictor
...
@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
-    from paddle_serving_server_gpu.web_service import WebService
+    from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
    from paddle_serving_server.web_service import WebService
import time
...
@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
-    from paddle_serving_server_gpu.web_service import WebService
+    from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
    from paddle_serving_server.web_service import WebService
import time
...
@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
-    from paddle_serving_server_gpu.web_service import WebService
+    from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
    from paddle_serving_server.web_service import WebService
import time
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
-    from paddle_serving_server_gpu.pipeline import PipelineClient
+    from paddle_serving_server.pipeline import PipelineClient
except ImportError:
    from paddle_serving_server.pipeline import PipelineClient
import numpy as np
...
@@ -14,7 +14,7 @@
import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
-    from paddle_serving_server_gpu.web_service import WebService, Op
+    from paddle_serving_server.web_service import WebService, Op
except ImportError:
    from paddle_serving_server.web_service import WebService, Op
import logging
...
@@ -22,7 +22,7 @@ import logging
try:
    from paddle_serving_server.web_service import WebService
except ImportError:
-    from paddle_serving_server_gpu.web_service import WebService
+    from paddle_serving_server.web_service import WebService
_LOGGER = logging.getLogger()
user_handler = logging.StreamHandler()
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
-    from paddle_serving_server_gpu.pipeline import PipelineClient
+    from paddle_serving_server.pipeline import PipelineClient
except ImportError:
    from paddle_serving_server.pipeline import PipelineClient
import numpy as np
...
@@ -14,7 +14,7 @@
try:
    from paddle_serving_server.web_service import WebService, Op
except ImportError:
-    from paddle_serving_server_gpu.web_service import WebService, Op
+    from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import cv2
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
-    from paddle_serving_server_gpu.web_service import WebService, Op
+    from paddle_serving_server.web_service import WebService, Op
except ImportError:
    from paddle_serving_server.web_service import WebService, Op
import logging
...
@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service
```
-python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
+python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
```
### Client Prediction
...
@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start the server
```
-python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
+python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
```
### Client prediction
...
@@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz
### Start Service
```
-python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494
+python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
```
### Client Prediction
...
@@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz
### Start the server
```
-python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494
+python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
```
### Client prediction
...
@@ -15,7 +15,7 @@ sh get_data.sh
### Start server
```shell
-python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
```
### Client prediction
...
@@ -13,7 +13,7 @@
# limitations under the License.
# pylint: disable=doc-string-missing
-from paddle_serving_server_gpu.web_service import WebService
+from paddle_serving_server.web_service import WebService
import numpy as np
...
@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service
```
-python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
+python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
```
### Client Prediction
...
@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start the server
```
-python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
+python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
```
### Client prediction
...
@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start RPC Service
```
-python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0
+python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
```
## Prediction
...
@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start the RPC service
```
-python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0
+python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
```
## Prediction
...
@@ -13,703 +13,25 @@
# limitations under the License.
# pylint: disable=doc-string-missing
+from . import convert
+from . import client
+from . import version
+from . import io
+from . import utils
+from . import metric
+from convert import *
+from client import *
+from version import *
+from io import *
+from utils import *
+from metric import *
+__all__ = convert.__all__ \
+    + client.__all__ \
+    + version.__all__ \
+    + io.__all__ \
+    + utils.__all__ \
+    + metric.__all__
+__version__ = version.serving_client_version
-import paddle_serving_client
-import os
-from .proto import sdk_configure_pb2 as sdk
-from .proto import general_model_config_pb2 as m_config
-import google.protobuf.text_format
-import numpy as np
-import requests
-import json
-import base64
-import time
-import sys
-import grpc
-from .proto import multi_lang_general_model_service_pb2
-sys.path.append(
-    os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
-from .proto import multi_lang_general_model_service_pb2_grpc
-int64_type = 0
-float32_type = 1
-int32_type = 2
-int_type = set([int64_type, int32_type])
-float_type = set([float32_type])
class _NOPProfiler(object):
def record(self, name):
pass
def print_profile(self):
pass
class _TimeProfiler(object):
def __init__(self):
self.pid = os.getpid()
self.print_head = 'PROFILE\tpid:{}\t'.format(self.pid)
self.time_record = [self.print_head]
def record(self, name):
self.time_record.append('{}:{} '.format(
name, int(round(time.time() * 1000000))))
def print_profile(self):
self.time_record.append('\n')
sys.stderr.write(''.join(self.time_record))
self.time_record = [self.print_head]
_is_profile = int(os.environ.get('FLAGS_profile_client', 0))
_Profiler = _TimeProfiler if _is_profile else _NOPProfiler
class SDKConfig(object):
def __init__(self):
self.sdk_desc = sdk.SDKConf()
self.tag_list = []
self.cluster_list = []
self.variant_weight_list = []
self.rpc_timeout_ms = 20000
self.load_balance_strategy = "la"
def add_server_variant(self, tag, cluster, variant_weight):
self.tag_list.append(tag)
self.cluster_list.append(cluster)
self.variant_weight_list.append(variant_weight)
def set_load_banlance_strategy(self, strategy):
self.load_balance_strategy = strategy
def gen_desc(self, rpc_timeout_ms):
predictor_desc = sdk.Predictor()
predictor_desc.name = "general_model"
predictor_desc.service_name = \
"baidu.paddle_serving.predictor.general_model.GeneralModelService"
predictor_desc.endpoint_router = "WeightedRandomRender"
predictor_desc.weighted_random_render_conf.variant_weight_list = "|".join(
self.variant_weight_list)
for idx, tag in enumerate(self.tag_list):
variant_desc = sdk.VariantConf()
variant_desc.tag = tag
variant_desc.naming_conf.cluster = "list://{}".format(",".join(
self.cluster_list[idx]))
predictor_desc.variants.extend([variant_desc])
self.sdk_desc.predictors.extend([predictor_desc])
self.sdk_desc.default_variant_conf.tag = "default"
self.sdk_desc.default_variant_conf.connection_conf.connect_timeout_ms = 2000
self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = rpc_timeout_ms
self.sdk_desc.default_variant_conf.connection_conf.connect_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.max_connection_per_host = 100
self.sdk_desc.default_variant_conf.connection_conf.hedge_request_timeout_ms = -1
self.sdk_desc.default_variant_conf.connection_conf.hedge_fetch_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.connection_type = "pooled"
self.sdk_desc.default_variant_conf.naming_conf.cluster_filter_strategy = "Default"
self.sdk_desc.default_variant_conf.naming_conf.load_balance_strategy = "la"
self.sdk_desc.default_variant_conf.rpc_parameter.compress_type = 0
self.sdk_desc.default_variant_conf.rpc_parameter.package_size = 20
self.sdk_desc.default_variant_conf.rpc_parameter.protocol = "baidu_std"
self.sdk_desc.default_variant_conf.rpc_parameter.max_channel_per_request = 3
return self.sdk_desc
class Client(object):
def __init__(self):
self.feed_names_ = []
self.fetch_names_ = []
self.client_handle_ = None
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.pid = os.getpid()
self.predictor_sdk_ = None
self.producers = []
self.consumer = None
self.profile_ = _Profiler()
self.all_numpy_input = True
self.has_numpy_input = False
self.rpc_timeout_ms = 20000
from .serving_client import PredictorRes
self.predictorres_constructor = PredictorRes
def load_client_config(self, path):
from .serving_client import PredictorClient
model_conf = m_config.GeneralModelConfig()
f = open(path, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
# load configuration here
# get feed vars, fetch vars
# get feed shapes, feed types
# map feed names to index
self.client_handle_ = PredictorClient()
self.client_handle_.init(path)
if "FLAGS_max_body_size" not in os.environ:
os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024)
read_env_flags = ["profile_client", "profile_server", "max_body_size"]
self.client_handle_.init_gflags([sys.argv[
0]] + ["--tryfromenv=" + ",".join(read_env_flags)])
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.feed_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
self.lod_tensor_set = set()
self.feed_tensor_len = {}
self.key = None
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
self.feed_tensor_len[var.alias_name] = counter
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
return
def add_variant(self, tag, cluster, variant_weight):
if self.predictor_sdk_ is None:
self.predictor_sdk_ = SDKConfig()
self.predictor_sdk_.add_server_variant(tag, cluster,
str(variant_weight))
def set_rpc_timeout_ms(self, rpc_timeout):
if not isinstance(rpc_timeout, int):
raise ValueError("rpc_timeout must be int type.")
else:
self.rpc_timeout_ms = rpc_timeout
def use_key(self, key_filename):
with open(key_filename, "rb") as f:
self.key = f.read()
def get_serving_port(self, endpoints):
if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()})
else:
req = json.dumps({})
r = requests.post("http://" + endpoints[0], req)
result = r.json()
print(result)
if "endpoint_list" not in result:
raise ValueError("server not ready")
else:
endpoints = [
endpoints[0].split(":")[0] + ":" +
str(result["endpoint_list"][0])
]
return endpoints
def connect(self, endpoints=None, encryption=False):
# check whether current endpoint is available
# init from client config
# create predictor here
if endpoints is None:
if self.predictor_sdk_ is None:
raise ValueError(
"You must set the endpoints parameter or use add_variant function to create a variant."
)
else:
if encryption:
endpoints = self.get_serving_port(endpoints)
if self.predictor_sdk_ is None:
self.add_variant('default_tag_{}'.format(id(self)), endpoints,
100)
else:
print(
"parameter endpoints({}) will not take effect, because you use the add_variant function.".
format(endpoints))
sdk_desc = self.predictor_sdk_.gen_desc(self.rpc_timeout_ms)
self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString(
))
def get_feed_names(self):
return self.feed_names_
def get_fetch_names(self):
return self.fetch_names_
def shape_check(self, feed, key):
if key in self.lod_tensor_set:
return
if isinstance(feed[key],
list) and len(feed[key]) != self.feed_tensor_len[key]:
raise ValueError("The shape of feed tensor {} not match.".format(
key))
if type(feed[key]).__module__ == np.__name__ and np.size(feed[
key]) != self.feed_tensor_len[key]:
#raise SystemExit("The shape of feed tensor {} not match.".format(
# key))
pass
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
self.profile_.record('py_prepro_0')
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, list):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, list):
feed_batch = feed
else:
raise ValueError("Feed only accepts dict and list of dict")
int_slot_batch = []
float_slot_batch = []
int_feed_names = []
float_feed_names = []
int_shape = []
int_lod_slot_batch = []
float_lod_slot_batch = []
float_shape = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of saved fetch list.")
return {}
for i, feed_i in enumerate(feed_batch):
int_slot = []
float_slot = []
int_lod_slot = []
float_lod_slot = []
for key in feed_i:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
#if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
if self.feed_types_[key] in int_type:
if i == 0:
int_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
int_shape.append(shape_lst)
else:
int_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
int_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
int_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
int_slot.append(feed_i[key])
self.has_numpy_input = True
else:
int_slot.append(feed_i[key])
self.all_numpy_input = False
elif self.feed_types_[key] in float_type:
if i == 0:
float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(feed_i[key])
self.has_numpy_input = True
else:
float_slot.append(feed_i[key])
self.all_numpy_input = False
int_slot_batch.append(int_slot)
float_slot_batch.append(float_slot)
int_lod_slot_batch.append(int_lod_slot)
float_lod_slot_batch.append(float_lod_slot)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
result_batch_handle = self.predictorres_constructor()
if self.all_numpy_input:
res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape,
int_lod_slot_batch, fetch_names, result_batch_handle, self.pid,
log_id)
elif self.has_numpy_input == False:
raise ValueError(
"Please make sure all of your inputs are numpy array")
else:
raise ValueError(
"Please make sure the inputs are all in list type or all in numpy.array type"
)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
if res == -1:
return None
multi_result_map = []
model_engine_names = result_batch_handle.get_engine_names()
for mi, engine_name in enumerate(model_engine_names):
result_map = {}
# result map needs to be a numpy array
for i, name in enumerate(fetch_names):
if self.fetch_names_to_type_[name] == int64_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int64_by_name(
mi, name)
shape = result_batch_handle.get_shape(mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == float32_type:
result_map[name] = result_batch_handle.get_float_by_name(
mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == int32_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int32_by_name(
mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
# If only one model result is returned, the format of ret is result_map
ret = multi_result_map[0]
else:
# If multiple model results are returned, the format of ret is {name: result_map}
ret = {
engine_name: multi_result_map[mi]
for mi, engine_name in enumerate(model_engine_names)
}
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
# When using the A/B test, the tag of variant needs to be returned
return ret if not need_variant_tag else [
ret, result_batch_handle.variant_tag()
]
def release(self):
self.client_handle_.destroy_predictor()
self.client_handle_ = None
class MultiLangClient(object):
def __init__(self):
self.channel_ = None
self.stub_ = None
self.rpc_timeout_s_ = 2
self.profile_ = _Profiler()
def add_variant(self, tag, cluster, variant_weight):
# TODO
raise Exception("cannot support ABtest yet")
def set_rpc_timeout_ms(self, rpc_timeout):
if self.stub_ is None:
raise Exception("set timeout must be set after connect.")
if not isinstance(rpc_timeout, int):
# for bclient
raise ValueError("rpc_timeout must be int type.")
self.rpc_timeout_s_ = rpc_timeout / 1000.0
timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest()
timeout_req.timeout_ms = rpc_timeout
resp = self.stub_.SetTimeout(timeout_req)
return resp.err_code == 0
def connect(self, endpoints):
# https://github.com/tensorflow/serving/issues/1382
options = [('grpc.max_receive_message_length', 512 * 1024 * 1024),
('grpc.max_send_message_length', 512 * 1024 * 1024),
('grpc.lb_policy_name', 'round_robin')]
# TODO: weight round robin
g_endpoint = 'ipv4:{}'.format(','.join(endpoints))
self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub(
self.channel_)
# get client model config
get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest(
)
resp = self.stub_.GetClientConfig(get_client_config_req)
model_config_str = resp.client_config_str
self._parse_model_config(model_config_str)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _pack_inference_request(self, feed, fetch, is_python, log_id):
req = multi_lang_general_model_service_pb2.InferenceRequest()
req.fetch_var_names.extend(fetch)
req.is_python = is_python
req.log_id = log_id
feed_var_names = []
for key in feed.keys():
if '.lod' not in key:
feed_var_names.append(key)
req.feed_var_names.extend(feed_var_names)
inst = multi_lang_general_model_service_pb2.FeedInst()
for name in req.feed_var_names:
tensor = multi_lang_general_model_service_pb2.Tensor()
var = feed[name]
v_type = self.feed_types_[name]
if is_python:
data = None
if isinstance(var, list):
if v_type == 0: # int64
data = np.array(var, dtype="int64")
elif v_type == 1: # float32
data = np.array(var, dtype="float32")
elif v_type == 2: # int32
data = np.array(var, dtype="int32")
else:
raise Exception("error tensor value type.")
elif isinstance(var, np.ndarray):
data = var
if v_type == 0:
if data.dtype != 'int64':
data = data.astype("int64")
elif v_type == 1:
if data.dtype != 'float32':
data = data.astype("float32")
elif v_type == 2:
if data.dtype != 'int32':
data = data.astype("int32")
else:
raise Exception("error tensor value type.")
else:
raise Exception("var must be list or ndarray.")
tensor.data = data.tobytes()
tensor.shape.extend(list(var.shape))
if "{}.lod".format(name) in feed.keys():
tensor.lod.extend(feed["{}.lod".format(name)])
inst.tensor_array.append(tensor)
req.insts.append(inst)
return req
def _unpack_inference_response(self, resp, fetch, is_python,
need_variant_tag):
if resp.err_code != 0:
return None
tag = resp.tag
multi_result_map = {}
for model_result in resp.outputs:
inst = model_result.insts[0]
result_map = {}
for i, name in enumerate(fetch):
var = inst.tensor_array[i]
v_type = self.fetch_types_[name]
if is_python:
if v_type == 0: # int64
result_map[name] = np.frombuffer(
var.data, dtype="int64")
elif v_type == 1: # float32
result_map[name] = np.frombuffer(
var.data, dtype="float32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
result_map[name] = np.array(
list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
result_map[name] = np.array(
list(var.float_data), dtype="float32")
else:
raise Exception("error type.")
result_map[name].shape = list(var.shape)
if name in self.lod_tensor_set_:
result_map["{}.lod".format(name)] = np.array(list(var.lod))
multi_result_map[model_result.engine_name] = result_map
ret = None
if len(resp.outputs) == 1:
ret = list(multi_result_map.values())[0]
else:
ret = multi_result_map
ret["serving_status_code"] = 0
return ret if not need_variant_tag else [ret, tag]
def _done_callback_func(self, fetch, is_python, need_variant_tag):
def unpack_resp(resp):
return self._unpack_inference_response(resp, fetch, is_python,
need_variant_tag)
return unpack_resp
def get_feed_names(self):
return self.feed_names_
def predict(self,
feed,
fetch,
batch=True,
need_variant_tag=False,
asyn=False,
is_python=True,
log_id=0):
if isinstance(feed, dict) is False:
raise ValueError("Type Error. grpc feed must be dict.")
if batch is False:
for key in feed:
if ".lod" not in key:
feed[key] = feed[key][np.newaxis, :]
if not asyn:
try:
self.profile_.record('py_prepro_0')
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
ret = self._unpack_inference_response(
resp,
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag)
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
return ret
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
else:
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
call_future = self.stub_.Inference.future(
req, timeout=self.rpc_timeout_s_)
return MultiLangPredictFuture(
call_future,
self._done_callback_func(
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag))
class MultiLangPredictFuture(object):
def __init__(self, call_future, callback_func):
self.call_future_ = call_future
self.callback_func_ = callback_func
def result(self):
try:
resp = self.call_future_.result()
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
return self.callback_func_(resp)
def add_done_callback(self, fn):
def __fn__(call_future):
assert call_future == self.call_future_
fn(self)
self.call_future_.add_done_callback(__fn__)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import paddle_serving_client
import os
from .proto import sdk_configure_pb2 as sdk
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import numpy as np
import requests
import json
import base64
import time
import sys
import grpc
from .proto import multi_lang_general_model_service_pb2
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
int64_type = 0
float32_type = 1
int32_type = 2
int_type = set([int64_type, int32_type])
float_type = set([float32_type])
class _NOPProfiler(object):
def record(self, name):
pass
def print_profile(self):
pass
class _TimeProfiler(object):
def __init__(self):
self.pid = os.getpid()
self.print_head = 'PROFILE\tpid:{}\t'.format(self.pid)
self.time_record = [self.print_head]
def record(self, name):
self.time_record.append('{}:{} '.format(
name, int(round(time.time() * 1000000))))
def print_profile(self):
self.time_record.append('\n')
sys.stderr.write(''.join(self.time_record))
self.time_record = [self.print_head]
_is_profile = int(os.environ.get('FLAGS_profile_client', 0))
_Profiler = _TimeProfiler if _is_profile else _NOPProfiler
class SDKConfig(object):
def __init__(self):
self.sdk_desc = sdk.SDKConf()
self.tag_list = []
self.cluster_list = []
self.variant_weight_list = []
self.rpc_timeout_ms = 20000
self.load_balance_strategy = "la"
def add_server_variant(self, tag, cluster, variant_weight):
self.tag_list.append(tag)
self.cluster_list.append(cluster)
self.variant_weight_list.append(variant_weight)
def set_load_banlance_strategy(self, strategy):
self.load_balance_strategy = strategy
def gen_desc(self, rpc_timeout_ms):
predictor_desc = sdk.Predictor()
predictor_desc.name = "general_model"
predictor_desc.service_name = \
"baidu.paddle_serving.predictor.general_model.GeneralModelService"
predictor_desc.endpoint_router = "WeightedRandomRender"
predictor_desc.weighted_random_render_conf.variant_weight_list = "|".join(
self.variant_weight_list)
for idx, tag in enumerate(self.tag_list):
variant_desc = sdk.VariantConf()
variant_desc.tag = tag
variant_desc.naming_conf.cluster = "list://{}".format(",".join(
self.cluster_list[idx]))
predictor_desc.variants.extend([variant_desc])
self.sdk_desc.predictors.extend([predictor_desc])
self.sdk_desc.default_variant_conf.tag = "default"
self.sdk_desc.default_variant_conf.connection_conf.connect_timeout_ms = 2000
self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = rpc_timeout_ms
self.sdk_desc.default_variant_conf.connection_conf.connect_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.max_connection_per_host = 100
self.sdk_desc.default_variant_conf.connection_conf.hedge_request_timeout_ms = -1
self.sdk_desc.default_variant_conf.connection_conf.hedge_fetch_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.connection_type = "pooled"
self.sdk_desc.default_variant_conf.naming_conf.cluster_filter_strategy = "Default"
self.sdk_desc.default_variant_conf.naming_conf.load_balance_strategy = "la"
self.sdk_desc.default_variant_conf.rpc_parameter.compress_type = 0
self.sdk_desc.default_variant_conf.rpc_parameter.package_size = 20
self.sdk_desc.default_variant_conf.rpc_parameter.protocol = "baidu_std"
self.sdk_desc.default_variant_conf.rpc_parameter.max_channel_per_request = 3
return self.sdk_desc
class Client(object):
def __init__(self):
self.feed_names_ = []
self.fetch_names_ = []
self.client_handle_ = None
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.pid = os.getpid()
self.predictor_sdk_ = None
self.producers = []
self.consumer = None
self.profile_ = _Profiler()
self.all_numpy_input = True
self.has_numpy_input = False
self.rpc_timeout_ms = 20000
from .serving_client import PredictorRes
self.predictorres_constructor = PredictorRes
def load_client_config(self, path):
from .serving_client import PredictorClient
model_conf = m_config.GeneralModelConfig()
f = open(path, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
# load configuration here
# get feed vars, fetch vars
# get feed shapes, feed types
# map feed names to index
self.client_handle_ = PredictorClient()
self.client_handle_.init(path)
if "FLAGS_max_body_size" not in os.environ:
os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024)
read_env_flags = ["profile_client", "profile_server", "max_body_size"]
self.client_handle_.init_gflags([sys.argv[
0]] + ["--tryfromenv=" + ",".join(read_env_flags)])
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.feed_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
self.lod_tensor_set = set()
self.feed_tensor_len = {}
self.key = None
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
self.feed_tensor_len[var.alias_name] = counter
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
return
def add_variant(self, tag, cluster, variant_weight):
if self.predictor_sdk_ is None:
self.predictor_sdk_ = SDKConfig()
self.predictor_sdk_.add_server_variant(tag, cluster,
str(variant_weight))
def set_rpc_timeout_ms(self, rpc_timeout):
if not isinstance(rpc_timeout, int):
raise ValueError("rpc_timeout must be int type.")
else:
self.rpc_timeout_ms = rpc_timeout
def use_key(self, key_filename):
with open(key_filename, "rb") as f:
self.key = f.read()
def get_serving_port(self, endpoints):
if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()})
else:
req = json.dumps({})
r = requests.post("http://" + endpoints[0], req)
result = r.json()
print(result)
if "endpoint_list" not in result:
raise ValueError("server not ready")
else:
endpoints = [
endpoints[0].split(":")[0] + ":" +
str(result["endpoint_list"][0])
]
return endpoints
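    # Encrypted-model flow — a minimal sketch, not part of the original file.
    # The key file name and endpoint below are assumptions for illustration:
    # use_key() loads the decryption key, and connect(..., encryption=True)
    # posts it to the management port via get_serving_port() above, then
    # switches to the real serving port returned by the server.
    #
    #   client.use_key("./key")
    #   client.connect(["127.0.0.1:9300"], encryption=True)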
def connect(self, endpoints=None, encryption=False):
# check whether current endpoint is available
# init from client config
# create predictor here
if endpoints is None:
if self.predictor_sdk_ is None:
raise ValueError(
"You must set the endpoints parameter or use add_variant function to create a variant."
)
else:
if encryption:
endpoints = self.get_serving_port(endpoints)
if self.predictor_sdk_ is None:
self.add_variant('default_tag_{}'.format(id(self)), endpoints,
100)
else:
                print(
                    "parameter endpoints({}) will not take effect, because you used the add_variant function.".
                    format(endpoints))
sdk_desc = self.predictor_sdk_.gen_desc(self.rpc_timeout_ms)
        self.client_handle_.create_predictor_by_desc(
            sdk_desc.SerializeToString())
def get_feed_names(self):
return self.feed_names_
def get_fetch_names(self):
return self.fetch_names_
def shape_check(self, feed, key):
if key in self.lod_tensor_set:
return
if isinstance(feed[key],
list) and len(feed[key]) != self.feed_tensor_len[key]:
raise ValueError("The shape of feed tensor {} not match.".format(
key))
if type(feed[key]).__module__ == np.__name__ and np.size(feed[
key]) != self.feed_tensor_len[key]:
#raise SystemExit("The shape of feed tensor {} not match.".format(
# key))
pass
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
self.profile_.record('py_prepro_0')
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, list):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, list):
feed_batch = feed
else:
raise ValueError("Feed only accepts dict and list of dict")
int_slot_batch = []
float_slot_batch = []
int_feed_names = []
float_feed_names = []
int_shape = []
int_lod_slot_batch = []
float_lod_slot_batch = []
float_shape = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
        if len(fetch_names) == 0:
            raise ValueError(
                "Fetch names should not be empty or out of saved fetch list.")
for i, feed_i in enumerate(feed_batch):
int_slot = []
float_slot = []
int_lod_slot = []
float_lod_slot = []
for key in feed_i:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
#if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
if self.feed_types_[key] in int_type:
if i == 0:
int_feed_names.append(key)
shape_lst = []
                        if not batch:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
int_shape.append(shape_lst)
else:
int_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
int_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
int_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
int_slot.append(feed_i[key])
self.has_numpy_input = True
else:
int_slot.append(feed_i[key])
self.all_numpy_input = False
elif self.feed_types_[key] in float_type:
if i == 0:
float_feed_names.append(key)
shape_lst = []
                        if not batch:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(feed_i[key])
self.has_numpy_input = True
else:
float_slot.append(feed_i[key])
self.all_numpy_input = False
int_slot_batch.append(int_slot)
float_slot_batch.append(float_slot)
int_lod_slot_batch.append(int_lod_slot)
float_lod_slot_batch.append(float_lod_slot)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
result_batch_handle = self.predictorres_constructor()
if self.all_numpy_input:
res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape,
int_lod_slot_batch, fetch_names, result_batch_handle, self.pid,
log_id)
        elif not self.has_numpy_input:
raise ValueError(
"Please make sure all of your inputs are numpy array")
else:
raise ValueError(
"Please make sure the inputs are all in list type or all in numpy.array type"
)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
if res == -1:
return None
multi_result_map = []
model_engine_names = result_batch_handle.get_engine_names()
for mi, engine_name in enumerate(model_engine_names):
result_map = {}
# result map needs to be a numpy array
for i, name in enumerate(fetch_names):
if self.fetch_names_to_type_[name] == int64_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int64_by_name(
mi, name)
shape = result_batch_handle.get_shape(mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == float32_type:
result_map[name] = result_batch_handle.get_float_by_name(
mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == int32_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int32_by_name(
mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
# If only one model result is returned, the format of ret is result_map
ret = multi_result_map[0]
else:
# If multiple model results are returned, the format of ret is {name: result_map}
ret = {
engine_name: multi_result_map[mi]
for mi, engine_name in enumerate(model_engine_names)
}
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
# When using the A/B test, the tag of variant needs to be returned
return ret if not need_variant_tag else [
ret, result_batch_handle.variant_tag()
]
def release(self):
self.client_handle_.destroy_predictor()
self.client_handle_ = None
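# A minimal usage sketch for the BRPC Client above (illustrative only; the
# client config path, endpoint, and feed/fetch names are assumptions, not
# values taken from this commit):
#
#   import numpy as np
#   client = Client()
#   client.load_client_config("serving_client_conf/serving_client_conf.prototxt")
#   # optional A/B test: register weighted server variants before connect()
#   # client.add_variant("var1", ["127.0.0.1:9292"], 50)
#   # client.add_variant("var2", ["127.0.0.1:9393"], 50)
#   client.connect(["127.0.0.1:9292"])
#   fetch_map = client.predict(
#       feed={"x": np.ones((1, 13), dtype="float32")},
#       fetch=["price"],
#       batch=True)
#   client.release()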
class MultiLangClient(object):
def __init__(self):
self.channel_ = None
self.stub_ = None
self.rpc_timeout_s_ = 2
self.profile_ = _Profiler()
def add_variant(self, tag, cluster, variant_weight):
        # TODO: support A/B test variants for the multi-language client
        raise Exception("A/B test is not supported yet.")
def set_rpc_timeout_ms(self, rpc_timeout):
if self.stub_ is None:
raise Exception("set timeout must be set after connect.")
if not isinstance(rpc_timeout, int):
# for bclient
raise ValueError("rpc_timeout must be int type.")
self.rpc_timeout_s_ = rpc_timeout / 1000.0
timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest()
timeout_req.timeout_ms = rpc_timeout
resp = self.stub_.SetTimeout(timeout_req)
return resp.err_code == 0
def connect(self, endpoints):
# https://github.com/tensorflow/serving/issues/1382
options = [('grpc.max_receive_message_length', 512 * 1024 * 1024),
('grpc.max_send_message_length', 512 * 1024 * 1024),
('grpc.lb_policy_name', 'round_robin')]
# TODO: weight round robin
g_endpoint = 'ipv4:{}'.format(','.join(endpoints))
self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub(
self.channel_)
# get client model config
        get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest()
resp = self.stub_.GetClientConfig(get_client_config_req)
model_config_str = resp.client_config_str
self._parse_model_config(model_config_str)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _pack_inference_request(self, feed, fetch, is_python, log_id):
req = multi_lang_general_model_service_pb2.InferenceRequest()
req.fetch_var_names.extend(fetch)
req.is_python = is_python
req.log_id = log_id
feed_var_names = []
for key in feed.keys():
if '.lod' not in key:
feed_var_names.append(key)
req.feed_var_names.extend(feed_var_names)
inst = multi_lang_general_model_service_pb2.FeedInst()
for name in req.feed_var_names:
tensor = multi_lang_general_model_service_pb2.Tensor()
var = feed[name]
v_type = self.feed_types_[name]
if is_python:
data = None
if isinstance(var, list):
if v_type == 0: # int64
data = np.array(var, dtype="int64")
elif v_type == 1: # float32
data = np.array(var, dtype="float32")
elif v_type == 2: # int32
data = np.array(var, dtype="int32")
else:
raise Exception("error tensor value type.")
elif isinstance(var, np.ndarray):
data = var
if v_type == 0:
if data.dtype != 'int64':
data = data.astype("int64")
elif v_type == 1:
if data.dtype != 'float32':
data = data.astype("float32")
elif v_type == 2:
if data.dtype != 'int32':
data = data.astype("int32")
else:
raise Exception("error tensor value type.")
else:
raise Exception("var must be list or ndarray.")
tensor.data = data.tobytes()
tensor.shape.extend(list(var.shape))
if "{}.lod".format(name) in feed.keys():
tensor.lod.extend(feed["{}.lod".format(name)])
inst.tensor_array.append(tensor)
req.insts.append(inst)
return req
def _unpack_inference_response(self, resp, fetch, is_python,
need_variant_tag):
if resp.err_code != 0:
return None
tag = resp.tag
multi_result_map = {}
for model_result in resp.outputs:
inst = model_result.insts[0]
result_map = {}
for i, name in enumerate(fetch):
var = inst.tensor_array[i]
v_type = self.fetch_types_[name]
if is_python:
if v_type == 0: # int64
result_map[name] = np.frombuffer(
var.data, dtype="int64")
elif v_type == 1: # float32
result_map[name] = np.frombuffer(
var.data, dtype="float32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
result_map[name] = np.array(
list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
result_map[name] = np.array(
list(var.float_data), dtype="float32")
else:
raise Exception("error type.")
result_map[name].shape = list(var.shape)
if name in self.lod_tensor_set_:
result_map["{}.lod".format(name)] = np.array(list(var.lod))
multi_result_map[model_result.engine_name] = result_map
ret = None
if len(resp.outputs) == 1:
ret = list(multi_result_map.values())[0]
else:
ret = multi_result_map
ret["serving_status_code"] = 0
return ret if not need_variant_tag else [ret, tag]
def _done_callback_func(self, fetch, is_python, need_variant_tag):
def unpack_resp(resp):
return self._unpack_inference_response(resp, fetch, is_python,
need_variant_tag)
return unpack_resp
def get_feed_names(self):
return self.feed_names_
def predict(self,
feed,
fetch,
batch=True,
need_variant_tag=False,
asyn=False,
is_python=True,
log_id=0):
        if not isinstance(feed, dict):
raise ValueError("Type Error. grpc feed must be dict.")
if batch is False:
for key in feed:
if ".lod" not in key:
feed[key] = feed[key][np.newaxis, :]
if not asyn:
try:
self.profile_.record('py_prepro_0')
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
ret = self._unpack_inference_response(
resp,
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag)
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
return ret
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
else:
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
call_future = self.stub_.Inference.future(
req, timeout=self.rpc_timeout_s_)
return MultiLangPredictFuture(
call_future,
self._done_callback_func(
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag))
class MultiLangPredictFuture(object):
def __init__(self, call_future, callback_func):
self.call_future_ = call_future
self.callback_func_ = callback_func
def result(self):
try:
resp = self.call_future_.result()
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
return self.callback_func_(resp)
def add_done_callback(self, fn):
def __fn__(call_future):
assert call_future == self.call_future_
fn(self)
self.call_future_.add_done_callback(__fn__)
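# A minimal usage sketch for the gRPC MultiLangClient / MultiLangPredictFuture
# pair above (illustrative only; the endpoint and feed/fetch names are
# assumptions, not values taken from this commit):
#
#   import numpy as np
#   client = MultiLangClient()
#   client.connect(["127.0.0.1:9393"])
#   # synchronous call
#   ret = client.predict(feed={"x": np.ones((1, 13), dtype="float32")},
#                        fetch=["price"])
#   # asynchronous call returns a MultiLangPredictFuture
#   future = client.predict(feed={"x": np.ones((1, 13), dtype="float32")},
#                           fetch=["price"], asyn=True)
#   ret = future.result()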
...@@ -23,7 +23,6 @@ import json ...@@ -23,7 +23,6 @@ import json
import base64 import base64
import time import time
from multiprocessing import Pool, Process from multiprocessing import Pool, Process
from paddle_serving_server import serve_args
from flask import Flask, request from flask import Flask, request
import sys import sys
if sys.version_info.major == 2: if sys.version_info.major == 2:
...@@ -91,7 +90,58 @@ def serve_args(): ...@@ -91,7 +90,58 @@ def serve_args():
help="container_id for authentication") help="container_id for authentication")
return parser.parse_args() return parser.parse_args()
def start_gpu_card_model(port, args, index = 0, gpuid): # pylint: disable=doc-string-missing def start_standard_model(serving_port): # pylint: disable=doc-string-missing
args = parse_args()
thread_num = args.thread
model = args.model
port = serving_port
workdir = args.workdir
device = args.device
mem_optim = args.mem_optim_off is False
ir_optim = args.ir_optim
max_body_size = args.max_body_size
use_mkl = args.use_mkl
use_encryption_model = args.use_encryption_model
use_multilang = args.use_multilang
if model == "":
print("You must specify your serving model")
exit(-1)
import paddle_serving_server as serving
op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = serving.OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = None
if use_multilang:
server = serving.MultiLangServer()
else:
server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.use_mkl(use_mkl)
server.set_max_body_size(max_body_size)
server.set_port(port)
server.use_encryption_model(use_encryption_model)
    if args.product_name is not None:
        server.set_product_name(args.product_name)
    if args.container_id is not None:
        server.set_container_id(args.container_id)
server.load_model_config(model)
server.prepare_server(workdir=workdir, port=port, device=device)
server.run_server()
def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing
workdir = args.workdir workdir = args.workdir
gpuid = int(gpuid) gpuid = int(gpuid)
device = "gpu" device = "gpu"
...@@ -113,7 +163,7 @@ def start_gpu_card_model(port, args, index = 0, gpuid): # pylint: disable=doc-s ...@@ -113,7 +163,7 @@ def start_gpu_card_model(port, args, index = 0, gpuid): # pylint: disable=doc-s
print("You must specify your serving model") print("You must specify your serving model")
exit(-1) exit(-1)
import paddle_serving_server_gpu as serving import paddle_serving_server as serving
op_maker = serving.OpMaker() op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader') read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer') general_infer_op = op_maker.create('general_infer')
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
import os import os
import logging import logging
import multiprocessing import multiprocessing
#from paddle_serving_server_gpu import OpMaker, OpSeqMaker #from paddle_serving_server import OpMaker, OpSeqMaker
#from paddle_serving_server_gpu import Server as GpuServer #from paddle_serving_server import Server as GpuServer
#from paddle_serving_server import Server as CpuServer #from paddle_serving_server import Server as CpuServer
from . import util from . import util
#from paddle_serving_app.local_predict import LocalPredictor #from paddle_serving_app.local_predict import LocalPredictor
...@@ -235,7 +235,7 @@ class LocalServiceHandler(object): ...@@ -235,7 +235,7 @@ class LocalServiceHandler(object):
server = Server() server = Server()
else: else:
#gpu or arm #gpu or arm
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server from paddle_serving_server import OpMaker, OpSeqMaker, Server
op_maker = OpMaker() op_maker = OpMaker()
read_op = op_maker.create('general_reader') read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer') general_infer_op = op_maker.create('general_infer')
......
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
from setuptools import setup, Distribution, Extension from setuptools import setup, Distribution, Extension
from setuptools import find_packages from setuptools import find_packages
from setuptools import setup from setuptools import setup
from paddle_serving_server_gpu.version import serving_server_version, cuda_version from paddle_serving_server.version import serving_server_version, cuda_version
import util import util
if cuda_version != "trt": if cuda_version != "trt":
...@@ -27,34 +27,34 @@ if cuda_version != "trt": ...@@ -27,34 +27,34 @@ if cuda_version != "trt":
max_version, mid_version, min_version = util.python_version() max_version, mid_version, min_version = util.python_version()
# gen pipeline proto code # gen pipeline proto code
util.gen_pipeline_code("paddle_serving_server_gpu") util.gen_pipeline_code("paddle_serving_server")
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2', 'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2',
'flask >= 1.1.1', 'func_timeout', 'pyyaml' 'flask >= 1.1.1', 'func_timeout', 'pyyaml'
] ]
packages=['paddle_serving_server_gpu', packages=['paddle_serving_server',
'paddle_serving_server_gpu.proto', 'paddle_serving_server.proto',
'paddle_serving_server_gpu.pipeline', 'paddle_serving_server.pipeline',
'paddle_serving_server_gpu.pipeline.proto', 'paddle_serving_server.pipeline.proto',
'paddle_serving_server_gpu.pipeline.gateway', 'paddle_serving_server.pipeline.gateway',
'paddle_serving_server_gpu.pipeline.gateway.proto'] 'paddle_serving_server.pipeline.gateway.proto']
package_dir={'paddle_serving_server_gpu': package_dir={'paddle_serving_server':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server',
'paddle_serving_server_gpu.proto': 'paddle_serving_server.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto',
'paddle_serving_server_gpu.pipeline': 'paddle_serving_server.pipeline':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline',
'paddle_serving_server_gpu.pipeline.proto': 'paddle_serving_server.pipeline.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/proto', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/proto',
'paddle_serving_server_gpu.pipeline.gateway': 'paddle_serving_server.pipeline.gateway':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/gateway', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/gateway',
'paddle_serving_server_gpu.pipeline.gateway.proto': 'paddle_serving_server.pipeline.gateway.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/gateway/proto'} '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/gateway/proto'}
package_data={'paddle_serving_server_gpu': ['pipeline/gateway/libproxy_server.so'],} package_data={'paddle_serving_server': ['pipeline/gateway/libproxy_server.so'],}
setup( setup(
name='paddle-serving-server-gpu', name='paddle-serving-server-gpu',
......