Commit 4493dc04 authored by TeslaZhao

supporting local_predictor of pipeline in thread/process mode, fixing pipeline examples, and adding lots of comments
Parent 62f73236
rpc_port: 18080
#rpc port; rpc_port and http_port must not both be empty. When rpc_port is empty and http_port is not, rpc_port is automatically set to http_port+1
rpc_port: 18070
#http port; rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, http_port is not generated automatically
http_port: 18071
#worker_num, maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building a grpc Server and a DAG
#When build_dag_each_worker=False, the framework sets max_workers=worker_num for the grpc thread pool of the main thread
worker_num: 4
build_dag_each_worker: false
#build_dag_each_worker, False: the framework creates one DAG inside the process; True: the framework creates an independent DAG in each worker process
build_dag_each_worker: False
dag:
is_thread_op: true
#op resource type: True for thread model, False for process model
is_thread_op: True
#retry count
retry: 1
use_profile: false
#profiling: True generates Timeline performance data (with some impact on performance); False disables it
use_profile: False
#maximum channel length, default 0
channel_size: 0
#tracer, tracks framework throughput and the working status of each OP and channel; no data is generated without it
tracer:
#interval between traces, in seconds
interval_s: 10
op:
bow:
concurrency: 2
remote_service_conf:
client_type: brpc
model_config: imdb_bow_model
devices: ""
rpc_port : 9393
#concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
concurrency: 1
#client connection type, brpc
client_type: brpc
#retry count for Serving interaction, no retry by default
retry: 1
#timeout for Serving interaction, in ms
timeout: 3000
#Serving IPs
server_endpoints: ["127.0.0.1:9393"]
#client-side config of the bow model
client_config: "imdb_bow_client_conf/serving_client_conf.prototxt"
#fetch result list, keyed by the alias_name of fetch_var in client_config
fetch_list: ["prediction"]
#number of requests batched per Serving query, default 1. With batch_size>1, auto_batching_timeout must be set, otherwise requests block until batch_size is reached
batch_size: 1
#batching timeout, used together with batch_size
auto_batching_timeout: 2000
cnn:
concurrency: 2
remote_service_conf:
client_type: brpc
model_config: imdb_cnn_model
devices: ""
rpc_port : 9292
#concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
concurrency: 1
#client connection type, brpc
client_type: brpc
#retry count for Serving interaction, no retry by default
retry: 1
#timeout, in ms
timeout: 3000
#Serving IPs
server_endpoints: ["127.0.0.1:9292"]
#client-side config of the cnn model
client_config: "imdb_cnn_client_conf/serving_client_conf.prototxt"
#fetch result list, keyed by the alias_name of fetch_var in client_config
fetch_list: ["prediction"]
#number of requests batched per Serving query, default 1. With batch_size>1, auto_batching_timeout must be set, otherwise requests block until batch_size is reached
batch_size: 1
#batching timeout, used together with batch_size
auto_batching_timeout: 2000
combine:
#concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
concurrency: 1
#retry count for Serving interaction, no retry by default
retry: 1
#timeout, in ms
timeout: 3000
#number of requests batched per query, default 1. With batch_size>1, auto_batching_timeout must be set, otherwise requests block until batch_size is reached
batch_size: 1
#batching timeout, used together with batch_size
auto_batching_timeout: 2000
......@@ -15,21 +15,22 @@ from paddle_serving_server.pipeline import PipelineClient
import numpy as np
client = PipelineClient()
client.connect(['127.0.0.1:18080'])
client.connect(['127.0.0.1:18070'])
words = 'i am very sad | 0'
futures = []
for i in range(4):
for i in range(100):
futures.append(
client.predict(
feed_dict={"words": words},
feed_dict={"words": words,
"logid": 10000 + i},
fetch=["prediction"],
asyn=True,
profile=False))
for f in futures:
res = f.result()
if res["ecode"] != 0:
if res.err_no != 0:
print("predict failed: {}".format(res))
print(res)
......@@ -15,10 +15,14 @@
from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode
from paddle_serving_server.pipeline.channel import ChannelDataErrcode
import numpy as np
from paddle_serving_app.reader import IMDBDataset
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import logging
try:
from paddle_serving_server.web_service import WebService
except ImportError:
from paddle_serving_server_gpu.web_service import WebService
_LOGGER = logging.getLogger()
user_handler = logging.StreamHandler()
......@@ -43,76 +47,66 @@ class ImdbRequestOp(RequestOp):
word_ids, _ = self.imdb_dataset.get_words_and_label(words)
word_len = len(word_ids)
dictdata[key] = np.array(word_ids).reshape(word_len, 1)
dictdata["{}.lod".format(key)] = [0, word_len]
return dictdata
dictdata["{}.lod".format(key)] = np.array([0, word_len])
log_id = None
if request.logid is not None:
log_id = request.logid
return dictdata, log_id, None, ""
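# unpack_request_package now returns a 4-tuple instead of a bare dict:
# the parsed feed dict, the request's log_id, and a product error
# code/message pair (None/"" when no product error occurred).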
class CombineOp(Op):
def preprocess(self, input_data):
def preprocess(self, input_data, data_id, log_id):
#_LOGGER.info("Enter CombineOp::preprocess")
combined_prediction = 0
for op_name, data in input_data.items():
_LOGGER.info("{}: {}".format(op_name, data["prediction"]))
combined_prediction += data["prediction"]
data = {"prediction": combined_prediction / 2}
return data
return data, False, None, ""
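# Op.preprocess now takes (input_data, data_id, log_id) and returns a 4-tuple:
# the output dict, a skip-process flag (False here, so the process stage still
# runs), and a product error code/message pair (None/"" when no error).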
class ImdbResponseOp(ResponseOp):
# Here ImdbResponseOp is consistent with the default ResponseOp implementation
def pack_response_package(self, channeldata):
resp = pipeline_service_pb2.Response()
resp.ecode = channeldata.ecode
if resp.ecode == ChannelDataEcode.OK.value:
resp.err_no = channeldata.error_code
if resp.err_no == ChannelDataErrcode.OK.value:
feed = channeldata.parse()
# ndarray to string
for name, var in feed.items():
resp.value.append(var.__repr__())
resp.key.append(name)
else:
resp.error_info = channeldata.error_info
resp.err_msg = channeldata.error_info
return resp
read_op = ImdbRequestOp()
bow_op = Op(name="bow",
input_ops=[read_op],
server_endpoints=["127.0.0.1:9393"],
fetch_list=["prediction"],
client_config="imdb_bow_client_conf/serving_client_conf.prototxt",
client_type='brpc',
concurrency=1,
timeout=-1,
retry=1,
batch_size=1,
auto_batching_timeout=None)
cnn_op = Op(name="cnn",
input_ops=[read_op],
server_endpoints=["127.0.0.1:9292"],
fetch_list=["prediction"],
client_config="imdb_cnn_client_conf/serving_client_conf.prototxt",
client_type='brpc',
concurrency=1,
timeout=-1,
retry=1,
batch_size=1,
auto_batching_timeout=None)
combine_op = CombineOp(
name="combine",
input_ops=[bow_op, cnn_op],
concurrency=1,
timeout=-1,
retry=1,
batch_size=2,
auto_batching_timeout=None)
class BowOp(Op):
def init_op(self):
pass
class CnnOp(Op):
def init_op(self):
pass
bow_op = BowOp("bow", input_ops=[read_op])
cnn_op = CnnOp("cnn", input_ops=[read_op])
combine_op = CombineOp("combine", input_ops=[bow_op, cnn_op])
# fetch output of bow_op
# response_op = ImdbResponseOp(input_ops=[bow_op])
#response_op = ImdbResponseOp(input_ops=[bow_op])
# fetch output of combine_op
response_op = ImdbResponseOp(input_ops=[combine_op])
# use default ResponseOp implementation
# response_op = ResponseOp(input_ops=[combine_op])
#response_op = ResponseOp(input_ops=[combine_op])
server = PipelineServer()
server.set_response_op(response_op)
......
......@@ -28,31 +28,9 @@ python web_service.py &>log.txt &
python pipeline_http_client.py
```
<!--
## More (PipelineServing)
You can choose one of the following versions to start Service.
### Remote Service Version
```
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 12000 --gpu_id 0 &> det.log &
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python remote_service_pipeline_server.py &>pipeline.log &
```
### Local Service Version
```
python local_service_pipeline_server.py &>pipeline.log &
```
### Hybrid Service Version
```
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python hybrid_service_pipeline_server.py &>pipeline.log &
```
## Client Prediction
### RPC
......
......@@ -31,26 +31,6 @@ python pipeline_http_client.py
<!--
## More (PipelineServing)
You can choose any one of the following versions to start the service.
### Remote Service Version
```
python -m paddle_serving_server.serve --model ocr_det_model --port 12000 --gpu_id 0 &> det.log &
python -m paddle_serving_server.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python remote_service_pipeline_server.py &>pipeline.log &
```
### Local Service Version
```
python local_service_pipeline_server.py &>pipeline.log &
```
### Hybrid Service Version
```
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python hybrid_service_pipeline_server.py &>pipeline.log &
```
## Client Prediction
### RPC
......
rpc_port: 18080
worker_num: 4
build_dag_each_worker: false
#rpc port; rpc_port and http_port must not both be empty. When rpc_port is empty and http_port is not, rpc_port is automatically set to http_port+1
rpc_port: 18090
#http port; rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, http_port is not generated automatically
http_port: 9999
#worker_num, maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building a grpc Server and a DAG
##When build_dag_each_worker=False, the framework sets max_workers=worker_num for the grpc thread pool of the main thread
worker_num: 1
#build_dag_each_worker, False: the framework creates one DAG inside the process; True: the framework creates an independent DAG in each worker process
build_dag_each_worker: false
dag:
is_thread_op: false
#op resource type: True for thread model, False for process model
is_thread_op: False
#retry count
retry: 1
#profiling: True generates Timeline performance data (with some impact on performance); False disables it
use_profile: false
op:
det:
#concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
concurrency: 2
#when the op config has no server_endpoints, the local service config is read from local_service_conf
local_service_conf:
#client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; prediction runs in-process
client_type: local_predictor
#det model path
model_config: ocr_det_model
devices: ""
#fetch result list, keyed by the alias_name of fetch_var in client_config
fetch_list: ["concat_1.tmp_0"]
#compute device IDs; when devices is "" or unset, prediction runs on CPU; when devices is "0" or "0,1,2", prediction runs on the listed GPU cards
devices: "0"
rec:
concurrency: 1
#concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
concurrency: 2
#timeout, in ms
timeout: -1
#retry count for Serving interaction, no retry by default
retry: 1
#when the op config has no server_endpoints, the local service config is read from local_service_conf
local_service_conf:
#client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; prediction runs in-process
client_type: local_predictor
#rec model path
model_config: ocr_rec_model
devices: ""
#fetch result list, keyed by the alias_name of fetch_var in client_config
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
#compute device IDs; when devices is "" or unset, prediction runs on CPU; when devices is "0" or "0,1,2", prediction runs on the listed GPU cards
devices: "0"
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server_gpu.pipeline import PipelineServer
from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
from paddle_serving_server_gpu.pipeline import LocalRpcServiceHandler
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
local_rpc_service_handler=LocalRpcServiceHandler(
model_config="ocr_det_model",
workdir="det_workdir", # defalut: "workdir"
thread_num=2, # defalut: 2
devices="0", # gpu0. defalut: "" (cpu)
mem_optim=True, # defalut: True
ir_optim=False, # defalut: False
available_port_generator=None), # defalut: None
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
server_endpoints=["127.0.0.1:12001"],
fetch_list=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
client_config="ocr_rec_client/serving_client_conf.prototxt",
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode
from paddle_serving_server.pipeline import LocalServiceHandler
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
print(self.im)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
print("image", det_img)
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
client_type="local_predictor",
local_service_handler=LocalServiceHandler(
model_config="ocr_det_model",
workdir="det_workdir", # defalut: "workdir"
thread_num=2, # defalut: 2
mem_optim=True, # defalut: True
ir_optim=False, # defalut: False
available_port_generator=None), # defalut: None
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
client_type="local_predictor",
local_service_handler=LocalServiceHandler(model_config="ocr_rec_model"),
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
......@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server.pipeline import PipelineClient
from paddle_serving_server_gpu.pipeline import PipelineClient
import numpy as np
import requests
import json
......@@ -20,7 +20,7 @@ import base64
import os
client = PipelineClient()
client.connect(['127.0.0.1:18080'])
client.connect(['127.0.0.1:18090'])
def cv2_to_base64(image):
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server_gpu.pipeline import PipelineServer
from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
server_endpoints=["127.0.0.1:12000"],
fetch_list=["concat_1.tmp_0"],
client_config="ocr_det_client/serving_client_conf.prototxt",
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
server_endpoints=["127.0.0.1:12001"],
fetch_list=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
client_config="ocr_rec_client/serving_client_conf.prototxt",
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
......@@ -14,7 +14,7 @@
try:
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
from paddle_serving_server_gpu.web_service import WebService, Op
import logging
import numpy as np
import cv2
......@@ -43,7 +43,7 @@ class DetOp(Op):
"min_size": 3
})
def preprocess(self, input_dicts):
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
......@@ -52,9 +52,9 @@ class DetOp(Op):
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img[np.newaxis, :]}
return {"image": det_img[np.newaxis, :].copy()}, False, None, ""
def postprocess(self, input_dicts, fetch_dict):
def postprocess(self, input_dicts, fetch_dict, log_id):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
......@@ -63,7 +63,7 @@ class DetOp(Op):
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
print("out dict", out_dict)
return out_dict
return out_dict, None, ""
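# postprocess now takes an extra log_id argument and returns a 3-tuple:
# the output dict plus a product error code/message pair (None/"" when
# no product error occurred).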
class RecOp(Op):
......@@ -72,7 +72,7 @@ class RecOp(Op):
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
......@@ -93,15 +93,15 @@ class RecOp(Op):
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
imgs[id] = norm_img
feed = {"image": imgs.copy()}
return feed
return feed, False, None, ""
def postprocess(self, input_dicts, fetch_dict):
def postprocess(self, input_dicts, fetch_dict, log_id):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
return res, None, ""
class OcrService(WebService):
......@@ -112,5 +112,5 @@ class OcrService(WebService):
uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("brpc_config.yml")
uci_service.prepare_pipeline_config("config.yml")
uci_service.run_service()
......@@ -15,5 +15,5 @@ python web_service.py &>log.txt &
## Http test
```
curl -X POST -k http://localhost:18080/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
curl -X POST -k http://localhost:18082/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
```
......@@ -15,5 +15,5 @@ python web_service.py &>log.txt &
## Test
```
curl -X POST -k http://localhost:18080/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
curl -X POST -k http://localhost:18082/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
```
worker_num: 4
http_port: 18080
#worker_num, maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building a grpc Server and a DAG
##When build_dag_each_worker=False, the framework sets max_workers=worker_num for the grpc thread pool of the main thread
worker_num: 1
#http port; rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, http_port is not generated automatically
http_port: 18082
dag:
is_thread_op: false
#op resource type: True for thread model, False for process model
is_thread_op: False
op:
uci:
#when the op config has no server_endpoints, the local service config is read from local_service_conf
local_service_conf:
#concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
concurrency: 2
#uci model path
model_config: uci_housing_model
devices: "" # "0,1"
client_type: brpc
#compute device IDs; when devices is "" or unset, prediction runs on CPU; when devices is "0" or "0,1,2", prediction runs on the listed GPU cards
devices: "0" # "0,1"
#client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; prediction runs in-process
client_type: local_predictor
#fetch result list, keyed by the alias_name of fetch_var in client_config
fetch_list: ["price"]
......@@ -25,20 +25,25 @@ class UciOp(Op):
def init_op(self):
self.separator = ","
def preprocess(self, input_dicts):
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
_LOGGER.info(input_dict)
_LOGGER.error("UciOp::preprocess >>> log_id:{}, input:{}".format(
log_id, input_dict))
x_value = input_dict["x"]
proc_dict = {}
if isinstance(x_value, (str, unicode)):
input_dict["x"] = np.array(
[float(x.strip())
for x in x_value.split(self.separator)]).reshape(1, 13)
return input_dict
_LOGGER.error("input_dict:{}".format(input_dict))
def postprocess(self, input_dicts, fetch_dict):
# _LOGGER.info(fetch_dict)
return input_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
_LOGGER.info("UciOp::postprocess >>> log_id:{}, fetch_dict:{}".format(
log_id, fetch_dict))
fetch_dict["price"] = str(fetch_dict["price"][0][0])
return fetch_dict
return fetch_dict, None, ""
class UciService(WebService):
......
......@@ -32,6 +32,12 @@ logger.setLevel(logging.INFO)
class LocalPredictor(object):
"""
Prediction in the current process of the local environment. Compared
with RPC/HTTP calls, LocalPredictor performs better because there is
no network or serialization overhead.
"""
def __init__(self):
self.feed_names_ = []
self.fetch_names_ = []
......@@ -42,13 +48,41 @@ class LocalPredictor(object):
self.fetch_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
def load_model_config(self, model_path, gpu=False, profile=True, cpu_num=1):
def load_model_config(self,
model_path,
use_gpu=False,
gpu_id=0,
use_profile=False,
thread_num=1,
mem_optim=True,
ir_optim=False,
use_trt=False,
use_feed_fetch_ops=False):
"""
Load model config and set the engine config for the paddle predictor
Args:
model_path: model config path.
use_gpu: compute on GPU, False by default.
gpu_id: gpu id, 0 by default.
use_profile: enable predictor profiling, False by default.
thread_num: number of threads, 1 by default.
mem_optim: enable memory optimization, True by default.
ir_optim: enable computation graph (IR) optimization, False by default.
use_trt: use NVIDIA TensorRT optimization, False by default.
use_feed_fetch_ops: use feed/fetch ops, False by default.
"""
client_config = "{}/serving_server_conf.prototxt".format(model_path)
model_conf = m_config.GeneralModelConfig()
f = open(client_config, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
config = AnalysisConfig(model_path)
logger.info("load_model_config params: model_path:{}, use_gpu:{},\
gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\
use_trt:{}, use_feed_fetch_ops:{}".format(
model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim,
ir_optim, use_trt, use_feed_fetch_ops))
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
......@@ -64,19 +98,43 @@ class LocalPredictor(object):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if not gpu:
config.disable_gpu()
else:
config.enable_use_gpu(100, 0)
if profile:
if use_profile:
config.enable_profile()
if mem_optim:
config.enable_memory_optim()
config.switch_ir_optim(ir_optim)
config.set_cpu_math_library_num_threads(thread_num)
config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
config.set_cpu_math_library_num_threads(cpu_num)
config.switch_ir_optim(False)
config.switch_use_feed_fetch_ops(False)
if not use_gpu:
config.disable_gpu()
else:
config.enable_use_gpu(100, gpu_id)
if use_trt:
config.enable_tensorrt_engine(
workspace_size=1 << 20,
max_batch_size=32,
min_subgraph_size=3,
use_static=False,
use_calib_mode=False)
self.predictor = create_paddle_predictor(config)
def predict(self, feed=None, fetch=None, batch=False, log_id=0):
"""
Predict locally
Args:
feed: feed var
fetch: fetch var
batch: whether the feed is batch data, False by default. If batch is
False, a new leading dimension (np.newaxis) is added to the shape.
log_id: for logging
Returns:
fetch_map: dict
"""
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
......
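A minimal sketch of exercising the reworked LocalPredictor API directly; the model directory and the `x`/`price` feed/fetch names are assumptions borrowed from the UCI example in this commit:

```python
from paddle_serving_app.local_predict import LocalPredictor
import numpy as np

predictor = LocalPredictor()
# CPU, single thread; mem_optim defaults to True, ir_optim to False
predictor.load_model_config(
    model_path="uci_housing_model",  # assumed local model directory
    use_gpu=False,
    thread_num=1)

# batch=True means the feed already carries a leading batch dimension
x = np.random.rand(1, 13).astype("float32")
fetch_map = predictor.predict(feed={"x": x}, fetch=["price"], batch=True)
print(fetch_map["price"])
```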
......@@ -18,5 +18,5 @@ from .image_reader import RCNNPostprocess, SegPostprocess, PadStride, BlazeFaceP
from .image_reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
from .lac_reader import LACReader
from .senta_reader import SentaReader
from .imdb_reader import IMDBDataset
#from .imdb_reader import IMDBDataset
from .ocr_reader import OCRReader
......@@ -22,18 +22,17 @@ import yaml
import copy
import argparse
import logging
import paddle.fluid as fluid
import json
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
precision_map = {
'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
}
#precision_map = {
# 'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
# 'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
# 'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
#}
class Resize(object):
......
......@@ -57,6 +57,9 @@ class ProductErrCode(enum.Enum):
class ChannelDataType(enum.Enum):
"""
Channel data type
"""
DICT = 0
CHANNEL_NPDATA = 1
ERROR = 2
......@@ -211,9 +214,9 @@ class ChannelData(object):
return 1
def __str__(self):
return "type[{}], error_code[{}], data_id[{}], log_id[{}]".format(
return "type[{}], error_code[{}], data_id[{}], log_id[{}], dict_data[{}]".format(
ChannelDataType(self.datatype).name, self.error_code, self.id,
self.log_id)
self.log_id, str(self.dictdata))
class ProcessChannel(object):
......@@ -310,8 +313,8 @@ class ProcessChannel(object):
def push(self, channeldata, op_name=None):
_LOGGER.debug(
self._log("(data_id={} log_id={}) Op({}) Pushing data".format(
channeldata.id, channeldata.log_id, op_name)))
self._log("(data_id={} log_id={}) Op({}) Enter channel::push".
format(channeldata.id, channeldata.log_id, op_name)))
if len(self._producers) == 0:
_LOGGER.critical(
self._log(
......
......@@ -39,7 +39,23 @@ _LOGGER = logging.getLogger(__name__)
class DAGExecutor(object):
"""
DAG Executor, the service entrance of DAG.
"""
def __init__(self, response_op, server_conf, worker_idx):
"""
Initialize DAGExecutor.
Args:
response_op: Response OP
server_conf: server conf. config.yaml
worker_idx: DAGExecutor index, PipelineServer creates many
DAGExecutors when _build_dag_each_worker is true.
Returns:
None.
"""
build_dag_each_worker = server_conf["build_dag_each_worker"]
server_worker_num = server_conf["worker_num"]
dag_conf = server_conf["dag"]
......@@ -76,7 +92,9 @@ class DAGExecutor(object):
if self._tracer is not None:
self._tracer.start()
# generate id: data_id == request_id == log_id
# generate id
# data_id: Server Unique ID, automatically generated by the framework
# log_id: Trace one product request, can be empty, not unique.
base_counter = 0
gen_id_step = 1
if build_dag_each_worker:
......@@ -96,6 +114,15 @@ class DAGExecutor(object):
self._client_profile_value = "1"
def start(self):
"""
Starting one background thread to receive data from the last channel.
Args:
None
Returns:
None
"""
self._recive_func = threading.Thread(
target=DAGExecutor._recive_out_channel_func, args=(self, ))
self._recive_func.daemon = True
......@@ -103,11 +130,30 @@ class DAGExecutor(object):
_LOGGER.debug("[DAG Executor] Start recive thread")
def stop(self):
"""
Stopping DAG
Args:
None
Returns:
None
"""
self._dag.stop()
self._dag.join()
_LOGGER.info("[DAG Executor] Stop")
def _get_next_data_id(self):
"""
Generate data_id incrementally and uniquely
Args:
None
Returns:
data_id: uniq id
cond_v: condition variable
"""
data_id = self._id_generator.next()
cond_v = threading.Condition()
with self._cv_for_cv_pool:
......@@ -116,6 +162,15 @@ class DAGExecutor(object):
return data_id, cond_v
def _set_in_channel(self, in_channel):
"""
Set in_channel of DAG
Args:
in_channel: input channel of DAG
Returns:
None
"""
if not isinstance(in_channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical("[DAG Executor] Failed to set in_channel: "
"in_channel must be Channel type, but get {}".
......@@ -123,8 +178,18 @@ class DAGExecutor(object):
os._exit(-1)
in_channel.add_producer(self.name)
self._in_channel = in_channel
_LOGGER.info("[DAG] set in channel succ, name [{}]".format(self.name))
def _set_out_channel(self, out_channel):
"""
Set out_channel of DAG
Args:
out_channel: output channel of DAG
Returns:
None
"""
if not isinstance(out_channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical("[DAG Executor] Failed to set out_channel: "
"must be Channel type, but get {}".format(
......@@ -134,6 +199,17 @@ class DAGExecutor(object):
self._out_channel = out_channel
def _recive_out_channel_func(self):
"""
Receiving data from the output channel and pushing it into
_fetch_buffer. Function _get_channeldata_from_fetch_buffer later
retrieves the data.
Args:
None
Returns:
None
"""
cv = None
while True:
try:
......@@ -150,7 +226,6 @@ class DAGExecutor(object):
self._fetch_buffer[data_id] = closed_errror_data
cv.notify_all()
break
if len(channeldata_dict) != 1:
_LOGGER.critical(
"[DAG Executor] Failed to fetch result: out_channel "
......@@ -174,6 +249,16 @@ class DAGExecutor(object):
cond_v.notify_all()
def _get_channeldata_from_fetch_buffer(self, data_id, cond_v):
"""
Getting the channel data from _fetch_buffer.
Args:
data_id: search key
cond_v: conditional variable
Returns:
ready_data: one channel data processed
"""
ready_data = None
with cond_v:
......@@ -190,10 +275,20 @@ class DAGExecutor(object):
ready_data = self._fetch_buffer[data_id]
self._cv_pool.pop(data_id)
self._fetch_buffer.pop(data_id)
_LOGGER.debug("(logid={}) [resp thread] Got data".format(data_id))
_LOGGER.debug("(data_id={}) [resp thread] Got data".format(data_id))
return ready_data
def _pack_channeldata(self, rpc_request, data_id):
"""
Unpacking data from the RPC request and creating one ChannelData.
Args:
rpc_request: one RPC request
data_id: data id, unique
Returns:
ChannelData: one channel data to be processed
"""
dictdata = None
log_id = None
try:
......@@ -215,6 +310,8 @@ class DAGExecutor(object):
# in rpc_request
if prod_errcode is not None:
# product errors occurred
_LOGGER.error("unpack_rpc_func prod_errcode:{}".format(
prod_errcode))
return ChannelData(
error_code=ChannelDataErrcode.PRODUCT_ERROR.value,
error_info="",
......@@ -226,8 +323,6 @@ class DAGExecutor(object):
profile_value = None
profile_value = dictdata.get(self._client_profile_key)
client_need_profile = (profile_value == self._client_profile_value)
_LOGGER.debug("(logid={}) Need profile in client: {}".format(
data_id, client_need_profile))
return ChannelData(
datatype=ChannelDataType.DICT.value,
dictdata=dictdata,
......@@ -236,11 +331,26 @@ class DAGExecutor(object):
client_need_profile=client_need_profile)
def call(self, rpc_request):
"""
DAGExecutor entrance function. There are 5 steps:
1._get_next_data_id: Generate an incremental ID
2._pack_channeldata: pack the channel data from request.
3.retry loop:
a. push channel_data into _in_channel
b. get_channeldata_from_fetch_buffer: get results.
4._pack_for_rpc_resp: pack RPC responses
5.profile: generate profile string and pack into response.
Args:
rpc_request: one RPC request
Returns:
rpc_resp: one RPC response
"""
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
data_id, cond_v = self._get_next_data_id()
_LOGGER.info("(logid={}) Succ generate id".format(data_id))
start_call, end_call = None, None
if not self._is_thread_op:
......@@ -249,19 +359,33 @@ class DAGExecutor(object):
else:
start_call = self._profiler.record("call_{}#DAG_0".format(data_id))
_LOGGER.debug("(logid={}) Parsing RPC request package".format(data_id))
self._profiler.record("prepack_{}#{}_0".format(data_id, self.name))
req_channeldata = self._pack_channeldata(rpc_request, data_id)
self._profiler.record("prepack_{}#{}_1".format(data_id, self.name))
log_id = req_channeldata.log_id
_LOGGER.info("(data_id={} log_id={}) Succ Generate ID ".format(data_id,
log_id))
resp_channeldata = None
for i in range(self._retry):
_LOGGER.debug("(logid={}) Pushing data into Graph engine".format(
_LOGGER.debug("(data_id={}) Pushing data into Graph engine".format(
data_id))
try:
if req_channeldata is None:
_LOGGER.critical(
"(data_id={} log_id={}) req_channeldata is None"
.format(data_id, log_id))
if not isinstance(self._in_channel,
(ThreadChannel, ProcessChannel)):
_LOGGER.critical(
"(data_id={} log_id={})[DAG Executor] Failed to "
"set in_channel: in_channel must be Channel type, but get {}".
format(data_id, log_id, type(self._in_channel)))
self._in_channel.push(req_channeldata, self.name)
except ChannelStopError:
_LOGGER.debug("[DAG Executor] Stop")
_LOGGER.error("(data_id:{} log_id={})[DAG Executor] Stop".
format(data_id, log_id))
with self._cv_for_cv_pool:
self._cv_pool.pop(data_id)
return self._pack_for_rpc_resp(
......@@ -270,24 +394,29 @@ class DAGExecutor(object):
error_info="dag closed.",
data_id=data_id))
_LOGGER.debug("(logid={}) Wait for Graph engine...".format(data_id))
_LOGGER.debug("(data_id={} log_id={}) Wait for Graph engine...".
format(data_id, log_id))
resp_channeldata = self._get_channeldata_from_fetch_buffer(data_id,
cond_v)
if resp_channeldata.error_code == ChannelDataErrcode.OK.value:
_LOGGER.info("(logid={}) Succ predict".format(data_id))
_LOGGER.info("(data_id={} log_id={}) Succ predict".format(
data_id, log_id))
break
else:
_LOGGER.error("(logid={}) Failed to predict: {}"
.format(data_id, resp_channeldata.error_info))
_LOGGER.error("(data_id={} log_id={}) Failed to predict: {}"
.format(data_id, log_id,
resp_channeldata.error_info))
if resp_channeldata.error_code != ChannelDataErrcode.TIMEOUT.value:
break
if i + 1 < self._retry:
_LOGGER.warning("(logid={}) DAGExecutor retry({}/{})".format(
data_id, i + 1, self._retry))
_LOGGER.warning(
"(data_id={} log_id={}) DAGExecutor retry({}/{})"
.format(data_id, log_id, i + 1, self._retry))
_LOGGER.debug("(logid={}) Packing RPC response package".format(data_id))
_LOGGER.debug("(data_id={} log_id={}) Packing RPC response package"
.format(data_id, log_id))
self._profiler.record("postpack_{}#{}_0".format(data_id, self.name))
rpc_resp = self._pack_for_rpc_resp(resp_channeldata)
self._profiler.record("postpack_{}#{}_1".format(data_id, self.name))
......@@ -323,6 +452,15 @@ class DAGExecutor(object):
return rpc_resp
def _pack_for_rpc_resp(self, channeldata):
"""
Packing one RPC response
Args:
channeldata: one channel data to be packed
Returns:
resp: one RPC response
"""
try:
return self._pack_rpc_func(channeldata)
except Exception as e:
......@@ -333,11 +471,14 @@ class DAGExecutor(object):
resp = pipeline_service_pb2.Response()
resp.err_no = ChannelDataErrcode.RPC_PACKAGE_ERROR.value
resp.err_msg = "rpc package error: {}".format(e)
resp.result = ""
return resp
class DAG(object):
"""
Directed Acyclic Graph(DAG) engine, builds one DAG topology.
"""
def __init__(self, request_name, response_op, use_profile, is_thread_op,
channel_size, build_dag_each_worker, tracer):
self._request_name = request_name
......@@ -353,6 +494,18 @@ class DAG(object):
@staticmethod
def get_use_ops(response_op):
"""
Starting from ResponseOp, recursively traverse the front OPs. Getting
all used ops and the post op list of each op (excluding ResponseOp)
Args:
response_op: ResponseOp
Returns:
used_ops: used ops, set
succ_ops_of_use_op: op and the next op list, dict.
"""
unique_names = set()
used_ops = set()
succ_ops_of_use_op = {} # {op_name: succ_ops}
......@@ -378,6 +531,15 @@ class DAG(object):
return used_ops, succ_ops_of_use_op
def _gen_channel(self, name_gen):
"""
Generate one ThreadChannel or ProcessChannel.
Args:
name_gen: channel name
Returns:
channel: one channel generated
"""
channel = None
if self._is_thread_op:
channel = ThreadChannel(
......@@ -389,11 +551,37 @@ class DAG(object):
return channel
def _gen_virtual_op(self, name_gen):
"""
Generate one virtual Op
Args:
name_gen: Op name
Returns:
vir_op: one virtual Op object.
"""
vir_op = VirtualOp(name=name_gen.next())
_LOGGER.debug("[DAG] Generate virtual_op: {}".format(vir_op.name))
return vir_op
def _topo_sort(self, used_ops, response_op, out_degree_ops):
"""
Topological sort of the DAG, creating inverted multi-layer views.
Args:
used_ops: op used in DAG
response_op: response op
out_degree_ops: Next op list for each op, dict. the output of
get_use_ops()
Returns:
dag_views: the inverted hierarchical topology list. examples:
DAG :[A -> B -> C -> E]
\-> D /
dag_views: [[E], [C, D], [B], [A]]
last_op:the last op front of ResponseOp
"""
out_degree_num = {
name: len(ops)
for name, ops in out_degree_ops.items()
......@@ -437,6 +625,23 @@ class DAG(object):
return dag_views, last_op
def _build_dag(self, response_op):
"""
Building DAG, the most important function in class DAG. Core steps:
1.get_use_ops: Getting used ops, and out degree op list for each op.
2._topo_sort: Topological sort creates inverted multi-layers views.
3.create channels and virtual ops.
Args:
response_op: ResponseOp
Returns:
actual_ops: all OPs used in DAG, including virtual OPs
channels: all channels used in DAG
input_channel: the channel of first OP
output_channel: the channel of last OP
pack_func: pack_response_package function of response_op
unpack_func: unpack_request_package function of request_op
"""
if response_op is None:
_LOGGER.critical("Failed to build DAG: ResponseOp"
" has not been set.")
......@@ -562,6 +767,18 @@ class DAG(object):
return self._channels
def build(self):
"""
Interface for building one DAG outside.
Args:
None
Returns:
_input_channel: the channel of first OP
_output_channel: the channel of last OP
_pack_func: pack_response_package function of response_op
_unpack_func: unpack_request_package function of request_op
"""
(actual_ops, channels, input_channel, output_channel, pack_func,
unpack_func) = self._build_dag(self._response_op)
_LOGGER.info("[DAG] Succ build DAG")
......@@ -579,6 +796,15 @@ class DAG(object):
return self._input_channel, self._output_channel, self._pack_func, self._unpack_func
def start(self):
"""
Each OP starts a thread or process by _is_thread_op
Args:
None
Returns:
_threads_or_proces: threads or process list.
"""
self._threads_or_proces = []
for op in self._actual_ops:
op.use_profiler(self._use_profile)
......@@ -593,11 +819,29 @@ class DAG(object):
return self._threads_or_proces
def join(self):
"""
All threads or processes join.
Args:
None
Returns:
None
"""
for x in self._threads_or_proces:
if x is not None:
x.join()
def stop(self):
"""
Stopping and cleaning all channels.
Args:
None
Returns:
None
"""
for chl in self._channels:
chl.stop()
for op in self._actual_ops:
......
......@@ -21,19 +21,17 @@ import "google/api/annotations.proto";
message Response {
int32 err_no = 1;
string err_msg = 2;
string result = 3;
repeated string key = 3;
repeated string value = 4;
};
message Request {
string name = 1;
string method = 2;
string appid = 3;
int64 logid = 4;
string format = 5;
string from = 6;
string cmdid = 7;
string clientip = 8;
string data = 9;
repeated string key = 1;
repeated string value = 2;
string name = 3;
string method = 4;
int64 logid = 5;
string clientip = 6;
};
service PipelineService {
......
......@@ -15,111 +15,203 @@
import os
import logging
import multiprocessing
try:
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
PACKAGE_VERSION = "GPU"
except ImportError:
from paddle_serving_server import OpMaker, OpSeqMaker, Server
PACKAGE_VERSION = "CPU"
#from paddle_serving_server_gpu import OpMaker, OpSeqMaker
#from paddle_serving_server_gpu import Server as GpuServer
#from paddle_serving_server import Server as CpuServer
from . import util
from paddle_serving_app.local_predict import LocalPredictor
#from paddle_serving_app.local_predict import LocalPredictor
_LOGGER = logging.getLogger(__name__)
_workdir_name_gen = util.NameGenerator("workdir_")
class LocalServiceHandler(object):
"""
LocalServiceHandler is the processor of the local service. It supports
three client types: brpc, grpc and local_predictor. With brpc or grpc,
the ability to start a Serving service is provided; with local_predictor,
in-process prediction is provided by paddle_serving_app.
"""
def __init__(self,
model_config,
client_type='local_predictor',
workdir="",
thread_num=2,
devices="",
fetch_names=None,
mem_optim=True,
ir_optim=False,
available_port_generator=None):
available_port_generator=None,
use_trt=False,
use_profile=False):
"""
Initialization of LocalServiceHandler
Args:
model_config: model config path
client_type: brpc, grpc or local_predictor[default]
workdir: work directory
thread_num: number of threads, i.e. the degree of concurrency.
devices: gpu id list for GPU, "" (CPU) by default
fetch_names: get fetch names out of LocalServiceHandler in
local_predictor mode. fetch_names_ is compatible for Client().
mem_optim: use memory/graphics memory optimization, True by default.
ir_optim: enable computation graph (IR) optimization, False by default.
available_port_generator: generate available ports
use_trt: use NVIDIA TensorRT engine, False by default.
use_profile: enable profiling, False by default.
Returns:
None
"""
if available_port_generator is None:
available_port_generator = util.GetAvailablePortGenerator()
self._model_config = model_config
self._port_list = []
self._device_type = "cpu"
if devices == "":
# cpu
devices = [-1]
self._device_type = "cpu"
self._port_list.append(available_port_generator.next())
_LOGGER.info("Model({}) will be launch in cpu device. Port({})"
.format(model_config, self._port_list))
else:
# gpu
if PACKAGE_VERSION == "CPU":
raise ValueError(
"You are using the CPU version package("
"paddle-serving-server), unable to set devices")
self._device_type = "gpu"
devices = [int(x) for x in devices.split(",")]
for _ in devices:
self._port_list.append(available_port_generator.next())
_LOGGER.info("Model({}) will be launch in gpu device: {}. Port({})"
.format(model_config, devices, self._port_list))
self.client_type = client_type
self._client_type = client_type
self._workdir = workdir
self._devices = devices
self._thread_num = thread_num
self._mem_optim = mem_optim
self._ir_optim = ir_optim
self.local_predictor_client = None
self._local_predictor_client = None
self._rpc_service_list = []
self._server_pros = []
self._fetch_vars = None
self._use_trt = use_trt
self._use_profile = use_profile
self.fetch_names_ = fetch_names
def get_fetch_list(self):
return self._fetch_vars
return self.fetch_names_
def get_port_list(self):
return self._port_list
def get_client(self): # for local_predictor_only
if self.local_predictor_client is None:
self.local_predictor_client = LocalPredictor()
self.local_predictor_client.load_model_config(
"{}".format(self._model_config), gpu=False, profile=False)
return self.local_predictor_client
def get_client(self):
"""
Function get_client is only used in the local_predictor case. It creates
one LocalPredictor object and initializes the paddle predictor via
load_model_config.
Args:
None
Returns:
_local_predictor_client
"""
from paddle_serving_app.local_predict import LocalPredictor
if self._local_predictor_client is None:
self._local_predictor_client = LocalPredictor()
use_gpu = False
if self._device_type == "gpu":
use_gpu = True
self._local_predictor_client.load_model_config(
model_path=self._model_config,
use_gpu=use_gpu,
gpu_id=self._devices[0],
use_profile=self._use_profile,
thread_num=self._thread_num,
mem_optim=self._mem_optim,
ir_optim=self._ir_optim,
use_trt=self._use_trt)
return self._local_predictor_client
def get_client_config(self):
return os.path.join(self._model_config, "serving_server_conf.prototxt")
def _prepare_one_server(self, workdir, port, gpuid, thread_num, mem_optim,
ir_optim):
device = "gpu"
if gpuid == -1:
device = "cpu"
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
"""
According to _device_type, generating one CpuServer or GpuServer, and
setting the model config and startup params.
Args:
workdir: work directory
port: network port
gpuid: gpu id
thread_num: thread num
mem_optim: use memory/graphics memory optimization
ir_optim: use calculation chart optimization
Returns:
server: CpuServer/GpuServer
"""
if self._device_type == "cpu":
from paddle_serving_server import OpMaker, OpSeqMaker, Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
else:
#gpu
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
if gpuid >= 0:
server.set_gpuid(gpuid)
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.load_model_config(self._model_config)
if gpuid >= 0:
server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device)
if self._fetch_vars is None:
self._fetch_vars = server.get_fetch_list()
server.prepare_server(
workdir=workdir, port=port, device=self._device_type)
if self.fetch_names_ is None:
self.fetch_names_ = server.get_fetch_list()
return server
def _start_one_server(self, service_idx):
"""
Start one server
Args:
service_idx: server index
Returns:
None
"""
self._rpc_service_list[service_idx].run_server()
def prepare_server(self):
"""
Prepare all servers to be started, and append them into list.
"""
for i, device_id in enumerate(self._devices):
if self._workdir != "":
workdir = "{}_{}".format(self._workdir, i)
......@@ -135,6 +227,9 @@ class LocalServiceHandler(object):
ir_optim=self._ir_optim))
def start_server(self):
"""
Start multiple processes and start one server in each process
"""
for i, service in enumerate(self._rpc_service_list):
p = multiprocessing.Process(
target=self._start_one_server, args=(i, ))
......
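A minimal sketch of building the refactored handler directly in local_predictor mode; normally the framework constructs it from an op's local_service_conf, and the model directory and fetch name here are borrowed from the OCR example:

```python
from paddle_serving_server.pipeline import LocalServiceHandler

handler = LocalServiceHandler(
    model_config="ocr_det_model",   # assumed local model directory
    client_type="local_predictor",
    devices="",                     # "" -> CPU; "0" or "0,1" -> GPU card ids
    fetch_names=["concat_1.tmp_0"])

# get_client() lazily creates a LocalPredictor and loads the model in-process
local_predictor = handler.get_client()
print(handler.get_fetch_list())     # -> ["concat_1.tmp_0"]
```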
......@@ -19,6 +19,7 @@ from numpy import *
import logging
import functools
import json
import socket
from .channel import ChannelDataErrcode
from .proto import pipeline_service_pb2
from .proto import pipeline_service_pb2_grpc
......@@ -27,6 +28,10 @@ _LOGGER = logging.getLogger(__name__)
class PipelineClient(object):
"""
PipelineClient provides the basic capabilities of the pipeline SDK
"""
def __init__(self):
self._channel = None
self._profile_key = "pipeline.profile"
......@@ -43,29 +48,38 @@ class PipelineClient(object):
def _pack_request_package(self, feed_dict, profile):
req = pipeline_service_pb2.Request()
"""
logid = feed_dict.get("logid")
if logid is None:
req.logid = 0
else:
req.logid = long(logid)
feed_dict.pop("logid")
clientip = feed_dict.get("clientip")
if clientip is None:
hostname = socket.gethostname()
ip = socket.gethostbyname(hostname)
req.clientip = ip
else:
req.clientip = clientip
feed_dict.pop("clientip")
np.set_printoptions(threshold=sys.maxsize)
new_dict = {}
for key, value in feed_dict.items():
req.key.append(key)
if isinstance(value, np.ndarray):
new_dict[key] = value.__repr__()
req.value.append(value.__repr__())
elif isinstance(value, (str, unicode)):
new_dict[key] = value
req.value.append(value)
elif isinstance(value, list):
new_dict[key] = np.array(value).__repr__()
req.value.append(np.array(value).__repr__())
else:
raise TypeError("only str and np.ndarray type is supported: {}".
format(type(value)))
if profile:
new_dict[self._profile_key] = self._profile_value
"""
req.appid = feed_dict.get("appid")
req.logid = feed_dict.get("logid")
req.format = feed_dict.get("format")
setattr(req, "from", feed_dict.get("from"))
req.cmdid = feed_dict.get("cmdid")
req.clientip = feed_dict.get("clientip")
req.data = feed_dict.get("data")
req.key.append(self._profile_key)
req.value.append(self._profile_value)
return req
def _unpack_response_package(self, resp, fetch):
......
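With the request packing above, `logid` and `clientip` are reserved keys: they are popped out of feed_dict into dedicated proto fields, while every remaining entry must be a str, list, or np.ndarray and lands in the repeated key/value fields. A minimal sketch, reusing the server address and fetch name from the IMDB example:

```python
from paddle_serving_server.pipeline import PipelineClient

client = PipelineClient()
client.connect(['127.0.0.1:18070'])

ret = client.predict(
    feed_dict={
        "words": "i am very sad | 0",
        "logid": 10001,  # popped into Request.logid
        # "clientip" defaults to this host's IP when omitted
    },
    fetch=["prediction"])
print(ret.err_no, ret.err_msg)
```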
......@@ -32,6 +32,10 @@ _LOGGER = logging.getLogger(__name__)
class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
"""
Pipeline Servicer entrance.
"""
def __init__(self, name, response_op, dag_conf, worker_idx=-1):
super(PipelineServicer, self).__init__()
self._name = name
......@@ -42,9 +46,12 @@ class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
_LOGGER.info("[PipelineServicer] succ init")
def inference(self, request, context):
_LOGGER.info("inference request name:{} self.name:{}".format(
request.name, self._name))
_LOGGER.info("(log_id={}) inference request name:{} self.name:{}".
format(request.logid, request.name, self._name))
if request.name != "" and request.name != self._name:
_LOGGER.error("(log_id={}) name dismatch error. request.name:{},"
"server.name={}".format(request.logid, request.name,
self._name))
resp = pipeline_service_pb2.Response()
resp.err_no = channel.ChannelDataErrcode.NO_SERVICE.value
resp.err_msg = "Failed to inference: Service name error."
......@@ -56,7 +63,9 @@ class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
@contextlib.contextmanager
def _reserve_port(port):
"""Find and reserve a port for all subprocesses to use."""
"""
Find and reserve a port for all subprocesses to use.
"""
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT) == 0:
......@@ -69,6 +78,10 @@ def _reserve_port(port):
class PipelineServer(object):
"""
Pipeline Server : grpc gateway + grpc server.
"""
def __init__(self, name=None):
self._name = name # for grpc-gateway path
self._rpc_port = None
......@@ -77,6 +90,16 @@ class PipelineServer(object):
self._proxy_server = None
def _grpc_gateway(self, grpc_port, http_port):
"""
Running a gateway server, linking libproxy_server.so
Args:
grpc_port: GRPC port
http_port: HTTP port
Returns:
None
"""
import os
from ctypes import cdll
from . import gateway
......@@ -86,6 +109,17 @@ class PipelineServer(object):
proxy_server.run_proxy_server(grpc_port, http_port)
def _run_grpc_gateway(self, grpc_port, http_port):
"""
Starting the GRPC gateway in a new process. Exposing one available
HTTP port externally, and forwarding the data to the RPC port.
Args:
grpc_port: GRPC port
http_port: HTTP port
Returns:
None
"""
if http_port <= 0:
_LOGGER.info("Ignore grpc_gateway configuration.")
return
......@@ -102,6 +136,15 @@ class PipelineServer(object):
self._proxy_server.start()
def set_response_op(self, response_op):
"""
Set the response OP.
Args:
response_op: ResponseOp or its subclass object
Returns:
None
"""
if not isinstance(response_op, operator.ResponseOp):
raise Exception("Failed to set response_op: response_op "
"must be ResponseOp type.")
......@@ -112,6 +155,17 @@ class PipelineServer(object):
self._used_op, _ = dag.DAG.get_use_ops(self._response_op)
def prepare_server(self, yml_file=None, yml_dict=None):
"""
Reading configuration from the yml file (config.yaml) and launching
local services.
Args:
yml_file: read configuration from a yaml file
yml_dict: read configuration from a yaml dict.
Returns:
None
"""
conf = ServerYamlConfChecker.load_server_yaml_conf(
yml_file=yml_file, yml_dict=yml_dict)
......@@ -161,6 +215,15 @@ class PipelineServer(object):
self._start_local_rpc_service()
def _init_ops(self, op_conf):
"""
Initializing all OPs from the op config dictionary.
Args:
op_conf: the op configures in yaml dict.
Returns:
None.
"""
default_conf = {
"concurrency": 1,
"timeout": -1,
......@@ -190,6 +253,17 @@ class PipelineServer(object):
op.launch_local_rpc_service()
def run_server(self):
"""
If _build_dag_each_worker is True, starting _worker_num processes,
each running one GRPC server. Otherwise, starting one GRPC server.
Args:
None
Returns:
None
"""
if self._build_dag_each_worker:
with _reserve_port(self._rpc_port) as port:
bind_address = 'localhost:{}'.format(port)
......@@ -222,6 +296,15 @@ class PipelineServer(object):
server.wait_for_termination()
def _run_server_func(self, bind_address, response_op, dag_conf, worker_idx):
"""
Running one GRPC server with PipelineServicer.
Args:
bind_address: binding IP/Port
response_op: ResponseOp or its subclass object
dag_conf: DAG config
worker_idx: Process index.
"""
options = [('grpc.so_reuseport', 1),
('grpc.max_send_message_length', 256 * 1024 * 1024),
('grpc.max_send_message_length', 256 * 1024 * 1024)]
......@@ -237,6 +320,10 @@ class PipelineServer(object):
class ServerYamlConfChecker(object):
"""
Checking validities of server yaml files.
"""
def __init__(self):
pass
......
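Since prepare_server accepts either yml_file or yml_dict, a server can also be configured in-memory. A minimal sketch, assuming `response_op` has been wired up as in the IMDB example and that per-op settings fall back to the defaults in _init_ops when the op section is omitted:

```python
from paddle_serving_server.pipeline import PipelineServer

server = PipelineServer()
server.set_response_op(response_op)  # assumed: a ResponseOp built beforehand
server.prepare_server(yml_dict={
    "rpc_port": 18070,
    "http_port": 18071,
    "worker_num": 1,
    "build_dag_each_worker": False,
    "dag": {"is_thread_op": False},
})
server.run_server()
```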
......@@ -16,21 +16,19 @@ syntax = "proto2";
package baidu.paddle_serving.pipeline_serving;
message Request {
optional string name = 1;
optional string methond = 2;
optional string appid = 3;
optional int64 logid = 4;
optional string format = 5;
optional string from = 6;
optional string cmdid = 7;
optional string clientip = 8;
optional string data = 9;
repeated string key = 1;
repeated string value = 2;
optional string name = 3;
optional string method = 4;
optional int64 logid = 5;
optional string clientip = 6;
};
message Response {
optional int32 err_no = 1;
optional string err_msg = 2;
optional string result = 3;
repeated string key = 3;
repeated string value = 4;
};
service PipelineService {
......