From 761805a12d5a2c4ba2f039fe52c6fa33f41a9bcd Mon Sep 17 00:00:00 2001
From: tink2123
Date: Wed, 12 May 2021 20:19:40 +0800
Subject: [PATCH] add qps

---
 deploy/pdserving/README.md      | 76 +++++++++++++++++++++++++++------
 deploy/pdserving/README_CN.md   | 72 ++++++++++++++++++++++++++-----
 deploy/pdserving/config.yml     |  8 ++--
 deploy/pdserving/ocr_reader.py  | 10 ++---
 deploy/pdserving/web_service.py | 13 +++---
 5 files changed, 140 insertions(+), 39 deletions(-)

diff --git a/deploy/pdserving/README.md b/deploy/pdserving/README.md
index 3a38053e..b5d8953d 100644
--- a/deploy/pdserving/README.md
+++ b/deploy/pdserving/README.md
@@ -70,38 +70,38 @@ When using PaddleServing for service deployment, you need to convert the saved i
 Firstly, download the [inference model](https://github.com/PaddlePaddle/PaddleOCR#pp-ocr-20-series-model-listupdate-on-dec-15) of PPOCR
 ```
 # Download and unzip the OCR text detection model
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
 # Download and unzip the OCR text recognition model
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
 ```
-Then, you can use installed paddle_serving_client tool to convert inference model to server model.
+Then, you can use the installed paddle_serving_client tool to convert the mobile inference model into a format that is easy to deploy on the server.
 ```
 # Detection model conversion
-python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_server_v2.0_det_infer/ \
+python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_mobile_v2.0_det_infer/ \
                                          --model_filename inference.pdmodel          \
                                          --params_filename inference.pdiparams       \
-                                         --serving_server ./ppocr_det_server_2.0_serving/ \
-                                         --serving_client ./ppocr_det_server_2.0_client/
+                                         --serving_server ./ppocr_det_mobile_2.0_serving/ \
+                                         --serving_client ./ppocr_det_mobile_2.0_client/
 # Recognition model conversion
-python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_server_v2.0_rec_infer/ \
+python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_mobile_v2.0_rec_infer/ \
                                          --model_filename inference.pdmodel          \
                                          --params_filename inference.pdiparams       \
-                                         --serving_server ./ppocr_rec_server_2.0_serving/ \
-                                         --serving_client ./ppocr_rec_server_2.0_client/
+                                         --serving_server ./ppocr_rec_mobile_2.0_serving/ \
+                                         --serving_client ./ppocr_rec_mobile_2.0_client/
 ```
-After the detection model is converted, there will be additional folders of `ppocr_det_server_2.0_serving` and `ppocr_det_server_2.0_client` in the current folder, with the following format:
+After the detection model is converted, there will be additional folders of `ppocr_det_mobile_2.0_serving` and `ppocr_det_mobile_2.0_client` in the current folder, with the following format:
 ```
-|- ppocr_det_server_2.0_serving/
+|- ppocr_det_mobile_2.0_serving/
   |- __model__
   |- __params__
   |- serving_server_conf.prototxt
   |- serving_server_conf.stream.prototxt
-|- ppocr_det_server_2.0_client
+|- ppocr_det_mobile_2.0_client
   |- serving_client_conf.prototxt
   |- serving_client_conf.stream.prototxt
 
@@ -143,6 +143,58 @@ The recognition model is the same.
 After successfully running, the predicted result of the model will be printed in the cmd window.
 An example of the result is:
 ![](./imgs/results.png)
 
+Adjust the concurrency in config.yml to obtain the maximum QPS. Generally, the concurrency of detection and recognition should be about 2:1.
+
+```
+det:
+    concurrency: 8
+    ...
+rec:
+    concurrency: 4
+    ...
+```
+
+Multiple service requests can be sent at the same time if necessary.
+
+The predicted performance data will be automatically written into the `PipelineServingLogs/pipeline.tracer` file:
+
+```
+2021-05-12 10:03:24,812 ==================== TRACER ======================
+2021-05-12 10:03:24,904 Op(rec):
+2021-05-12 10:03:24,904         in[51.5634921875 ms]
+2021-05-12 10:03:24,904         prep[215.310859375 ms]
+2021-05-12 10:03:24,904         midp[33.1617109375 ms]
+2021-05-12 10:03:24,905         postp[10.451234375 ms]
+2021-05-12 10:03:24,905         out[9.736765625 ms]
+2021-05-12 10:03:24,905         idle[0.1914292677880819]
+2021-05-12 10:03:24,905 Op(det):
+2021-05-12 10:03:24,905         in[218.63487096774193 ms]
+2021-05-12 10:03:24,906         prep[357.35925 ms]
+2021-05-12 10:03:24,906         midp[31.47598387096774 ms]
+2021-05-12 10:03:24,906         postp[15.274870967741936 ms]
+2021-05-12 10:03:24,906         out[16.245693548387095 ms]
+2021-05-12 10:03:24,906         idle[0.3675805857279226]
+2021-05-12 10:03:24,906 DAGExecutor:
+2021-05-12 10:03:24,906         Query count[128]
+2021-05-12 10:03:24,906         QPS[12.8 q/s]
+2021-05-12 10:03:24,906         Succ[1.0]
+2021-05-12 10:03:24,907         Error req[]
+2021-05-12 10:03:24,907         Latency:
+2021-05-12 10:03:24,907                 ave[798.6557734374998 ms]
+2021-05-12 10:03:24,907                 .50[867.936 ms]
+2021-05-12 10:03:24,907                 .60[914.507 ms]
+2021-05-12 10:03:24,907                 .70[961.064 ms]
+2021-05-12 10:03:24,907                 .80[1043.264 ms]
+2021-05-12 10:03:24,907                 .90[1117.923 ms]
+2021-05-12 10:03:24,907                 .95[1207.056 ms]
+2021-05-12 10:03:24,908                 .99[1325.008 ms]
+2021-05-12 10:03:24,908 Channel (server worker num[10]):
+2021-05-12 10:03:24,909         chl0(In: ['@DAGExecutor'], Out: ['det']) size[0/0]
+2021-05-12 10:03:24,909         chl1(In: ['det'], Out: ['rec']) size[1/0]
+2021-05-12 10:03:24,910         chl2(In: ['rec'], Out: ['@DAGExecutor']) size[0/0]
+```
+
 ## FAQ
 **Q1**: No result return after sending the request.
diff --git a/deploy/pdserving/README_CN.md b/deploy/pdserving/README_CN.md
index 071d2947..37d018d0 100644
--- a/deploy/pdserving/README_CN.md
+++ b/deploy/pdserving/README_CN.md
@@ -68,38 +68,38 @@ PaddleOCR提供2种服务部署方式:
 首先,下载PPOCR的[inference模型](https://github.com/PaddlePaddle/PaddleOCR#pp-ocr-20-series-model-listupdate-on-dec-15)
 ```
 # 下载并解压 OCR 文本检测模型
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
 # 下载并解压 OCR 文本识别模型
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
 ```
 接下来,用安装的paddle_serving_client把下载的inference模型转换成易于server部署的模型格式。
 ```
 # 转换检测模型
-python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_server_v2.0_det_infer/ \
+python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_mobile_v2.0_det_infer/ \
                                          --model_filename inference.pdmodel          \
                                          --params_filename inference.pdiparams       \
-                                         --serving_server ./ppocr_det_server_2.0_serving/ \
-                                         --serving_client ./ppocr_det_server_2.0_client/
+                                         --serving_server ./ppocr_det_mobile_2.0_serving/ \
+                                         --serving_client ./ppocr_det_mobile_2.0_client/
 # 转换识别模型
-python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_server_v2.0_rec_infer/ \
+python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_mobile_v2.0_rec_infer/ \
                                          --model_filename inference.pdmodel          \
                                          --params_filename inference.pdiparams       \
-                                         --serving_server ./ppocr_rec_server_2.0_serving/ \
-                                         --serving_client ./ppocr_rec_server_2.0_client/
+                                         --serving_server ./ppocr_rec_mobile_2.0_serving/ \
+                                         --serving_client ./ppocr_rec_mobile_2.0_client/
 ```
-检测模型转换完成后,会在当前文件夹多出`ppocr_det_server_2.0_serving` 和`ppocr_det_server_2.0_client`的文件夹,具备如下格式:
+检测模型转换完成后,会在当前文件夹多出`ppocr_det_mobile_2.0_serving` 和`ppocr_det_mobile_2.0_client`的文件夹,具备如下格式:
 ```
-|- ppocr_det_server_2.0_serving/
+|- ppocr_det_mobile_2.0_serving/
   |- __model__
   |- __params__
   |- serving_server_conf.prototxt
   |- serving_server_conf.stream.prototxt
-|- ppocr_det_server_2.0_client
+|- ppocr_det_mobile_2.0_client
   |- serving_client_conf.prototxt
   |- serving_client_conf.stream.prototxt
 
@@ -140,6 +140,56 @@ python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_server_v2.0_rec_in
 成功运行后,模型预测的结果会打印在cmd窗口中,结果示例为:
 ![](./imgs/results.png)
 
+调整 config.yml 中的并发个数获得最大的QPS, 一般检测和识别的并发数为2:1
+```
+det:
+    #并发数,is_thread_op=True时,为线程并发;否则为进程并发
+    concurrency: 8
+    ...
+rec:
+    #并发数,is_thread_op=True时,为线程并发;否则为进程并发
+    concurrency: 4
+    ...
+```
+
+有需要的话可以同时发送多个服务请求
+
+预测性能数据会被自动写入 `PipelineServingLogs/pipeline.tracer` 文件中:
+
+```
+2021-05-12 10:03:24,812 ==================== TRACER ======================
+2021-05-12 10:03:24,904 Op(rec):
+2021-05-12 10:03:24,904         in[51.5634921875 ms]
+2021-05-12 10:03:24,904         prep[215.310859375 ms]
+2021-05-12 10:03:24,904         midp[33.1617109375 ms]
+2021-05-12 10:03:24,905         postp[10.451234375 ms]
+2021-05-12 10:03:24,905         out[9.736765625 ms]
+2021-05-12 10:03:24,905         idle[0.1914292677880819]
+2021-05-12 10:03:24,905 Op(det):
+2021-05-12 10:03:24,905         in[218.63487096774193 ms]
+2021-05-12 10:03:24,906         prep[357.35925 ms]
+2021-05-12 10:03:24,906         midp[31.47598387096774 ms]
+2021-05-12 10:03:24,906         postp[15.274870967741936 ms]
+2021-05-12 10:03:24,906         out[16.245693548387095 ms]
+2021-05-12 10:03:24,906         idle[0.3675805857279226]
+2021-05-12 10:03:24,906 DAGExecutor:
+2021-05-12 10:03:24,906         Query count[128]
+2021-05-12 10:03:24,906         QPS[12.8 q/s]
+2021-05-12 10:03:24,906         Succ[1.0]
+2021-05-12 10:03:24,907         Error req[]
+2021-05-12 10:03:24,907         Latency:
+2021-05-12 10:03:24,907                 ave[798.6557734374998 ms]
+2021-05-12 10:03:24,907                 .50[867.936 ms]
+2021-05-12 10:03:24,907                 .60[914.507 ms]
+2021-05-12 10:03:24,907                 .70[961.064 ms]
+2021-05-12 10:03:24,907                 .80[1043.264 ms]
+2021-05-12 10:03:24,907                 .90[1117.923 ms]
+2021-05-12 10:03:24,907                 .95[1207.056 ms]
+2021-05-12 10:03:24,908                 .99[1325.008 ms]
+2021-05-12 10:03:24,908 Channel (server worker num[10]):
+2021-05-12 10:03:24,909         chl0(In: ['@DAGExecutor'], Out: ['det']) size[0/0]
+2021-05-12 10:03:24,909         chl1(In: ['det'], Out: ['rec']) size[1/0]
+2021-05-12 10:03:24,910         chl2(In: ['rec'], Out: ['@DAGExecutor']) size[0/0]
+```
 
 ## FAQ
 
diff --git a/deploy/pdserving/config.yml b/deploy/pdserving/config.yml
index 8ab27dbc..6cac1b9f 100644
--- a/deploy/pdserving/config.yml
+++ b/deploy/pdserving/config.yml
@@ -26,7 +26,7 @@ dag:
 op:
     det:
         #并发数,is_thread_op=True时,为线程并发;否则为进程并发
-        concurrency: 2
+        concurrency: 8
 
         #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置
         local_service_conf:
@@ -34,7 +34,7 @@ op:
             client_type: local_predictor
 
             #det模型路径
-            model_config: ./ppocr_det_server_2.0_serving
+            model_config: ./ppocr_det_mobile_2.0_serving
 
             #Fetch结果列表,以client_config中fetch_var的alias_name为准
             fetch_list: ["save_infer_model/scale_0.tmp_1"]
@@ -45,7 +45,7 @@ op:
             ir_optim: False
     rec:
         #并发数,is_thread_op=True时,为线程并发;否则为进程并发
-        concurrency: 1
+        concurrency: 4
 
         #超时时间, 单位ms
         timeout: -1
@@ -60,7 +60,7 @@ op:
             client_type: local_predictor
 
             #rec模型路径
-            model_config: ./ppocr_rec_server_2.0_serving
+            model_config: ./ppocr_rec_mobile_2.0_serving
 
             #Fetch结果列表,以client_config中fetch_var的alias_name为准
             fetch_list: ["save_infer_model/scale_0.tmp_1"]
diff --git a/deploy/pdserving/ocr_reader.py b/deploy/pdserving/ocr_reader.py
index 71682e67..3f219784 100644
--- a/deploy/pdserving/ocr_reader.py
+++ b/deploy/pdserving/ocr_reader.py
@@ -33,12 +33,12 @@ class DetResizeForTest(object):
         elif 'limit_side_len' in kwargs:
             self.limit_side_len = kwargs['limit_side_len']
             self.limit_type = kwargs.get('limit_type', 'min')
-        elif 'resize_long' in kwargs:
-            self.resize_type = 2
-            self.resize_long = kwargs.get('resize_long', 960)
-        else:
+        elif 'resize_short' in kwargs:
             self.limit_side_len = 736
             self.limit_type = 'min'
+        else:
+            self.resize_type = 2
+            self.resize_long = kwargs.get('resize_long', 960)
 
     def __call__(self, data):
         img = deepcopy(data)
@@ -226,8 +226,6 @@ class CTCLabelDecode(BaseRecLabelDecode):
         super(CTCLabelDecode, self).__init__(config)
 
     def __call__(self, preds, label=None, *args, **kwargs):
-        #if isinstance(preds, paddle.Tensor):
-        #    preds = preds.numpy()
         preds_idx = preds.argmax(axis=2)
         preds_prob = preds.max(axis=2)
         text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
diff --git a/deploy/pdserving/web_service.py b/deploy/pdserving/web_service.py
index 43de8355..3f77f035 100644
--- a/deploy/pdserving/web_service.py
+++ b/deploy/pdserving/web_service.py
@@ -48,13 +48,12 @@ class DetOp(Op):
     def preprocess(self, input_dicts, data_id, log_id):
         (_, input_dict), = input_dicts.items()
         data = base64.b64decode(input_dict["image"].encode('utf8'))
+        self.raw_im = data
         data = np.fromstring(data, np.uint8)
         # Note: class variables(self.var) can only be used in process op mode
         im = cv2.imdecode(data, cv2.IMREAD_COLOR)
-        self.im = im
         self.ori_h, self.ori_w, _ = im.shape
-
-        det_img = self.det_preprocess(self.im)
+        det_img = self.det_preprocess(im)
         _, self.new_h, self.new_w = det_img.shape
         return {"x": det_img[np.newaxis, :].copy()}, False, None, ""
 
@@ -65,7 +64,7 @@ class DetOp(Op):
         ]
         dt_boxes_list = self.post_func(det_out, [ratio_list])
         dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
-        out_dict = {"dt_boxes": dt_boxes, "image": self.im}
+        out_dict = {"dt_boxes": dt_boxes, "image": self.raw_im}
 
         return out_dict, None, ""
 
@@ -80,7 +79,9 @@ class RecOp(Op):
 
     def preprocess(self, input_dicts, data_id, log_id):
         (_, input_dict), = input_dicts.items()
-        im = input_dict["image"]
+        raw_im = input_dict["image"]
+        data = np.frombuffer(raw_im, np.uint8)
+        im = cv2.imdecode(data, cv2.IMREAD_COLOR)
         dt_boxes = input_dict["dt_boxes"]
         dt_boxes = self.sorted_boxes(dt_boxes)
         feed_list = []
@@ -95,7 +96,6 @@ class RecOp(Op):
         boxes_size = len(dt_boxes)
         batch_size = boxes_size // max_batch_size
         rem = boxes_size % max_batch_size
-        #_LOGGER.info("max_batch_len:{}, batch_size:{}, rem:{}, boxes_size:{}".format(max_batch_size, batch_size, rem, boxes_size))
         for bt_idx in range(0, batch_size + 1):
             imgs = None
             boxes_num_in_one_batch = 0
@@ -131,6 +131,7 @@ class RecOp(Op):
             feed_list.append(feed)
 
         return feed_list, False, None, ""
+
     def postprocess(self, input_dicts, fetch_data, log_id):
         res_list = []
         if isinstance(fetch_data, dict):
-- 
GitLab
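
To benefit from the 8/4 det/rec concurrency this patch sets, the client has to keep several requests in flight at once. Below is a minimal client sketch for that; it assumes the pipeline server started from this directory's web_service.py is listening on http://127.0.0.1:9998/ocr/prediction (the http_port and service name used by this repo's config.yml and pipeline clients) and that the test image path exists; both are assumptions to adjust for your setup.

```
import base64
from concurrent.futures import ThreadPoolExecutor

import requests

# Assumed endpoint: port 9998 and service name "ocr" come from this repo's
# config.yml / web_service.py; change them if yours differ.
URL = "http://127.0.0.1:9998/ocr/prediction"


def send_one(img_path):
    with open(img_path, "rb") as f:
        image = base64.b64encode(f.read()).decode("utf8")
    # Pipeline services take parallel "key"/"value" lists in the JSON body.
    data = {"key": ["image"], "value": [image]}
    return requests.post(url=URL, json=data, timeout=30).json()


if __name__ == "__main__":
    # Hypothetical test image; 16 in-flight requests keep the 8 det / 4 rec
    # workers busy so the tracer reports a meaningful QPS.
    paths = ["./imgs/1.jpg"] * 16
    with ThreadPoolExecutor(max_workers=16) as pool:
        for res in pool.map(send_one, paths):
            print(res)
```

Raising the concurrency past the point where QPS stops improving mostly grows queueing latency, so tune it against the tracer output.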
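
Each TRACER window in `PipelineServingLogs/pipeline.tracer` reduces to a QPS sample plus a latency snapshot. A small parser sketch, assuming the line format matches the sample shown in the README hunks above (QPS[12.8 q/s], .90[1117.923 ms], and so on):

```
import re

TRACER = "PipelineServingLogs/pipeline.tracer"


def parse_tracer(path=TRACER):
    qps_samples = []
    percentiles = {}
    with open(path) as f:
        for line in f:
            m = re.search(r"QPS\[([\d.]+) q/s\]", line)
            if m:
                qps_samples.append(float(m.group(1)))
            m = re.search(r"\.(\d{2})\[([\d.]+) ms\]", line)
            if m:
                # Later TRACER windows overwrite earlier ones, so the dict
                # ends up holding the most recent percentile snapshot.
                percentiles["p" + m.group(1)] = float(m.group(2))
    return qps_samples, percentiles


qps, pct = parse_tracer()
print("QPS per window:", qps)  # e.g. [12.8]
print("latency:", pct)         # e.g. {'p50': 867.936, ..., 'p99': 1325.008}
```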
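
The ocr_reader.py hunk reorders the kwargs dispatch in DetResizeForTest.__init__: a resize_short kwarg now selects the fixed min-side-736 policy, and the bare else branch (no recognized kwarg) falls back to resize_type=2 with resize_long defaulting to 960. A toy sketch of just the branches visible in the hunk (the class's earlier branches are elided here), not the resizing itself:

```
def resize_policy(**kwargs):
    # Mirrors only the branch order shown in the ocr_reader.py hunk above.
    if 'limit_side_len' in kwargs:
        return ('limit', kwargs['limit_side_len'], kwargs.get('limit_type', 'min'))
    elif 'resize_short' in kwargs:
        return ('limit', 736, 'min')  # resize_short now means min side 736
    else:
        return ('resize_long', kwargs.get('resize_long', 960))  # new default


print(resize_policy())                    # ('resize_long', 960)
print(resize_policy(resize_short=True))   # ('limit', 736, 'min')
print(resize_policy(limit_side_len=960))  # ('limit', 960, 'min')
```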
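
The web_service.py hunks change what flows between the two ops: DetOp now keeps the original compressed bytes as self.raw_im and forwards them through the channel, and RecOp re-decodes them with np.frombuffer plus cv2.imdecode (np.frombuffer being the non-deprecated replacement for the np.fromstring still used on DetOp's input path). A minimal round-trip sketch of that handoff, assuming a local test JPEG at a hypothetical path:

```
import cv2
import numpy as np

# Hypothetical input; in the service these bytes arrive base64-decoded
# from the client request.
with open("test.jpg", "rb") as f:
    raw_im = f.read()  # what DetOp keeps as self.raw_im

# DetOp's view: decode once for detection preprocessing.
im_det = cv2.imdecode(np.frombuffer(raw_im, np.uint8), cv2.IMREAD_COLOR)

# RecOp's view: receive {"dt_boxes": ..., "image": raw_im} and decode again.
im_rec = cv2.imdecode(np.frombuffer(raw_im, np.uint8), cv2.IMREAD_COLOR)

assert im_det.shape == im_rec.shape  # both ops see identical pixels
print(im_rec.shape)                  # e.g. (h, w, 3)
```

Passing compressed bytes keeps the inter-op channel payload small and avoids relying on instance state shared between ops, which the in-code note says only works in process op mode.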