# diff --git a/deploy/pdserving/clas_local_server.py b/deploy/pdserving/clas_local_server.py
# new file mode 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle_serving_client import Client
from paddle_serving_app.reader import OCRReader
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from tools.infer.predict_cls import TextClassifier
from params import read_params

global_args = read_params()
if global_args.use_gpu:
    from paddle_serving_server_gpu.web_service import WebService
else:
    from paddle_serving_server.web_service import WebService


def _decode_image(b64_text):
    """Decode a base64 string into a BGR image array.

    Raises:
        ValueError: if OpenCV cannot decode the bytes as an image.
    """
    raw = base64.b64decode(b64_text.encode('utf8'))
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    im = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)
    if im is None:
        raise ValueError("request field 'image' is not a decodable image")
    return im


class TextClassifierHelper(TextClassifier):
    """Serving-side pre/post-processing for the text direction classifier.

    The parent constructor is deliberately not called; only the settings
    read by ``preprocess`` / ``postprocess`` are copied from ``args``.
    """

    def __init__(self, args):
        """Copy the classifier settings from the parsed parameters."""
        self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
        # NOTE(review): taken from rec_batch_num, not a cls-specific option;
        # it is not read anywhere in this module -- confirm it is still needed.
        self.cls_batch_num = args.rec_batch_num
        self.label_list = args.label_list
        self.cls_thresh = args.cls_thresh
        self.fetch = [
            "save_infer_model/scale_0.tmp_0", "save_infer_model/scale_1.tmp_0"
        ]

    def preprocess(self, img_list):
        """Build the model feed for a list of cropped text images.

        Images are normalized with ``self.resize_norm_img`` (not defined in
        this class, presumably inherited) and stacked in ascending
        width/height-ratio order.

        Args:
            img_list: images as arrays whose first two axes are height, width.

        Returns:
            ``(feed, fetch, args)``: the feed dict (key ``"image"``), the
            fetch variable names, and a dict holding ``"img_list"`` and the
            sort order ``"indices"`` needed by ``postprocess``.
        """
        aspect_ratios = [img.shape[1] / float(img.shape[0]) for img in img_list]
        # Sorting can speed up the cls process
        indices = np.argsort(np.array(aspect_ratios))
        norm_img_batch = np.concatenate([
            self.resize_norm_img(img_list[idx])[np.newaxis, :]
            for idx in indices
        ])
        feed = {"image": norm_img_batch.copy()}
        return feed, self.fetch, {"img_list": img_list, "indices": indices}

    def postprocess(self, outputs, args):
        """Map classifier outputs back to the input order and fix rotation.

        Args:
            outputs: the two fetched arrays, in ``self.fetch`` order.
            args: the dict returned by ``preprocess``.

        Returns:
            ``(img_list, cls_res)`` where ``cls_res[i]`` is ``[label, score]``
            for ``img_list[i]``; images labelled ``'180'`` with a score above
            ``cls_thresh`` are rotated by 180 degrees in ``img_list``.
        """
        prob_out, label_out = outputs[0], outputs[1]
        indices = args["indices"]
        # Fix: the original referenced an undefined name ``img_list`` here.
        img_list = args["img_list"]
        cls_res = [['', 0.0]] * len(label_out)
        # The label output is the 1-D one; swap if they arrived reversed.
        if len(label_out.shape) != 1:
            prob_out, label_out = label_out, prob_out
        for rno, label_idx in enumerate(label_out):
            score = prob_out[rno][label_idx]
            label = self.label_list[label_idx]
            src = indices[rno]
            cls_res[src] = [label, score]
            if '180' in label and score > self.cls_thresh:
                img_list[src] = cv2.rotate(img_list[src], cv2.ROTATE_180)
        return img_list, cls_res


class OCRService(WebService):
    """Web service exposing the direction classifier."""

    def init_rec(self):
        """Create the helper objects used by the request hooks."""
        self.ocr_reader = OCRReader()
        self.text_classifier = TextClassifierHelper(global_args)

    def preprocess(self, feed=[], fetch=[]):
        """Decode every base64 ``"image"`` in ``feed`` and build the model feed.

        Raises:
            ValueError: if one of the images cannot be decoded.
        """
        img_list = [_decode_image(item["image"]) for item in feed]
        # NOTE(review): per-request state is kept on the service instance.
        feed, fetch, self.tmp_args = self.text_classifier.preprocess(img_list)
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        """Turn the raw fetch map into ``{"pred_text": [...], "score": [...]}``."""
        outputs = [fetch_map[x] for x in self.text_classifier.fetch]
        for key in fetch_map:
            if ".lod" in key:
                self.tmp_args[key] = fetch_map[key]
        _, rec_res = self.text_classifier.postprocess(outputs, self.tmp_args)
        return {
            "pred_text": [x[0] for x in rec_res],
            "score": [str(x[1]) for x in rec_res]
        }


if __name__ == "__main__":
    ocr_service = OCRService(name="ocr")
    ocr_service.load_model_config(global_args.cls_model_dir)
    ocr_service.init_rec()
    if global_args.use_gpu:
        ocr_service.prepare_server(
            workdir="workdir", port=9292, device="gpu", gpuid=0)
    else:
        ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
    ocr_service.run_debugger_service()
    ocr_service.run_web_service()

# diff --git a/deploy/pdserving/clas_rpc_server.py b/deploy/pdserving/clas_rpc_server.py
# new file mode 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle_serving_client import Client
from paddle_serving_app.reader import OCRReader
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from tools.infer.predict_cls import TextClassifier
from params import read_params

global_args = read_params()
if global_args.use_gpu:
    from paddle_serving_server_gpu.web_service import WebService
else:
    from paddle_serving_server.web_service import WebService


def _decode_image(b64_text):
    """Decode a base64 string into a BGR image array.

    Raises:
        ValueError: if OpenCV cannot decode the bytes as an image.
    """
    raw = base64.b64decode(b64_text.encode('utf8'))
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    im = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)
    if im is None:
        raise ValueError("request field 'image' is not a decodable image")
    return im


class TextClassifierHelper(TextClassifier):
    """Serving-side pre/post-processing for the text direction classifier.

    The parent constructor is deliberately not called; only the settings
    read by ``preprocess`` / ``postprocess`` are copied from ``args``.
    """

    def __init__(self, args):
        """Copy the classifier settings from the parsed parameters."""
        self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
        # NOTE(review): taken from rec_batch_num, not a cls-specific option;
        # it is not read anywhere in this module -- confirm it is still needed.
        self.cls_batch_num = args.rec_batch_num
        self.label_list = args.label_list
        self.cls_thresh = args.cls_thresh
        self.fetch = [
            "save_infer_model/scale_0.tmp_0", "save_infer_model/scale_1.tmp_0"
        ]

    def preprocess(self, img_list):
        """Build the RPC feed for a list of cropped text images.

        Images are normalized with ``self.resize_norm_img`` (not defined in
        this class, presumably inherited) and ordered by ascending
        width/height ratio.

        Args:
            img_list: images as arrays whose first two axes are height, width.

        Returns:
            ``(feed, fetch, args)``. ``feed`` is a list with one
            ``{"image": ...}`` dict per image when there is more than one
            image, otherwise a single dict. ``args`` holds ``"img_list"`` and
            the sort order ``"indices"`` needed by ``postprocess``.
        """
        aspect_ratios = [img.shape[1] / float(img.shape[0]) for img in img_list]
        # Sorting can speed up the cls process
        indices = np.argsort(np.array(aspect_ratios))
        norm_img_batch = np.concatenate([
            self.resize_norm_img(img_list[idx])[np.newaxis, :]
            for idx in indices
        ])
        if len(img_list) > 1:
            feed = [{"image": norm_img} for norm_img in norm_img_batch]
        else:
            feed = {"image": norm_img_batch[0]}
        return feed, self.fetch, {"img_list": img_list, "indices": indices}

    def postprocess(self, outputs, args):
        """Map classifier outputs back to the input order and fix rotation.

        Args:
            outputs: the two fetched arrays, in ``self.fetch`` order.
            args: the dict returned by ``preprocess``.

        Returns:
            ``(img_list, cls_res)`` where ``cls_res[i]`` is ``[label, score]``
            for ``img_list[i]``; images labelled ``'180'`` with a score above
            ``cls_thresh`` are rotated by 180 degrees in ``img_list``.
        """
        prob_out, label_out = outputs[0], outputs[1]
        indices = args["indices"]
        # Fix: the original referenced an undefined name ``img_list`` here.
        img_list = args["img_list"]
        cls_res = [['', 0.0]] * len(label_out)
        # The label output is the 1-D one; swap if they arrived reversed.
        if len(label_out.shape) != 1:
            prob_out, label_out = label_out, prob_out
        for rno, label_idx in enumerate(label_out):
            score = prob_out[rno][label_idx]
            label = self.label_list[label_idx]
            src = indices[rno]
            cls_res[src] = [label, score]
            if '180' in label and score > self.cls_thresh:
                img_list[src] = cv2.rotate(img_list[src], cv2.ROTATE_180)
        return img_list, cls_res


class OCRService(WebService):
    """RPC-backed web service exposing the direction classifier."""

    def init_rec(self):
        """Create the helper objects used by the request hooks."""
        self.ocr_reader = OCRReader()
        self.text_classifier = TextClassifierHelper(global_args)

    def preprocess(self, feed=[], fetch=[]):
        """Decode every base64 ``"image"`` in ``feed`` and build the model feed.

        Raises:
            ValueError: if one of the images cannot be decoded.
        """
        # TODO: to handle batch rec images
        img_list = [_decode_image(item["image"]) for item in feed]
        # NOTE(review): per-request state is kept on the service instance.
        feed, fetch, self.tmp_args = self.text_classifier.preprocess(img_list)
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        """Turn the raw fetch map into ``{"direction": [...], "score": [...]}``."""
        outputs = [fetch_map[x] for x in self.text_classifier.fetch]
        for key in fetch_map:
            if ".lod" in key:
                self.tmp_args[key] = fetch_map[key]
        _, rec_res = self.text_classifier.postprocess(outputs, self.tmp_args)
        return {
            "direction": [x[0] for x in rec_res],
            "score": [str(x[1]) for x in rec_res]
        }


if __name__ == "__main__":
    ocr_service = OCRService(name="ocr")
    ocr_service.load_model_config(global_args.cls_model_dir)
    ocr_service.init_rec()
    if global_args.use_gpu:
        ocr_service.prepare_server(
            workdir="workdir", port=9292, device="gpu", gpuid=0)
    else:
        ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
    ocr_service.run_rpc_service()
    ocr_service.run_web_service()

# diff --git a/deploy/pdserving/clas_web_client.py b/deploy/pdserving/clas_web_client.py
# new file mode 100644
# +++ b/deploy/pdserving/clas_web_client.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-

import requests
import json
import cv2
import base64
import os, sys
import time


def cv2_to_base64(image):
    """Return ``image`` (raw bytes) base64-encoded as a UTF-8 string."""
    encoded = base64.b64encode(image)
    return encoded.decode('utf8')


# Post every file of the sample directory to the local prediction endpoint
# and print the JSON reply.
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:9292/ocr/prediction"
test_img_dir = "../../doc/imgs_words/ch/"
for img_file in os.listdir(test_img_dir):
    img_path = os.path.join(test_img_dir, img_file)
    with open(img_path, 'rb') as file:
        image_data1 = file.read()
    image = cv2_to_base64(image_data1)
    data = {"feed": [{"image": image}], "fetch": ["res"]}
    r = requests.post(url=url, headers=headers, data=json.dumps(data))
    print(r.json())

# diff --git a/deploy/pdserving/det_local_server.py b/deploy/pdserving/det_local_server.py
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from tools.infer.predict_det import TextDetector
from params import read_params

global_args = read_params()
if global_args.use_gpu:
    from paddle_serving_server_gpu.web_service import WebService
else:
    from paddle_serving_server.web_service import WebService


def _decode_image(b64_text):
    """Decode a base64 string into a BGR image array.

    Raises:
        ValueError: if OpenCV cannot decode the bytes as an image.
    """
    raw = base64.b64decode(b64_text.encode('utf8'))
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    im = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)
    if im is None:
        raise ValueError("request field 'image' is not a decodable image")
    return im


class TextDetectorHelper(TextDetector):
    """Serving-side pre/post-processing for the text detector."""

    def __init__(self, args):
        """Initialise the parent detector and pick the fetch variable names."""
        super(TextDetectorHelper, self).__init__(args)
        # NOTE(review): ``postprocess`` maps outputs positionally. For SAST it
        # reads index 1 as 'f_score' while index 1 here is the tco variable,
        # and for EAST it reads index 0 as 'f_geo' while index 0 here is a
        # sigmoid output -- the orders look inconsistent; confirm against the
        # exported models before relying on SAST/EAST.
        if self.det_algorithm == "SAST":
            self.fetch = [
                "bn_f_border4.output.tmp_2", "bn_f_tco4.output.tmp_2",
                "bn_f_tvo4.output.tmp_2", "sigmoid_0.tmp_0"
            ]
        elif self.det_algorithm == "EAST":
            self.fetch = ["sigmoid_0.tmp_0", "tmp_2"]
        elif self.det_algorithm == "DB":
            self.fetch = ["save_infer_model/scale_0.tmp_0"]

    def preprocess(self, img):
        """Build the detector feed for one image.

        Args:
            img: the decoded image; a copy is taken before processing.

        Returns:
            ``(feed, fetch, args)``: the feed dict (key ``"image"``), the
            fetch variable names, and a dict with ``"ratio_list"`` and the
            copied original image ``"ori_im"`` for ``postprocess``.

        Raises:
            ValueError: if ``self.preprocess_op`` yields no image. The
                original returned a 2-tuple here, which made every caller's
                3-value unpacking fail with an unrelated ValueError.
        """
        img = img.copy()
        im, ratio_list = self.preprocess_op(img)
        if im is None:
            raise ValueError("detector preprocessing produced no image")
        feed = {"image": im.copy()}
        return feed, self.fetch, {"ratio_list": [ratio_list], "ori_im": img}

    def postprocess(self, outputs, args):
        """Convert fetched arrays into filtered detection boxes.

        Args:
            outputs: fetched arrays, in ``self.fetch`` order.
            args: the dict returned by ``preprocess``.

        Returns:
            The boxes of the first (only) image after filtering against the
            original image shape.
        """
        if self.det_algorithm == "EAST":
            outs_dict = {'f_geo': outputs[0], 'f_score': outputs[1]}
        elif self.det_algorithm == 'SAST':
            outs_dict = {
                'f_border': outputs[0],
                'f_score': outputs[1],
                'f_tco': outputs[2],
                'f_tvo': outputs[3]
            }
        else:
            outs_dict = {'maps': outputs[0]}
        dt_boxes_list = self.postprocess_op(outs_dict, args["ratio_list"])
        dt_boxes = dt_boxes_list[0]
        ori_shape = args["ori_im"].shape
        if self.det_algorithm == "SAST" and self.det_sast_polygon:
            return self.filter_tag_det_res_only_clip(dt_boxes, ori_shape)
        return self.filter_tag_det_res(dt_boxes, ori_shape)


class DetService(WebService):
    """Web service exposing the text detector."""

    def init_det(self):
        """Create the detector helper used by the request hooks."""
        self.text_detector = TextDetectorHelper(global_args)

    def preprocess(self, feed=[], fetch=[]):
        """Decode the first image of ``feed`` and build the detector feed.

        Raises:
            ValueError: if the image cannot be decoded or preprocessed.
        """
        im = _decode_image(feed[0]["image"])
        # NOTE(review): per-request state is kept on the service instance.
        feed, fetch, self.tmp_args = self.text_detector.preprocess(im)
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        """Return the detected boxes as ``{"boxes": nested list}``."""
        outputs = [fetch_map[x] for x in fetch]
        res = self.text_detector.postprocess(outputs, self.tmp_args)
        return {"boxes": res.tolist()}


if __name__ == "__main__":
    ocr_service = DetService(name="ocr")
    ocr_service.load_model_config(global_args.det_model_dir)
    ocr_service.init_det()
    if global_args.use_gpu:
        ocr_service.prepare_server(
            workdir="workdir", port=9292, device="gpu", gpuid=0)
    else:
        ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
    ocr_service.run_debugger_service()
    ocr_service.run_web_service()

# diff --git a/deploy/pdserving/det_rpc_server.py b/deploy/pdserving/det_rpc_server.py
# new file mode 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle_serving_client import Client
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from tools.infer.predict_det import TextDetector
from params import read_params

global_args = read_params()
if global_args.use_gpu:
    from paddle_serving_server_gpu.web_service import WebService
else:
    from paddle_serving_server.web_service import WebService


def _decode_image(b64_text):
    """Decode a base64 string into a BGR image array.

    Raises:
        ValueError: if OpenCV cannot decode the bytes as an image.
    """
    raw = base64.b64decode(b64_text.encode('utf8'))
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    im = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)
    if im is None:
        raise ValueError("request field 'image' is not a decodable image")
    return im


class TextDetectorHelper(TextDetector):
    """Serving-side pre/post-processing for the text detector (RPC variant)."""

    def __init__(self, args):
        """Initialise the parent detector and pick the fetch variable names."""
        super(TextDetectorHelper, self).__init__(args)
        # NOTE(review): ``postprocess`` maps outputs positionally. For SAST it
        # reads index 1 as 'f_score' while index 1 here is the tco variable,
        # and for EAST it reads index 0 as 'f_geo' while index 0 here is a
        # sigmoid output -- the orders look inconsistent; confirm against the
        # exported models before relying on SAST/EAST.
        if self.det_algorithm == "SAST":
            self.fetch = [
                "bn_f_border4.output.tmp_2", "bn_f_tco4.output.tmp_2",
                "bn_f_tvo4.output.tmp_2", "sigmoid_0.tmp_0"
            ]
        elif self.det_algorithm == "EAST":
            self.fetch = ["sigmoid_0.tmp_0", "tmp_2"]
        elif self.det_algorithm == "DB":
            self.fetch = ["save_infer_model/scale_0.tmp_0"]

    def preprocess(self, img):
        """Build the detector feed for one image.

        Args:
            img: the decoded image; it is passed on unchanged as ``"ori_im"``.

        Returns:
            ``(feed, fetch, args)``: the feed dict whose ``"image"`` is the
            first element of the preprocessed array, the fetch variable
            names, and a dict with ``"ratio_list"`` and ``"ori_im"`` for
            ``postprocess``.

        Raises:
            ValueError: if ``self.preprocess_op`` yields no image. The
                original returned a 2-tuple here, which made every caller's
                3-value unpacking fail with an unrelated ValueError.
        """
        im, ratio_list = self.preprocess_op(img)
        if im is None:
            raise ValueError("detector preprocessing produced no image")
        feed = {"image": im[0]}
        return feed, self.fetch, {"ratio_list": [ratio_list], "ori_im": img}

    def postprocess(self, outputs, args):
        """Convert fetched arrays into filtered detection boxes.

        Args:
            outputs: fetched arrays, in ``self.fetch`` order.
            args: the dict returned by ``preprocess``.

        Returns:
            The boxes of the first (only) image after filtering against the
            original image shape.
        """
        if self.det_algorithm == "EAST":
            outs_dict = {'f_geo': outputs[0], 'f_score': outputs[1]}
        elif self.det_algorithm == 'SAST':
            outs_dict = {
                'f_border': outputs[0],
                'f_score': outputs[1],
                'f_tco': outputs[2],
                'f_tvo': outputs[3]
            }
        else:
            outs_dict = {'maps': outputs[0]}
        dt_boxes_list = self.postprocess_op(outs_dict, args["ratio_list"])
        dt_boxes = dt_boxes_list[0]
        ori_shape = args["ori_im"].shape
        if self.det_algorithm == "SAST" and self.det_sast_polygon:
            return self.filter_tag_det_res_only_clip(dt_boxes, ori_shape)
        return self.filter_tag_det_res(dt_boxes, ori_shape)


class DetService(WebService):
    """RPC-backed web service exposing the text detector."""

    def init_det(self):
        """Create the detector helper used by the request hooks."""
        self.text_detector = TextDetectorHelper(global_args)

    def preprocess(self, feed=[], fetch=[]):
        """Decode the first image of ``feed`` and build the detector feed.

        Raises:
            ValueError: if the image cannot be decoded or preprocessed.
        """
        im = _decode_image(feed[0]["image"])
        # NOTE(review): per-request state is kept on the service instance.
        feed, fetch, self.tmp_args = self.text_detector.preprocess(im)
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        """Return the detected boxes as ``{"boxes": nested list}``."""
        outputs = [fetch_map[x] for x in fetch]
        res = self.text_detector.postprocess(outputs, self.tmp_args)
        return {"boxes": res.tolist()}


if __name__ == "__main__":
    ocr_service = DetService(name="ocr")
    ocr_service.load_model_config(global_args.det_model_dir)
    ocr_service.init_det()
    if global_args.use_gpu:
        ocr_service.prepare_server(
            workdir="workdir", port=9292, device="gpu", gpuid=0)
    else:
        ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
    ocr_service.run_rpc_service()
    ocr_service.run_web_service()

# diff --git a/deploy/pdserving/det_web_client.py b/deploy/pdserving/det_web_client.py
# new file mode 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-

import requests
import json
import cv2
import base64
import os, sys
import time


def cv2_to_base64(image):
    """Return ``image`` (raw bytes) base64-encoded as a UTF-8 string."""
    encoded = base64.b64encode(image)
    return encoded.decode('utf8')


# Post every file of the sample directory to the local prediction endpoint
# and print the JSON reply.
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:9292/ocr/prediction"
test_img_dir = "../../doc/imgs/"
for img_file in os.listdir(test_img_dir):
    img_path = os.path.join(test_img_dir, img_file)
    with open(img_path, 'rb') as file:
        image_data1 = file.read()
    image = cv2_to_base64(image_data1)
    data = {"feed": [{"image": image}], "fetch": ["res"]}
    r = requests.post(url=url, headers=headers, data=json.dumps(data))
    rjson = r.json()
    print(rjson)

# diff --git a/deploy/pdserving/det_web_server.py b/deploy/pdserving/det_web_server.py
# deleted file mode 100644
- -from paddle_serving_client import Client -import cv2 -import sys -import numpy as np -import os -from paddle_serving_client import Client -from paddle_serving_app.reader import Sequential, ResizeByFactor -from paddle_serving_app.reader import Div, Normalize, Transpose -from paddle_serving_app.reader import DBPostProcess, FilterBoxes -if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService -elif sys.argv[1] == 'cpu': - from paddle_serving_server.web_service import WebService -import time -import re -import base64 - - -class OCRService(WebService): - def init_det(self): - self.det_preprocess = Sequential([ - ResizeByFactor(32, 960), Div(255), - Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose( - (2, 0, 1)) - ]) - self.filter_func = FilterBoxes(10, 10) - self.post_func = DBPostProcess({ - "thresh": 0.3, - "box_thresh": 0.5, - "max_candidates": 1000, - "unclip_ratio": 1.5, - "min_size": 3 - }) - - def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) - data = np.fromstring(data, np.uint8) - im = cv2.imdecode(data, cv2.IMREAD_COLOR) - self.ori_h, self.ori_w, _ = im.shape - det_img = self.det_preprocess(im) - _, self.new_h, self.new_w = det_img.shape - print(det_img) - return {"image": det_img}, ["concat_1.tmp_0"] - - def postprocess(self, feed={}, fetch=[], fetch_map=None): - det_out = fetch_map["concat_1.tmp_0"] - ratio_list = [ - float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w - ] - dt_boxes_list = self.post_func(det_out, [ratio_list]) - dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w]) - return {"dt_boxes": dt_boxes.tolist()} - - -ocr_service = OCRService(name="ocr") -ocr_service.load_model_config("ocr_det_model") -if sys.argv[1] == 'gpu': - ocr_service.set_gpus("0") - ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) -elif sys.argv[1] == 'cpu': - ocr_service.prepare_server(workdir="workdir", port=9292, 
# diff --git a/deploy/pdserving/ocr_local_server.py b/deploy/pdserving/ocr_local_server.py
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from clas_local_server import TextClassifierHelper
from det_local_server import TextDetectorHelper
from rec_local_server import TextRecognizerHelper
from tools.infer.predict_system import TextSystem, sorted_boxes
from paddle_serving_app.local_predict import Debugger
import copy
from params import read_params

global_args = read_params()

if global_args.use_gpu:
    from paddle_serving_server_gpu.web_service import WebService
else:
    from paddle_serving_server.web_service import WebService


def _decode_image(b64_text):
    """Decode a base64 string into a BGR image array.

    Raises:
        ValueError: if OpenCV cannot decode the bytes as an image.
    """
    raw = base64.b64decode(b64_text.encode('utf8'))
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    im = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)
    if im is None:
        raise ValueError("request field 'image' is not a decodable image")
    return im


class TextSystemHelper(TextSystem):
    """Detection (+ optional direction classification) ahead of recognition.

    Detection and classification run in-process through ``Debugger``
    predictors; recognition is left to the hosting web service.
    """

    def __init__(self, args):
        """Create the helpers and load the local det/cls predictors."""
        self.text_detector = TextDetectorHelper(args)
        self.text_recognizer = TextRecognizerHelper(args)
        self.use_angle_cls = args.use_angle_cls
        # Fix: honour the configured device instead of forcing gpu=True.
        use_gpu = bool(global_args.use_gpu)
        if self.use_angle_cls:
            self.clas_client = Debugger()
            self.clas_client.load_model_config(
                global_args.cls_model_dir, gpu=use_gpu, profile=False)
            self.text_classifier = TextClassifierHelper(args)
        self.det_client = Debugger()
        self.det_client.load_model_config(
            global_args.det_model_dir, gpu=use_gpu, profile=False)
        self.fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]

    def preprocess(self, img):
        """Detect text boxes in ``img`` and build the recognizer feed.

        Returns:
            ``(feed, fetch, args)`` for the recognition model; ``fetch`` is
            ``self.fetch`` and ``args`` is the recognizer's bookkeeping.

        Raises:
            ValueError: if detection yields ``None``. The original returned a
                2-tuple here, which made the caller's 3-value unpacking fail
                with an unrelated ValueError.
        """
        feed, fetch, self.tmp_args = self.text_detector.preprocess(img)
        fetch_map = self.det_client.predict(feed, fetch)
        outputs = [fetch_map[x] for x in fetch]
        dt_boxes = self.text_detector.postprocess(outputs, self.tmp_args)
        if dt_boxes is None:
            raise ValueError("text detection returned no result")
        # Each box is copied before cropping, as in the original code.
        img_crop_list = [
            self.get_rotate_crop_image(img, copy.deepcopy(box))
            for box in sorted_boxes(dt_boxes)
        ]
        if self.use_angle_cls:
            feed, fetch, self.tmp_args = self.text_classifier.preprocess(
                img_crop_list)
            fetch_map = self.clas_client.predict(feed, fetch)
            outputs = [fetch_map[x] for x in self.text_classifier.fetch]
            for key in fetch_map:
                if ".lod" in key:
                    self.tmp_args[key] = fetch_map[key]
            img_crop_list, _ = self.text_classifier.postprocess(outputs,
                                                                self.tmp_args)
        feed, fetch, self.tmp_args = self.text_recognizer.preprocess(
            img_crop_list)
        return feed, self.fetch, self.tmp_args

    def postprocess(self, outputs, args):
        """Delegate decoding of the recognition outputs to the recognizer."""
        return self.text_recognizer.postprocess(outputs, args)


class OCRService(WebService):
    """Web service running the full detect -> classify -> recognize chain."""

    def init_rec(self):
        """Create the text system helper used by the request hooks."""
        self.text_system = TextSystemHelper(global_args)

    def preprocess(self, feed=[], fetch=[]):
        """Decode the first image of ``feed`` and build the recognizer feed.

        Raises:
            ValueError: if the image cannot be decoded or detection fails.
        """
        # TODO: to handle batch rec images
        im = _decode_image(feed[0]["image"])
        # NOTE(review): per-request state is kept on the service instance.
        feed, fetch, self.tmp_args = self.text_system.preprocess(im)
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        """Turn the raw fetch map into ``{"pred_text": [...], "score": [...]}``."""
        outputs = [fetch_map[x] for x in self.text_system.fetch]
        for key in fetch_map:
            if ".lod" in key:
                self.tmp_args[key] = fetch_map[key]
        rec_res = self.text_system.postprocess(outputs, self.tmp_args)
        return {
            "pred_text": [x[0] for x in rec_res],
            "score": [str(x[1]) for x in rec_res]
        }


if __name__ == "__main__":
    ocr_service = OCRService(name="ocr")
    ocr_service.load_model_config(global_args.rec_model_dir)
    ocr_service.init_rec()
    if global_args.use_gpu:
        ocr_service.prepare_server(
            workdir="workdir", port=9292, device="gpu", gpuid=0)
    else:
        ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
    ocr_service.run_debugger_service()
    ocr_service.run_web_service()

# diff --git a/deploy/pdserving/ocr_rpc_server.py b/deploy/pdserving/ocr_rpc_server.py
# new file mode 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle_serving_client import Client
from paddle_serving_app.reader import OCRReader
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from clas_rpc_server import TextClassifierHelper
from det_rpc_server import TextDetectorHelper
from rec_rpc_server import TextRecognizerHelper
from tools.infer.predict_system import TextSystem, sorted_boxes
import copy
from params import read_params

global_args = read_params()
if global_args.use_gpu:
    from paddle_serving_server_gpu.web_service import WebService
else:
    from paddle_serving_server.web_service import WebService


def _decode_image(b64_text):
    """Decode a base64 string into a BGR image array.

    Raises:
        ValueError: if OpenCV cannot decode the bytes as an image.
    """
    raw = base64.b64decode(b64_text.encode('utf8'))
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    im = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)
    if im is None:
        raise ValueError("request field 'image' is not a decodable image")
    return im


class TextSystemHelper(TextSystem):
    """Detection (+ optional direction classification) ahead of recognition.

    Detection and classification are requested from separate serving
    processes through ``Client`` connections; recognition is left to the
    hosting web service.
    """

    def __init__(self, args):
        """Create the helpers and connect to the det/cls serving endpoints."""
        self.text_detector = TextDetectorHelper(args)
        self.text_recognizer = TextRecognizerHelper(args)
        self.use_angle_cls = args.use_angle_cls
        if self.use_angle_cls:
            self.clas_client = Client()
            self.clas_client.load_client_config(
                "ocr_clas_client/serving_client_conf.prototxt")
            self.clas_client.connect(["127.0.0.1:9294"])
            self.text_classifier = TextClassifierHelper(args)
        self.det_client = Client()
        self.det_client.load_client_config(
            "det_db_client/serving_client_conf.prototxt")
        self.det_client.connect(["127.0.0.1:9293"])
        self.fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]

    def preprocess(self, img):
        """Detect text boxes in ``img`` and build the recognizer feed.

        Returns:
            ``(feed, fetch, args)`` for the recognition model; ``fetch`` is
            ``self.fetch`` and ``args`` is the recognizer's bookkeeping.

        Raises:
            ValueError: if detection yields ``None``. The original returned a
                2-tuple here, which made the caller's 3-value unpacking fail
                with an unrelated ValueError.
        """
        feed, fetch, self.tmp_args = self.text_detector.preprocess(img)
        fetch_map = self.det_client.predict(feed, fetch)
        outputs = [fetch_map[x] for x in fetch]
        dt_boxes = self.text_detector.postprocess(outputs, self.tmp_args)
        # The leftover debug print() calls of the raw detection and
        # classifier results were removed from this request path.
        if dt_boxes is None:
            raise ValueError("text detection returned no result")
        # Each box is copied before cropping, as in the original code.
        img_crop_list = [
            self.get_rotate_crop_image(img, copy.deepcopy(box))
            for box in sorted_boxes(dt_boxes)
        ]
        if self.use_angle_cls:
            feed, fetch, self.tmp_args = self.text_classifier.preprocess(
                img_crop_list)
            fetch_map = self.clas_client.predict(feed, fetch)
            outputs = [fetch_map[x] for x in self.text_classifier.fetch]
            for key in fetch_map:
                if ".lod" in key:
                    self.tmp_args[key] = fetch_map[key]
            img_crop_list, _ = self.text_classifier.postprocess(outputs,
                                                                self.tmp_args)
        feed, fetch, self.tmp_args = self.text_recognizer.preprocess(
            img_crop_list)
        return feed, self.fetch, self.tmp_args

    def postprocess(self, outputs, args):
        """Delegate decoding of the recognition outputs to the recognizer."""
        return self.text_recognizer.postprocess(outputs, args)


class OCRService(WebService):
    """RPC-backed web service running detect -> classify -> recognize."""

    def init_rec(self):
        """Create the text system helper used by the request hooks."""
        self.text_system = TextSystemHelper(global_args)

    def preprocess(self, feed=[], fetch=[]):
        """Decode the first image of ``feed`` and build the recognizer feed.

        Raises:
            ValueError: if the image cannot be decoded or detection fails.
        """
        # TODO: to handle batch rec images
        im = _decode_image(feed[0]["image"])
        # NOTE(review): per-request state is kept on the service instance.
        feed, fetch, self.tmp_args = self.text_system.preprocess(im)
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        """Turn the raw fetch map into ``{"pred_text": [...], "score": [...]}``."""
        outputs = [fetch_map[x] for x in self.text_system.fetch]
        for key in fetch_map:
            if ".lod" in key:
                self.tmp_args[key] = fetch_map[key]
        rec_res = self.text_system.postprocess(outputs, self.tmp_args)
        return {
            "pred_text": [x[0] for x in rec_res],
            "score": [str(x[1]) for x in rec_res]
        }


if __name__ == "__main__":
    ocr_service = OCRService(name="ocr")
    ocr_service.load_model_config(global_args.rec_model_dir)
    ocr_service.init_rec()
    if global_args.use_gpu:
        ocr_service.prepare_server(
            workdir="workdir", port=9292, device="gpu", gpuid=0)
    else:
        ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
    ocr_service.run_rpc_service()
    ocr_service.run_web_service()

# diff --git a/deploy/pdserving/ocr_web_client.py b/deploy/pdserving/ocr_web_client.py
import base64
import os, sys
import time
def cv2_to_base64(image): #data = cv2.imencode('.jpg', image)[1] return base64.b64encode(image).decode( 'utf8') #data.tostring()).decode('utf8') + headers = {"Content-type": "application/json"} url = "http://127.0.0.1:9292/ocr/prediction" test_img_dir = "../../doc/imgs/" @@ -34,4 +36,5 @@ for img_file in os.listdir(test_img_dir): image = cv2_to_base64(image_data1) data = {"feed": [{"image": image}], "fetch": ["res"]} r = requests.post(url=url, headers=headers, data=json.dumps(data)) - print(r.json()) + rjson = r.json() + print(rjson) diff --git a/deploy/pdserving/ocr_web_server.py b/deploy/pdserving/ocr_web_server.py deleted file mode 100644 index 6c0de44661958a6425f57039261969551ff552c5..0000000000000000000000000000000000000000 --- a/deploy/pdserving/ocr_web_server.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle_serving_client import Client -from paddle_serving_app.reader import OCRReader -import cv2 -import sys -import numpy as np -import os -from paddle_serving_client import Client -from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor -from paddle_serving_app.reader import Div, Normalize, Transpose -from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes -if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService -elif sys.argv[1] == 'cpu': - from paddle_serving_server.web_service import WebService -import time -import re -import base64 - - -class OCRService(WebService): - def init_det_client(self, det_port, det_client_config): - self.det_preprocess = Sequential([ - ResizeByFactor(32, 960), Div(255), - Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose( - (2, 0, 1)) - ]) - self.det_client = Client() - self.det_client.load_client_config(det_client_config) - self.det_client.connect(["127.0.0.1:{}".format(det_port)]) - self.ocr_reader = OCRReader() - - def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) - data = np.fromstring(data, np.uint8) - im = cv2.imdecode(data, cv2.IMREAD_COLOR) - ori_h, ori_w, _ = im.shape - det_img = self.det_preprocess(im) - det_out = self.det_client.predict( - feed={"image": det_img}, fetch=["concat_1.tmp_0"]) - _, new_h, new_w = det_img.shape - filter_func = FilterBoxes(10, 10) - post_func = DBPostProcess({ - "thresh": 0.3, - "box_thresh": 0.5, - "max_candidates": 1000, - "unclip_ratio": 1.5, - "min_size": 3 - }) - sorted_boxes = SortedBoxes() - ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w] - dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list]) - dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w]) - dt_boxes = sorted_boxes(dt_boxes) - get_rotate_crop_image = GetRotateCropImage() - feed_list = [] - img_list = [] - max_wh_ratio = 0 - for i, dtbox 
in enumerate(dt_boxes): - boximg = get_rotate_crop_image(im, dt_boxes[i]) - img_list.append(boximg) - h, w = boximg.shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for img in img_list: - norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) - feed = {"image": norm_img} - feed_list.append(feed) - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] - return feed_list, fetch - - def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) - res_lst = [] - for res in rec_res: - res_lst.append(res[0]) - res = {"res": res_lst} - return res - - -ocr_service = OCRService(name="ocr") -ocr_service.load_model_config("ocr_rec_model") -if sys.argv[1] == 'gpu': - ocr_service.set_gpus("0") - ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) -elif sys.argv[1] == 'cpu': - ocr_service.prepare_server(workdir="workdir", port=9292) -ocr_service.init_det_client( - det_port=9293, - det_client_config="ocr_det_client/serving_client_conf.prototxt") -ocr_service.run_rpc_service() -ocr_service.run_web_service() diff --git a/deploy/pdserving/params.py b/deploy/pdserving/params.py new file mode 100644 index 0000000000000000000000000000000000000000..9d0b082fb983776053619353ca8736e8c3dc8d79 --- /dev/null +++ b/deploy/pdserving/params.py @@ -0,0 +1,50 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +class Config(object): + pass + +def read_params(): + cfg = Config() + #use gpu + cfg.use_gpu = False + cfg.use_pdserving = True + + #params for text detector + cfg.det_algorithm = "DB" + cfg.det_model_dir = "./det_infer_server/" + cfg.det_max_side_len = 960 + + #DB params + cfg.det_db_thresh =0.3 + cfg.det_db_box_thresh =0.5 + cfg.det_db_unclip_ratio =2.0 + + #EAST params + cfg.det_east_score_thresh = 0.8 + cfg.det_east_cover_thresh = 0.1 + cfg.det_east_nms_thresh = 0.2 
+ + #params for text recognizer + cfg.rec_algorithm = "CRNN" + cfg.rec_model_dir = "./rec_infer_server/" + + cfg.rec_image_shape = "3, 32, 320" + cfg.rec_char_type = 'ch' + cfg.rec_batch_num = 30 + cfg.max_text_length = 25 + + cfg.rec_char_dict_path = "./ppocr_keys_v1.txt" + cfg.use_space_char = True + + #params for text classifier + cfg.use_angle_cls = True + cfg.cls_model_dir = "./cls_infer_server/" + cfg.cls_image_shape = "3, 48, 192" + cfg.label_list = ['0', '180'] + cfg.cls_batch_num = 30 + cfg.cls_thresh = 0.9 + + return cfg diff --git a/deploy/pdserving/readme.md b/deploy/pdserving/readme.md deleted file mode 100644 index af12d508ba9c04e6032f2a392701e72b41462395..0000000000000000000000000000000000000000 --- a/deploy/pdserving/readme.md +++ /dev/null @@ -1,120 +0,0 @@ -[English](readme_en.md) | 简体中文 - -PaddleOCR提供2种服务部署方式: -- 基于PaddleHub Serving的部署:代码路径为"`./deploy/hubserving`",使用方法参考[文档](../hubserving/readme.md)。 -- 基于PaddleServing的部署:代码路径为"`./deploy/pdserving`",按照本教程使用。 - -# Paddle Serving 服务部署 -本教程将介绍基于[Paddle Serving](https://github.com/PaddlePaddle/Serving)部署PaddleOCR在线预测服务的详细步骤。 - -## 快速启动服务 - -### 1. 准备环境 -我们先安装Paddle Serving相关组件 -我们推荐用户使用GPU来做Paddle Serving的OCR服务部署 - -**CUDA版本:9.0** - -**CUDNN版本:7.0** - -**操作系统版本:CentOS 6以上** - -**Python版本: 2.7/3.6/3.7** - -**Python操作指南:** -``` -#CPU/GPU版本选择一个 -#GPU版本服务端 -python -m pip install paddle_serving_server_gpu -#CPU版本服务端 -python -m pip install paddle_serving_server -#客户端和App包使用以下链接(CPU,GPU通用) -python -m pip install paddle_serving_app paddle_serving_client -``` - - -### 2. 
模型转换 -可以使用`paddle_serving_app`提供的模型,执行下列命令 -``` -python -m paddle_serving_app.package --get_model ocr_rec -tar -xzvf ocr_rec.tar.gz -python -m paddle_serving_app.package --get_model ocr_det -tar -xzvf ocr_det.tar.gz -``` -执行上述命令会下载`db_crnn_mobile`的模型,如果想要下载规模更大的`db_crnn_server`模型,可以在下载预测模型并解压之后。参考[如何从Paddle保存的预测模型转为Paddle Serving格式可部署的模型](https://github.com/PaddlePaddle/Serving/blob/develop/doc/INFERENCE_TO_SERVING_CN.md)。 - -我们以`ch_rec_r34_vd_crnn`模型作为例子,下载链接在: - -``` -wget --no-check-certificate https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar -tar xf ch_rec_r34_vd_crnn_infer.tar -``` -因此我们按照Serving模型转换教程,运行下列python文件。 -``` -from paddle_serving_client.io import inference_model_to_serving -inference_model_dir = "ch_rec_r34_vd_crnn" -serving_client_dir = "serving_client_dir" -serving_server_dir = "serving_server_dir" -feed_var_names, fetch_var_names = inference_model_to_serving( - inference_model_dir, serving_client_dir, serving_server_dir, model_filename="model", params_filename="params") -``` -最终会在`serving_client_dir`和`serving_server_dir`生成客户端和服务端的模型配置。 - -### 3. 启动服务 -启动服务可以根据实际需求选择启动`标准版`或者`快速版`,两种方式的对比如下表: - -|版本|特点|适用场景| -|-|-|-| -|标准版|稳定性高,分布式部署|适用于吞吐量大,需要跨机房部署的情况| -|快速版|部署方便,预测速度快|适用于对预测速度要求高,迭代速度快的场景| - -#### 方式1. 启动标准版服务 - -``` -# cpu,gpu启动二选一,以下是cpu启动 -python -m paddle_serving_server.serve --model ocr_det_model --port 9293 -python ocr_web_server.py cpu -# gpu启动 -python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0 -python ocr_web_server.py gpu -``` - -#### 方式2. 
启动快速版服务 - -``` -# cpu,gpu启动二选一,以下是cpu启动 -python ocr_local_server.py cpu -# gpu启动 -python ocr_local_server.py gpu -``` - -## 发送预测请求 - -``` -python ocr_web_client.py -``` - -## 返回结果格式说明 - -返回结果是json格式 -``` -{u'result': {u'res': [u'\u571f\u5730\u6574\u6cbb\u4e0e\u571f\u58e4\u4fee\u590d\u7814\u7a76\u4e2d\u5fc3', u'\u534e\u5357\u519c\u4e1a\u5927\u5b661\u7d20\u56fe']}} -``` -我们也可以打印结果json串中`res`字段的每一句话 -``` -土地整治与土壤修复研究中心 -华南农业大学1素图 -``` - -## 自定义修改服务逻辑 - -在`ocr_web_server.py`或是`ocr_local_server.py`当中的`preprocess`函数里面做了检测服务和识别服务的前处理,`postprocess`函数里面做了识别的后处理服务,可以在相应的函数中做修改。调用了`paddle_serving_app`库提供的常见CV模型的前处理/后处理库。 - -如果想要单独启动Paddle Serving的检测服务和识别服务,参见下列表格, 执行对应的脚本即可,并且在命令行参数注明用的CPU或是GPU来提供服务。 - -| 模型 | 标准版 | 快速版 | -| ---- | ----------------- | ------------------- | -| 检测 | det_web_server.py | det_local_server.py | -| 识别 | rec_web_server.py | rec_local_server.py | - -更多信息参见[Paddle Serving](https://github.com/PaddlePaddle/Serving) diff --git a/deploy/pdserving/readme_en.md b/deploy/pdserving/readme_en.md deleted file mode 100644 index 9a0c684fb6fb4f0eeff2552af70f62053d3351fb..0000000000000000000000000000000000000000 --- a/deploy/pdserving/readme_en.md +++ /dev/null @@ -1,123 +0,0 @@ -English | [简体中文](readme.md) - -PaddleOCR provides 2 service deployment methods: -- Based on **PaddleHub Serving**: Code path is "`./deploy/hubserving`". Please refer to the [tutorial](../hubserving/readme_en.md) for usage. -- Based on **PaddleServing**: Code path is "`./deploy/pdserving`". Please follow this tutorial. - -# Service deployment based on Paddle Serving - -This tutorial will introduce the detail steps of deploying PaddleOCR online prediction service based on [Paddle Serving](https://github.com/PaddlePaddle/Serving). - -## Quick start service - -### 1. Prepare the environment -Let's first install the relevant components of Paddle Serving. GPU is recommended for service deployment with Paddle Serving. 
- -**Requirements:** -- **CUDA version: 9.0** -- **CUDNN version: 7.0** -- **Operating system version: >= CentOS 6** -- **Python version: 2.7/3.6/3.7** - -**Installation:** -``` -# install GPU server -python -m pip install paddle_serving_server_gpu - -# or, install CPU server -python -m pip install paddle_serving_server - -# install client and App package (CPU/GPU) -python -m pip install paddle_serving_app paddle_serving_client -``` - -### 2. Model transformation -You can directly use converted model provided by `paddle_serving_app` for convenience. Execute the following command to obtain: -``` -python -m paddle_serving_app.package --get_model ocr_rec -tar -xzvf ocr_rec.tar.gz -python -m paddle_serving_app.package --get_model ocr_det -tar -xzvf ocr_det.tar.gz -``` -Executing the above command will download the `db_crnn_mobile` model, which is in different format with inference model. If you want to use other models for deployment, you can refer to the [tutorial](https://github.com/PaddlePaddle/Serving/blob/develop/doc/INFERENCE_TO_SERVING_CN.md) to convert your inference model to a model which is deployable for Paddle Serving. - -We take `ch_rec_r34_vd_crnn` model as example. Download the inference model by executing the following command: -``` -wget --no-check-certificate https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar -tar xf ch_rec_r34_vd_crnn_infer.tar -``` - -Convert the downloaded model by executing the following python script: -``` -from paddle_serving_client.io import inference_model_to_serving -inference_model_dir = "ch_rec_r34_vd_crnn" -serving_client_dir = "serving_client_dir" -serving_server_dir = "serving_server_dir" -feed_var_names, fetch_var_names = inference_model_to_serving( - inference_model_dir, serving_client_dir, serving_server_dir, model_filename="model", params_filename="params") -``` - -Finally, model configuration of client and server will be generated in `serving_client_dir` and `serving_server_dir`. - -### 3. 
Start service -Start the standard version or the fast version service according to your actual needs. The comparison of the two versions is shown in the table below: - -|version|characteristics|recommended scenarios| -|-|-|-| -|standard version|High stability, suitable for distributed deployment|Large throughput and cross regional deployment| -|fast version|Easy to deploy and fast to predict|Suitable for scenarios which requires high prediction speed and fast iteration speed| - -#### Mode 1. Start the standard mode service - -``` -# start with CPU -python -m paddle_serving_server.serve --model ocr_det_model --port 9293 -python ocr_web_server.py cpu - -# or, with GPU -python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0 -python ocr_web_server.py gpu -``` - -#### Mode 2. Start the fast mode service - -``` -# start with CPU -python ocr_local_server.py cpu - -# or, with GPU -python ocr_local_server.py gpu -``` - -## Send prediction requests - -``` -python ocr_web_client.py -``` - -## Returned result format - -The returned result is a JSON string, eg. -``` -{u'result': {u'res': [u'\u571f\u5730\u6574\u6cbb\u4e0e\u571f\u58e4\u4fee\u590d\u7814\u7a76\u4e2d\u5fc3', u'\u534e\u5357\u519c\u4e1a\u5927\u5b661\u7d20\u56fe']}} -``` - -You can also print the readable result in `res`: -``` -土地整治与土壤修复研究中心 -华南农业大学1素图 -``` - -## User defined service module modification - -The pre-processing and post-processing process, can be found in the `preprocess` and `postprocess` function in `ocr_web_server.py` or `ocr_local_server.py`. The pre-processing/post-processing library for common CV models provided by `paddle_serving_app` is called. -You can modify the corresponding code as actual needs. - -If you only want to start the detection service or the recognition service, execute the corresponding script reffering to the following table. Indicate the CPU or GPU is used in the start command parameters. 
- -| task | standard | fast | -| ---- | ----------------- | ------------------- | -| detection | det_web_server.py | det_local_server.py | -| recognition | rec_web_server.py | rec_local_server.py | - -More info can be found in [Paddle Serving](https://github.com/PaddlePaddle/Serving). diff --git a/deploy/pdserving/rec_local_server.py b/deploy/pdserving/rec_local_server.py index ba021c1cd5054071eb115b3e6e9c64cb572ff871..40266e3112d464f8161ad0dc94dfb192b317f62c 100644 --- a/deploy/pdserving/rec_local_server.py +++ b/deploy/pdserving/rec_local_server.py @@ -18,62 +18,158 @@ import cv2 import sys import numpy as np import os -from paddle_serving_client import Client -from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor -from paddle_serving_app.reader import Div, Normalize, Transpose -from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes -if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService -elif sys.argv[1] == 'cpu': - from paddle_serving_server.web_service import WebService import time import re import base64 +from tools.infer.predict_rec import TextRecognizer +from params import read_params + +global_args = read_params() + +if global_args.use_gpu: + from paddle_serving_server_gpu.web_service import WebService +else: + from paddle_serving_server.web_service import WebService + + +class TextRecognizerHelper(TextRecognizer): + def __init__(self, args): + super(TextRecognizerHelper, self).__init__(args) + if self.loss_type == "ctc": + self.fetch = ["save_infer_model/scale_0.tmp_0", "save_infer_model/scale_1.tmp_0"] + + def preprocess(self, img_list): + img_num = len(img_list) + args = {} + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + indices = np.argsort(np.array(width_list)) + args["indices"] = indices + predict_time = 0 + beg_img_no = 0 + end_img_no = img_num + 
norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + if self.loss_type != "srn": + norm_img = self.resize_norm_img(img_list[indices[ino]], + max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + else: + norm_img = self.process_image_srn(img_list[indices[ino]], + self.rec_image_shape, 8, 25, + self.char_ops) + encoder_word_pos_list = [] + gsrm_word_pos_list = [] + gsrm_slf_attn_bias1_list = [] + gsrm_slf_attn_bias2_list = [] + encoder_word_pos_list.append(norm_img[1]) + gsrm_word_pos_list.append(norm_img[2]) + gsrm_slf_attn_bias1_list.append(norm_img[3]) + gsrm_slf_attn_bias2_list.append(norm_img[4]) + norm_img_batch.append(norm_img[0]) + norm_img_batch = np.concatenate(norm_img_batch, axis=0).copy() + feed = {"image": norm_img_batch.copy()} + return feed, self.fetch, args + + def postprocess(self, outputs, args): + if self.loss_type == "ctc": + rec_idx_batch = outputs[0] + predict_batch = outputs[1] + rec_idx_lod = args["save_infer_model/scale_0.tmp_0.lod"] + predict_lod = args["save_infer_model/scale_1.tmp_0.lod"] + indices = args["indices"] + rec_res = [['', 0.0]] * (len(rec_idx_lod) - 1) + for rno in range(len(rec_idx_lod) - 1): + beg = rec_idx_lod[rno] + end = rec_idx_lod[rno + 1] + rec_idx_tmp = rec_idx_batch[beg:end, 0] + preds_text = self.char_ops.decode(rec_idx_tmp) + beg = predict_lod[rno] + end = predict_lod[rno + 1] + probs = predict_batch[beg:end, :] + ind = np.argmax(probs, axis=1) + blank = probs.shape[1] + valid_ind = np.where(ind != (blank - 1))[0] + if len(valid_ind) == 0: + continue + score = np.mean(probs[valid_ind, ind[valid_ind]]) + rec_res[indices[rno]] = [preds_text, score] + elif self.loss_type == 'srn': + char_num = self.char_ops.get_char_num() + preds = rec_idx_batch.reshape(-1) + elapse = time.time() - starttime + 
predict_time += elapse + total_preds = preds.copy() + for ino in range(int(len(rec_idx_batch) / self.text_len)): + preds = total_preds[ino * self.text_len:(ino + 1) * + self.text_len] + ind = np.argmax(probs, axis=1) + valid_ind = np.where(preds != int(char_num - 1))[0] + if len(valid_ind) == 0: + continue + score = np.mean(probs[valid_ind, ind[valid_ind]]) + preds = preds[:valid_ind[-1] + 1] + preds_text = self.char_ops.decode(preds) + rec_res[indices[ino]] = [preds_text, score] + else: + for rno in range(len(rec_idx_batch)): + end_pos = np.where(rec_idx_batch[rno, :] == 1)[0] + if len(end_pos) <= 1: + preds = rec_idx_batch[rno, 1:] + score = np.mean(predict_batch[rno, 1:]) + else: + preds = rec_idx_batch[rno, 1:end_pos[1]] + score = np.mean(predict_batch[rno, 1:end_pos[1]]) + preds_text = self.char_ops.decode(preds) + rec_res[indices[rno]] = [preds_text, score] + return rec_res class OCRService(WebService): def init_rec(self): self.ocr_reader = OCRReader() + self.text_recognizer = TextRecognizerHelper(global_args) def preprocess(self, feed=[], fetch=[]): + # TODO: to handle batch rec images img_list = [] for feed_data in feed: data = base64.b64decode(feed_data["image"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) img_list.append(im) - max_wh_ratio = 0 - for i, boximg in enumerate(img_list): - h, w = boximg.shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - _, w, h = self.ocr_reader.resize_norm_img(img_list[0], - max_wh_ratio).shape - imgs = np.zeros((len(img_list), 3, w, h)).astype('float32') - for i, img in enumerate(img_list): - norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) - imgs[i] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed, fetch, self.tmp_args = self.text_recognizer.preprocess(img_list) return feed, fetch def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = 
self.ocr_reader.postprocess(fetch_map, with_score=True) - res_lst = [] - for res in rec_res: - res_lst.append(res[0]) - res = {"res": res_lst} + outputs = [fetch_map[x] for x in self.text_recognizer.fetch] + for x in fetch_map.keys(): + if ".lod" in x: + self.tmp_args[x] = fetch_map[x] + rec_res = self.text_recognizer.postprocess(outputs, self.tmp_args) + res = { + "pred_text": [x[0] for x in rec_res], + "score": [str(x[1]) for x in rec_res] + } return res -ocr_service = OCRService(name="ocr") -ocr_service.load_model_config("ocr_rec_model") -ocr_service.init_rec() -if sys.argv[1] == 'gpu': - ocr_service.set_gpus("0") - ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) - ocr_service.run_debugger_service(gpu=True) -elif sys.argv[1] == 'cpu': - ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") +if __name__ == "__main__": + ocr_service = OCRService(name="ocr") + ocr_service.load_model_config(global_args.rec_model_dir) + ocr_service.init_rec() + if global_args.use_gpu: + ocr_service.prepare_server( + workdir="workdir", port=9292, device="gpu", gpuid=0) + else: + ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") ocr_service.run_debugger_service() -ocr_service.run_web_service() + ocr_service.run_web_service() diff --git a/deploy/pdserving/rec_rpc_server.py b/deploy/pdserving/rec_rpc_server.py new file mode 100644 index 0000000000000000000000000000000000000000..cb43eb990ffec679aabd5ab1408572529c3a2737 --- /dev/null +++ b/deploy/pdserving/rec_rpc_server.py @@ -0,0 +1,180 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle_serving_client import Client +from paddle_serving_app.reader import OCRReader +import cv2 +import sys +import numpy as np +import os +import time +import re +import base64 +from tools.infer.predict_rec import TextRecognizer +from params import read_params + +global_args = read_params() +if global_args.use_gpu: + from paddle_serving_server_gpu.web_service import WebService +else: + from paddle_serving_server.web_service import WebService + + +class TextRecognizerHelper(TextRecognizer): + def __init__(self, args): + super(TextRecognizerHelper, self).__init__(args) + if self.loss_type == "ctc": + self.fetch = ["save_infer_model/scale_0.tmp_0", "save_infer_model/scale_1.tmp_0"] + + def preprocess(self, img_list): + img_num = len(img_list) + args = {} + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + indices = np.argsort(np.array(width_list)) + args["indices"] = indices + predict_time = 0 + beg_img_no = 0 + end_img_no = img_num + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + if self.loss_type != "srn": + norm_img = self.resize_norm_img(img_list[indices[ino]], + max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + else: + norm_img = self.process_image_srn(img_list[indices[ino]], + self.rec_image_shape, 8, 25, + self.char_ops) + 
encoder_word_pos_list = [] + gsrm_word_pos_list = [] + gsrm_slf_attn_bias1_list = [] + gsrm_slf_attn_bias2_list = [] + encoder_word_pos_list.append(norm_img[1]) + gsrm_word_pos_list.append(norm_img[2]) + gsrm_slf_attn_bias1_list.append(norm_img[3]) + gsrm_slf_attn_bias2_list.append(norm_img[4]) + norm_img_batch.append(norm_img[0]) + + norm_img_batch = np.concatenate(norm_img_batch, axis=0) + if img_num > 1: + feed = [{ + "image": norm_img_batch[x] + } for x in range(norm_img_batch.shape[0])] + else: + feed = {"image": norm_img_batch[0]} + return feed, self.fetch, args + + def postprocess(self, outputs, args): + if self.loss_type == "ctc": + rec_idx_batch = outputs[0] + predict_batch = outputs[1] + rec_idx_lod = args["save_infer_model/scale_0.tmp_0.lod"] + predict_lod = args["save_infer_model/scale_1.tmp_0.lod"] + indices = args["indices"] + rec_res = [['', 0.0]] * (len(rec_idx_lod) - 1) + for rno in range(len(rec_idx_lod) - 1): + beg = rec_idx_lod[rno] + end = rec_idx_lod[rno + 1] + rec_idx_tmp = rec_idx_batch[beg:end, 0] + preds_text = self.char_ops.decode(rec_idx_tmp) + beg = predict_lod[rno] + end = predict_lod[rno + 1] + probs = predict_batch[beg:end, :] + ind = np.argmax(probs, axis=1) + blank = probs.shape[1] + valid_ind = np.where(ind != (blank - 1))[0] + if len(valid_ind) == 0: + continue + score = np.mean(probs[valid_ind, ind[valid_ind]]) + rec_res[indices[rno]] = [preds_text, score] + elif self.loss_type == 'srn': + char_num = self.char_ops.get_char_num() + preds = rec_idx_batch.reshape(-1) + elapse = time.time() - starttime + predict_time += elapse + total_preds = preds.copy() + for ino in range(int(len(rec_idx_batch) / self.text_len)): + preds = total_preds[ino * self.text_len:(ino + 1) * + self.text_len] + ind = np.argmax(probs, axis=1) + valid_ind = np.where(preds != int(char_num - 1))[0] + if len(valid_ind) == 0: + continue + score = np.mean(probs[valid_ind, ind[valid_ind]]) + preds = preds[:valid_ind[-1] + 1] + preds_text = 
self.char_ops.decode(preds) + rec_res[indices[ino]] = [preds_text, score] + else: + for rno in range(len(rec_idx_batch)): + end_pos = np.where(rec_idx_batch[rno, :] == 1)[0] + if len(end_pos) <= 1: + preds = rec_idx_batch[rno, 1:] + score = np.mean(predict_batch[rno, 1:]) + else: + preds = rec_idx_batch[rno, 1:end_pos[1]] + score = np.mean(predict_batch[rno, 1:end_pos[1]]) + preds_text = self.char_ops.decode(preds) + rec_res[indices[rno]] = [preds_text, score] + return rec_res + + +class OCRService(WebService): + def init_rec(self): + self.ocr_reader = OCRReader() + self.text_recognizer = TextRecognizerHelper(global_args) + + def preprocess(self, feed=[], fetch=[]): + # TODO: to handle batch rec images + img_list = [] + for feed_data in feed: + data = base64.b64decode(feed_data["image"].encode('utf8')) + data = np.fromstring(data, np.uint8) + im = cv2.imdecode(data, cv2.IMREAD_COLOR) + img_list.append(im) + feed, fetch, self.tmp_args = self.text_recognizer.preprocess(img_list) + return feed, fetch + + def postprocess(self, feed={}, fetch=[], fetch_map=None): + outputs = [fetch_map[x] for x in self.text_recognizer.fetch] + for x in fetch_map.keys(): + if ".lod" in x: + self.tmp_args[x] = fetch_map[x] + rec_res = self.text_recognizer.postprocess(outputs, self.tmp_args) + res = { + "pred_text": [x[0] for x in rec_res], + "score": [str(x[1]) for x in rec_res] + } + return res + + +if __name__ == "__main__": + ocr_service = OCRService(name="ocr") + ocr_service.load_model_config(global_args.rec_model_dir) + ocr_service.init_rec() + if global_args.use_gpu: + ocr_service.prepare_server( + workdir="workdir", port=9292, device="gpu", gpuid=0) + else: + ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") + ocr_service.run_rpc_service() + ocr_service.run_web_service() diff --git a/deploy/pdserving/rec_web_client.py b/deploy/pdserving/rec_web_client.py new file mode 100644 index 
0000000000000000000000000000000000000000..576e073b837281e84f1c5094aa1ff20037c45427 --- /dev/null +++ b/deploy/pdserving/rec_web_client.py @@ -0,0 +1,39 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -*- coding: utf-8 -*- + +import requests +import json +import cv2 +import base64 +import os, sys +import time + + +def cv2_to_base64(image): + #data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(image).decode( + 'utf8') #data.tostring()).decode('utf8') + + +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:9292/ocr/prediction" +test_img_dir = "../../doc/imgs_words/ch/" +for img_file in os.listdir(test_img_dir): + with open(os.path.join(test_img_dir, img_file), 'rb') as file: + image_data1 = file.read() + image = cv2_to_base64(image_data1) + data = {"feed": [{"image": image}], "fetch": ["res"]} + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) diff --git a/deploy/pdserving/rec_web_server.py b/deploy/pdserving/rec_web_server.py deleted file mode 100644 index 0f4e9f6d264ed602f387bfaf0303cd59af7823fa..0000000000000000000000000000000000000000 --- a/deploy/pdserving/rec_web_server.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle_serving_client import Client -from paddle_serving_app.reader import OCRReader -import cv2 -import sys -import numpy as np -import os -from paddle_serving_client import Client -from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor -from paddle_serving_app.reader import Div, Normalize, Transpose -from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes -if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService -elif sys.argv[1] == 'cpu': - from paddle_serving_server.web_service import WebService -import time -import re -import base64 - - -class OCRService(WebService): - def init_rec(self): - self.ocr_reader = OCRReader() - - def preprocess(self, feed=[], fetch=[]): - # TODO: to handle batch rec images - img_list = [] - for feed_data in feed: - data = base64.b64decode(feed_data["image"].encode('utf8')) - data = np.fromstring(data, np.uint8) - im = cv2.imdecode(data, cv2.IMREAD_COLOR) - img_list.append(im) - feed_list = [] - max_wh_ratio = 0 - for i, boximg in enumerate(img_list): - h, w = boximg.shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for img in img_list: - norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) - feed = {"image": norm_img} - feed_list.append(feed) - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] - return feed_list, fetch - - def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) - res_lst = [] - for res in 
rec_res: - res_lst.append(res[0]) - res = {"res": res_lst} - return res - - -ocr_service = OCRService(name="ocr") -ocr_service.load_model_config("ocr_rec_model") -ocr_service.init_rec() -if sys.argv[1] == 'gpu': - ocr_service.set_gpus("0") - ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) -elif sys.argv[1] == 'cpu': - ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") -ocr_service.run_rpc_service() -ocr_service.run_web_service() diff --git a/doc/doc_ch/serving_inference.md b/doc/doc_ch/serving_inference.md new file mode 100644 index 0000000000000000000000000000000000000000..f2215bcffb62205726e864170e177f0784d16094 --- /dev/null +++ b/doc/doc_ch/serving_inference.md @@ -0,0 +1,233 @@ +# 使用Paddle Serving预测推理 + +阅读本文档之前,请先阅读文档 [基于Python预测引擎推理](./inference.md) + +同本地执行预测一样,我们需要保存一份可以用于Paddle Serving的模型。 + +接下来首先介绍如何将训练的模型转换成Paddle Serving模型,然后将依次介绍文本检测、文本识别以及两者串联基于预测引擎推理。 + +### 一、 准备环境 +我们先安装Paddle Serving相关组件 +我们推荐用户使用GPU来做Paddle Serving的OCR服务部署 + +**CUDA版本:9.X/10.X** + +**CUDNN版本:7.X** + +**操作系统版本:Linux/Windows** + +**Python版本: 2.7/3.6/3.7** + +**Python操作指南:** +``` +#CPU/GPU版本选择一个 +#GPU版本服务端 +python -m pip install paddle_serving_server_gpu +#CPU版本服务端 +python -m pip install paddle_serving_server +#客户端和App包使用以下链接(CPU,GPU通用) +python -m pip install paddle_serving_app paddle_serving_client +``` + +## 二、训练模型转Serving模型 + +在前序文档 [基于Python预测引擎推理](./inference.md) 中,我们提供了如何把训练的checkpoint转换成Paddle模型。Paddle模型通常由一个文件夹构成,内含模型结构描述文件`model`和模型参数文件`params`。Serving模型由两个文件夹构成,用于存放客户端和服务端的配置。 + +我们以`ch_rec_r34_vd_crnn`模型作为例子,下载链接在: + +``` +wget --no-check-certificate https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar +tar xf ch_rec_r34_vd_crnn_infer.tar +``` +因此我们按照Serving模型转换教程,运行下列python文件。 +``` +python tools/inference_to_serving.py --model_dir ch_rec_r34_vd_crnn +``` +最终会在`serving_client_dir`和`serving_server_dir`生成客户端和服务端的模型配置。其中`serving_server_dir`和`serving_client_dir`的名字可以自定义。最终文件结构如下 + +``` 
+/ch_rec_r34_vd_crnn/ +├── serving_client_dir # 客户端配置文件夹 +└── serving_server_dir # 服务端配置文件夹 +``` + +## 三、文本检测模型Serving推理 + +启动服务可以根据实际需求选择启动`标准版`或者`快速版`,两种方式的对比如下表: + +|版本|特点|适用场景| +|-|-|-| +|标准版|稳定性高,分布式部署|适用于吞吐量大,需要跨机房部署的情况| +|快速版|部署方便,预测速度快|适用于对预测速度要求高,迭代速度快的场景,Windows用户只能选择快速版| + +接下来的命令中,我们会指定快速版和标准版的命令。需要说明的是,标准版只能用Linux平台,快速版可以支持Linux/Windows。 +文本检测模型推理,默认使用DB模型的配置参数,识别默认为CRNN。 + +配置文件在`params.py`中,我们贴出配置部分,如果需要做改动,也在这个文件内部进行修改。 + +``` +def read_params(): + cfg = Config() + #use gpu + cfg.use_gpu = False # 是否使用GPU + cfg.use_pdserving = True # 是否使用paddleserving,必须为True + + #params for text detector + cfg.det_algorithm = "DB" # 检测算法, DB/EAST等 + cfg.det_model_dir = "./det_mv_server/" # 检测算法模型路径 + cfg.det_max_side_len = 960 + + #DB params + cfg.det_db_thresh =0.3 + cfg.det_db_box_thresh =0.5 + cfg.det_db_unclip_ratio =2.0 + + #EAST params + cfg.det_east_score_thresh = 0.8 + cfg.det_east_cover_thresh = 0.1 + cfg.det_east_nms_thresh = 0.2 + + #params for text recognizer + cfg.rec_algorithm = "CRNN" # 识别算法, CRNN/RARE等 + cfg.rec_model_dir = "./ocr_rec_server/" # 识别算法模型路径 + + cfg.rec_image_shape = "3, 32, 320" + cfg.rec_char_type = 'ch' + cfg.rec_batch_num = 30 + cfg.max_text_length = 25 + + cfg.rec_char_dict_path = "./ppocr_keys_v1.txt" # 识别算法字典文件 + cfg.use_space_char = True + + #params for text classifier + cfg.use_angle_cls = True # 是否启用分类算法 + cfg.cls_model_dir = "./ocr_clas_server/" # 分类算法模型路径 + cfg.cls_image_shape = "3, 48, 192" + cfg.label_list = ['0', '180'] + cfg.cls_batch_num = 30 + cfg.cls_thresh = 0.9 + + return cfg +``` +与本地预测不同的是,Serving预测需要一个客户端和一个服务端,因此接下来的教程都是两行代码。 + +在正式执行服务端启动命令之前,先export PYTHONPATH到工程主目录下。 +``` +export PYTHONPATH=$PWD:$PYTHONPATH +cd deploy/pdserving +``` +为了方便用户复现Demo程序,我们提供了Chinese and English ultra-lightweight OCR model (8.1M)版本的Serving模型 +``` +wget --no-check-certificate https://paddleocr.bj.bcebos.com/deploy/pdserving/ocr_pdserving_suite.tar.gz +tar xf ocr_pdserving_suite.tar.gz +``` + +### 1. 
超轻量中文检测模型推理 + +超轻量中文检测模型推理,可以执行如下命令启动服务端: + +``` +#根据环境只需要启动其中一个就可以 +python det_rpc_server.py #标准版,Linux用户 +python det_local_server.py #快速版,Windows/Linux用户 +``` + +客户端 + +``` +python det_web_client.py +``` + + +Serving预测和本地预测的不同点在于,客户端发送请求到服务端,服务端需要检测到文字框之后返回框的坐标,此处没有后处理的图片,只能看到坐标值。 + +## 四、文本识别模型Serving推理 + +下面将介绍超轻量中文识别模型推理、基于CTC损失的识别模型推理和基于Attention损失的识别模型推理。对于中文文本识别,建议优先选择基于CTC损失的识别模型,实践中也发现基于Attention损失的效果不如基于CTC损失的识别模型。此外,如果训练时修改了文本的字典,请参考下面的自定义文本识别字典的推理。 + +### 1. 超轻量中文识别模型推理 + +超轻量中文识别模型推理,可以执行如下命令启动服务端: +需要注意`params.py`中`cfg.use_gpu`的值 +``` +#根据环境只需要启动其中一个就可以 +python rec_rpc_server.py #标准版,Linux用户 +python rec_local_server.py #快速版,Windows/Linux用户 +``` +如果需要使用CPU版本,请将`params.py`中的`cfg.use_gpu`设置为`False`。 + +客户端 + +``` +python rec_web_client.py +``` + +![](../imgs_words/ch/word_4.jpg) + +执行命令后,上面图像的预测结果(识别的文本和得分)会打印到屏幕上,示例如下: + +``` +{u'result': {u'score': [u'0.89547354'], u'pred_text': ['实力活力']}} +``` + + + +## 五、方向分类模型推理 + +下面将介绍方向分类模型推理。 + + + +### 1. 方向分类模型推理 + +方向分类模型推理,可以执行如下命令启动服务端: +需要注意`params.py`中`cfg.use_gpu`的值 +``` +#根据环境只需要启动其中一个就可以 +python clas_rpc_server.py #标准版,Linux用户 +python clas_local_server.py #快速版,Windows/Linux用户 +``` + +客户端 + +``` +python rec_web_client.py +``` + +![](../imgs_words/ch/word_4.jpg) + +执行命令后,上面图像的预测结果(分类的方向和得分)会打印到屏幕上,示例如下: + +``` +{u'result': {u'direction': [u'0'], u'score': [u'0.9999963']}} +``` + + +## 六、文本检测、方向分类和文字识别串联Serving推理 + +### 1.
超轻量中文OCR模型推理 + +在执行预测时,需要通过参数`image_dir`指定单张图像或者图像集合的路径、参数`det_model_dir`,`cls_model_dir`和`rec_model_dir`分别指定检测,方向分类和识别的inference模型路径。参数`use_angle_cls`用于控制是否启用方向分类模型。与本地预测不同的是,为了减少网络传输耗时,可视化识别结果目前不做处理,用户收到的是推理得到的文字字段。 + +执行如下命令启动服务端: +需要注意params.py中的`--use_gpu`的值 +``` +#标准版,Linux用户 +#GPU用户 +python -m paddle_serving_server_gpu.serve --model det_mv_server --port 9293 --gpu_id 0 +python -m paddle_serving_server_gpu.serve --model ocr_cls_server --port 9294 --gpu_id 0 +python ocr_rpc_server.py +#CPU用户 +python -m paddle_serving_server.serve --model det_mv_server --port 9293 +python -m paddle_serving_server.serve --model ocr_cls_server --port 9294 +python ocr_rpc_server.py + +#快速版,Windows/Linux用户 +python ocr_local_server.py +``` + +客户端 + +``` +python rec_web_client.py +``` diff --git a/ppocr/data/det/__init__.py b/ppocr/data/det/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tools/infer/__init__.py b/tools/infer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py index 3c14011a24cf5afcecc5edd5a54e395a0f171f53..dc4028d0ddcafa8e69cc47c783efe067fda51a0b 100755 --- a/tools/infer/predict_cls.py +++ b/tools/infer/predict_cls.py @@ -33,12 +33,13 @@ from paddle import fluid class TextClassifier(object): def __init__(self, args): - self.predictor, self.input_tensor, self.output_tensors = \ - utility.create_predictor(args, mode="cls") + if args.use_pdserving is False: + self.predictor, self.input_tensor, 
self.output_tensors = \ + utility.create_predictor(args, mode="cls") + self.use_zero_copy_run = args.use_zero_copy_run self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] self.cls_batch_num = args.rec_batch_num self.label_list = args.label_list - self.use_zero_copy_run = args.use_zero_copy_run self.cls_thresh = args.cls_thresh def resize_norm_img(self, img): @@ -103,7 +104,6 @@ class TextClassifier(object): label_out = self.output_tensors[1].copy_to_cpu() if len(label_out.shape) != 1: prob_out, label_out = label_out, prob_out - elapse = time.time() - starttime predict_time += elapse for rno in range(len(label_out)): diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py index c57986b7590d6ea526097e5c251e9ea7827d36f7..60d67c963a262af8a15cd9cae8b8540090bbedac 100755 --- a/tools/infer/predict_det.py +++ b/tools/infer/predict_det.py @@ -42,7 +42,6 @@ class TextDetector(object): def __init__(self, args): max_side_len = args.det_max_side_len self.det_algorithm = args.det_algorithm - self.use_zero_copy_run = args.use_zero_copy_run preprocess_params = {'max_side_len': max_side_len} postprocess_params = {} if self.det_algorithm == "DB": @@ -75,9 +74,10 @@ class TextDetector(object): else: logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) sys.exit(0) - - self.predictor, self.input_tensor, self.output_tensors =\ - utility.create_predictor(args, mode="det") + if args.use_pdserving is False: + self.use_zero_copy_run = args.use_zero_copy_run + self.predictor, self.input_tensor, self.output_tensors =\ + utility.create_predictor(args, mode="det") def order_points_clockwise(self, pts): """ diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index 06273e9f9e5b42a9ecc829c435662e9aabcdd224..1989f524c21d519f879290bcc72bc1b282a93772 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -34,14 +34,15 @@ from ppocr.utils.character import CharacterOps class TextRecognizer(object): def __init__(self, 
args): - self.predictor, self.input_tensor, self.output_tensors =\ - utility.create_predictor(args, mode="rec") + if args.use_pdserving is False: + self.predictor, self.input_tensor, self.output_tensors =\ + utility.create_predictor(args, mode="rec") + self.use_zero_copy_run = args.use_zero_copy_run self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] self.character_type = args.rec_char_type self.rec_batch_num = args.rec_batch_num self.rec_algorithm = args.rec_algorithm self.text_len = args.max_text_length - self.use_zero_copy_run = args.use_zero_copy_run char_ops_params = { "character_type": args.rec_char_type, "character_dict_path": args.rec_char_dict_path, @@ -320,7 +321,7 @@ def main(args): print(e) logger.info( "ERROR!!!! \n" - "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n" + "Please read the FAQ: https://github.com/PaddlePaddle/PaddleOCR#faq \n" "If your model has tps module: " "TPS does not support variable shape.\n" "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ") diff --git a/tools/inference_to_serving.py b/tools/inference_to_serving.py new file mode 100644 index 0000000000000000000000000000000000000000..77af59cc70a2a6e7610579d798f2722c3c63f847 --- /dev/null +++ b/tools/inference_to_serving.py @@ -0,0 +1,29 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Convert a Paddle inference model into Paddle Serving model configs.

Usage:
    python tools/inference_to_serving.py --model_dir <inference_model_dir> \
        [--server_dir serving_server_dir] [--client_dir serving_client_dir]
"""

import argparse


def parse_args(argv=None):
    """Parse the command-line options of the conversion script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what the script always
            did; passing a list allows programmatic use.

    Returns:
        argparse.Namespace with the attributes ``model_dir``, ``server_dir``
        and ``client_dir``.
    """
    parser = argparse.ArgumentParser(
        description="Convert a Paddle inference model to Paddle Serving "
        "server/client configs.")
    # Without a model directory the converter would receive None, so fail
    # early with a clear argparse error instead.
    parser.add_argument(
        "--model_dir",
        type=str,
        required=True,
        help="directory of the inference model to convert")
    parser.add_argument(
        "--server_dir",
        type=str,
        default="serving_server_dir",
        help="output directory for the server-side config")
    parser.add_argument(
        "--client_dir",
        type=str,
        default="serving_client_dir",
        help="output directory for the client-side config")
    return parser.parse_args(argv)


def main(argv=None):
    """Run the conversion described by the command-line arguments.

    Args:
        argv: Optional list of argument strings forwarded to ``parse_args``.

    Returns:
        The ``(feed_var_names, fetch_var_names)`` pair returned by
        ``inference_model_to_serving``.
    """
    # Imported lazily so that `--help` and argument validation work even when
    # Paddle Serving is not installed.
    from paddle_serving_client.io import inference_model_to_serving

    args = parse_args(argv)
    # The previous version stored args.server_dir in a variable named
    # `serving_client_dir` (and vice versa) and passed both positionally.
    # The effective positional order -- model dir, server dir, client dir --
    # is kept exactly; only the misleading names are gone.
    # NOTE(review): confirm this order against the installed
    # paddle_serving_client signature.
    feed_var_names, fetch_var_names = inference_model_to_serving(
        args.model_dir,
        args.server_dir,
        args.client_dir,
        model_filename="model",
        params_filename="params")
    return feed_var_names, fetch_var_names


if __name__ == "__main__":
    main()