未验证 提交 04c5c6ab 编写于 作者: M MissPenguin 提交者: GitHub

Merge pull request #797 from wangjiawei04/ocr_0923

Paddle Serving OCR 更新
...@@ -18,60 +18,111 @@ import cv2 ...@@ -18,60 +18,111 @@ import cv2
import sys import sys
import numpy as np import numpy as np
import os import os
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time import time
import re import re
import base64 import base64
from tools.infer.predict_cls import TextClassifier
from params import read_params
global_args = read_params()
if global_args.use_gpu:
from paddle_serving_server_gpu.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class TextClassifierHelper(TextClassifier):
def __init__(self, args):
self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
self.cls_batch_num = args.rec_batch_num
self.label_list = args.label_list
self.cls_thresh = args.cls_thresh
self.fetch = [
"save_infer_model/scale_0.tmp_0", "save_infer_model/scale_1.tmp_0"
]
def preprocess(self, img_list):
args = {}
img_num = len(img_list)
args["img_list"] = img_list
# Calculate the aspect ratio of all text bars
width_list = []
for img in img_list:
width_list.append(img.shape[1] / float(img.shape[0]))
# Sorting can speed up the cls process
indices = np.argsort(np.array(width_list))
args["indices"] = indices
cls_res = [['', 0.0]] * img_num
batch_num = self.cls_batch_num
predict_time = 0
beg_img_no, end_img_no = 0, img_num
norm_img_batch = []
max_wh_ratio = 0
for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no):
norm_img = self.resize_norm_img(img_list[indices[ino]])
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch)
feed = {"image": norm_img_batch.copy()}
return feed, self.fetch, args
def postprocess(self, outputs, args):
prob_out = outputs[0]
label_out = outputs[1]
indices = args["indices"]
cls_res = [['', 0.0]] * len(label_out)
if len(label_out.shape) != 1:
prob_out, label_out = label_out, prob_out
for rno in range(len(label_out)):
label_idx = label_out[rno]
score = prob_out[rno][label_idx]
label = self.label_list[label_idx]
cls_res[indices[rno]] = [label, score]
if '180' in label and score > self.cls_thresh:
img_list[indices[rno]] = cv2.rotate(img_list[indices[rno]], 1)
return args["img_list"], cls_res
class OCRService(WebService): class OCRService(WebService):
def init_rec(self): def init_rec(self):
self.ocr_reader = OCRReader() self.ocr_reader = OCRReader()
self.text_classifier = TextClassifierHelper(global_args)
def preprocess(self, feed=[], fetch=[]): def preprocess(self, feed=[], fetch=[]):
# TODO: to handle batch rec images
img_list = [] img_list = []
for feed_data in feed: for feed_data in feed:
data = base64.b64decode(feed_data["image"].encode('utf8')) data = base64.b64decode(feed_data["image"].encode('utf8'))
data = np.fromstring(data, np.uint8) data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR) im = cv2.imdecode(data, cv2.IMREAD_COLOR)
img_list.append(im) img_list.append(im)
feed_list = [] feed, fetch, self.tmp_args = self.text_classifier.preprocess(img_list)
max_wh_ratio = 0 return feed, fetch
for i, boximg in enumerate(img_list):
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
return feed_list, fetch
def postprocess(self, feed={}, fetch=[], fetch_map=None): def postprocess(self, feed={}, fetch=[], fetch_map=None):
rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) outputs = [fetch_map[x] for x in self.text_classifier.fetch]
res_lst = [] for x in fetch_map.keys():
for res in rec_res: if ".lod" in x:
res_lst.append(res[0]) self.tmp_args[x] = fetch_map[x]
res = {"res": res_lst} _, rec_res = self.text_classifier.postprocess(outputs, self.tmp_args)
res = {
"pred_text": [x[0] for x in rec_res],
"score": [str(x[1]) for x in rec_res]
}
return res return res
ocr_service = OCRService(name="ocr") if __name__ == "__main__":
ocr_service.load_model_config("ocr_rec_model") ocr_service = OCRService(name="ocr")
ocr_service.init_rec() ocr_service.load_model_config(global_args.cls_model_dir)
if sys.argv[1] == 'gpu': ocr_service.init_rec()
ocr_service.set_gpus("0") if global_args.use_gpu:
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) ocr_service.prepare_server(
elif sys.argv[1] == 'cpu': workdir="workdir", port=9292, device="gpu", gpuid=0)
else:
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_rpc_service() ocr_service.run_debugger_service()
ocr_service.run_web_service() ocr_service.run_web_service()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import OCRReader
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from tools.infer.predict_cls import TextClassifier
from params import read_params
global_args = read_params()
if global_args.use_gpu:
from paddle_serving_server_gpu.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class TextClassifierHelper(TextClassifier):
def __init__(self, args):
self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
self.cls_batch_num = args.rec_batch_num
self.label_list = args.label_list
self.cls_thresh = args.cls_thresh
self.fetch = [
"save_infer_model/scale_0.tmp_0", "save_infer_model/scale_1.tmp_0"
]
def preprocess(self, img_list):
args = {}
img_num = len(img_list)
args["img_list"] = img_list
# Calculate the aspect ratio of all text bars
width_list = []
for img in img_list:
width_list.append(img.shape[1] / float(img.shape[0]))
# Sorting can speed up the cls process
indices = np.argsort(np.array(width_list))
args["indices"] = indices
cls_res = [['', 0.0]] * img_num
batch_num = self.cls_batch_num
predict_time = 0
beg_img_no, end_img_no = 0, img_num
norm_img_batch = []
max_wh_ratio = 0
for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no):
norm_img = self.resize_norm_img(img_list[indices[ino]])
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch)
if img_num > 1:
feed = [{
"image": norm_img_batch[x]
} for x in range(norm_img_batch.shape[0])]
else:
feed = {"image": norm_img_batch[0]}
return feed, self.fetch, args
def postprocess(self, outputs, args):
prob_out = outputs[0]
label_out = outputs[1]
indices = args["indices"]
cls_res = [['', 0.0]] * len(label_out)
if len(label_out.shape) != 1:
prob_out, label_out = label_out, prob_out
for rno in range(len(label_out)):
label_idx = label_out[rno]
score = prob_out[rno][label_idx]
label = self.label_list[label_idx]
cls_res[indices[rno]] = [label, score]
if '180' in label and score > self.cls_thresh:
img_list[indices[rno]] = cv2.rotate(img_list[indices[rno]], 1)
return args["img_list"], cls_res
class OCRService(WebService):
def init_rec(self):
self.ocr_reader = OCRReader()
self.text_classifier = TextClassifierHelper(global_args)
def preprocess(self, feed=[], fetch=[]):
# TODO: to handle batch rec images
img_list = []
for feed_data in feed:
data = base64.b64decode(feed_data["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
img_list.append(im)
feed, fetch, self.tmp_args = self.text_classifier.preprocess(img_list)
return feed, fetch
def postprocess(self, feed={}, fetch=[], fetch_map=None):
outputs = [fetch_map[x] for x in self.text_classifier.fetch]
for x in fetch_map.keys():
if ".lod" in x:
self.tmp_args[x] = fetch_map[x]
_, rec_res = self.text_classifier.postprocess(outputs, self.tmp_args)
res = {
"direction": [x[0] for x in rec_res],
"score": [str(x[1]) for x in rec_res]
}
return res
if __name__ == "__main__":
ocr_service = OCRService(name="ocr")
ocr_service.load_model_config(global_args.cls_model_dir)
ocr_service.init_rec()
if global_args.use_gpu:
ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
else:
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_rpc_service()
ocr_service.run_web_service()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import requests
import json
import cv2
import base64
import os, sys
import time
def cv2_to_base64(image):
#data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(image).decode(
'utf8') #data.tostring()).decode('utf8')
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:9292/ocr/prediction"
test_img_dir = "../../doc/imgs_words/ch/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
data = {"feed": [{"image": image}], "fetch": ["res"]}
r = requests.post(url=url, headers=headers, data=json.dumps(data))
print(r.json())
...@@ -17,63 +17,91 @@ import cv2 ...@@ -17,63 +17,91 @@ import cv2
import sys import sys
import numpy as np import numpy as np
import os import os
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time import time
import re import re
import base64 import base64
from tools.infer.predict_det import TextDetector
from params import read_params
global_args = read_params()
if global_args.use_gpu:
from paddle_serving_server_gpu.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class OCRService(WebService): class TextDetectorHelper(TextDetector):
def __init__(self, args):
super(TextDetectorHelper, self).__init__(args)
if self.det_algorithm == "SAST":
self.fetch = [
"bn_f_border4.output.tmp_2", "bn_f_tco4.output.tmp_2",
"bn_f_tvo4.output.tmp_2", "sigmoid_0.tmp_0"
]
elif self.det_algorithm == "EAST":
self.fetch = ["sigmoid_0.tmp_0", "tmp_2"]
elif self.det_algorithm == "DB":
self.fetch = ["save_infer_model/scale_0.tmp_0"]
def preprocess(self, img):
img = img.copy()
im, ratio_list = self.preprocess_op(img)
if im is None:
return None, 0
return {
"image": im.copy()
}, self.fetch, {
"ratio_list": [ratio_list],
"ori_im": img
}
def postprocess(self, outputs, args):
outs_dict = {}
if self.det_algorithm == "EAST":
outs_dict['f_geo'] = outputs[0]
outs_dict['f_score'] = outputs[1]
elif self.det_algorithm == 'SAST':
outs_dict['f_border'] = outputs[0]
outs_dict['f_score'] = outputs[1]
outs_dict['f_tco'] = outputs[2]
outs_dict['f_tvo'] = outputs[3]
else:
outs_dict['maps'] = outputs[0]
dt_boxes_list = self.postprocess_op(outs_dict, args["ratio_list"])
dt_boxes = dt_boxes_list[0]
if self.det_algorithm == "SAST" and self.det_sast_polygon:
dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes,
args["ori_im"].shape)
else:
dt_boxes = self.filter_tag_det_res(dt_boxes, args["ori_im"].shape)
return dt_boxes
class DetService(WebService):
def init_det(self): def init_det(self):
self.det_preprocess = Sequential([ self.text_detector = TextDetectorHelper(global_args)
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, feed=[], fetch=[]): def preprocess(self, feed=[], fetch=[]):
data = base64.b64decode(feed[0]["image"].encode('utf8')) data = base64.b64decode(feed[0]["image"].encode('utf8'))
data = np.fromstring(data, np.uint8) data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR) im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = im.shape feed, fetch, self.tmp_args = self.text_detector.preprocess(im)
det_img = self.det_preprocess(im) return feed, fetch
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img[np.newaxis, :].copy()}, ["concat_1.tmp_0"]
def postprocess(self, feed={}, fetch=[], fetch_map=None): def postprocess(self, feed={}, fetch=[], fetch_map=None):
det_out = fetch_map["concat_1.tmp_0"] outputs = [fetch_map[x] for x in fetch]
ratio_list = [ res = self.text_detector.postprocess(outputs, self.tmp_args)
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w return {"boxes": res.tolist()}
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
return {"dt_boxes": dt_boxes.tolist()}
ocr_service = OCRService(name="ocr") if __name__ == "__main__":
ocr_service.load_model_config("ocr_det_model") ocr_service = DetService(name="ocr")
ocr_service.init_det() ocr_service.load_model_config(global_args.det_model_dir)
if sys.argv[1] == 'gpu': ocr_service.init_det()
ocr_service.set_gpus("0") if global_args.use_gpu:
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) ocr_service.prepare_server(
ocr_service.run_debugger_service(gpu=True) workdir="workdir", port=9292, device="gpu", gpuid=0)
elif sys.argv[1] == 'cpu': else:
ocr_service.prepare_server(workdir="workdir", port=9292) ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_debugger_service() ocr_service.run_debugger_service()
ocr_service.init_det() ocr_service.run_web_service()
ocr_service.run_web_service()
...@@ -17,62 +17,90 @@ import cv2 ...@@ -17,62 +17,90 @@ import cv2
import sys import sys
import numpy as np import numpy as np
import os import os
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time import time
import re import re
import base64 import base64
from tools.infer.predict_det import TextDetector
from params import read_params
global_args = read_params()
if global_args.use_gpu:
from paddle_serving_server_gpu.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class OCRService(WebService): class TextDetectorHelper(TextDetector):
def __init__(self, args):
super(TextDetectorHelper, self).__init__(args)
if self.det_algorithm == "SAST":
self.fetch = [
"bn_f_border4.output.tmp_2", "bn_f_tco4.output.tmp_2",
"bn_f_tvo4.output.tmp_2", "sigmoid_0.tmp_0"
]
elif self.det_algorithm == "EAST":
self.fetch = ["sigmoid_0.tmp_0", "tmp_2"]
elif self.det_algorithm == "DB":
self.fetch = ["save_infer_model/scale_0.tmp_0"]
def preprocess(self, img):
im, ratio_list = self.preprocess_op(img)
if im is None:
return None, 0
return {
"image": im[0]
}, self.fetch, {
"ratio_list": [ratio_list],
"ori_im": img
}
def postprocess(self, outputs, args):
outs_dict = {}
if self.det_algorithm == "EAST":
outs_dict['f_geo'] = outputs[0]
outs_dict['f_score'] = outputs[1]
elif self.det_algorithm == 'SAST':
outs_dict['f_border'] = outputs[0]
outs_dict['f_score'] = outputs[1]
outs_dict['f_tco'] = outputs[2]
outs_dict['f_tvo'] = outputs[3]
else:
outs_dict['maps'] = outputs[0]
dt_boxes_list = self.postprocess_op(outs_dict, args["ratio_list"])
dt_boxes = dt_boxes_list[0]
if self.det_algorithm == "SAST" and self.det_sast_polygon:
dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes,
args["ori_im"].shape)
else:
dt_boxes = self.filter_tag_det_res(dt_boxes, args["ori_im"].shape)
return dt_boxes
class DetService(WebService):
def init_det(self): def init_det(self):
self.det_preprocess = Sequential([ self.text_detector = TextDetectorHelper(global_args)
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, feed=[], fetch=[]): def preprocess(self, feed=[], fetch=[]):
data = base64.b64decode(feed[0]["image"].encode('utf8')) data = base64.b64decode(feed[0]["image"].encode('utf8'))
data = np.fromstring(data, np.uint8) data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR) im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = im.shape feed, fetch, self.tmp_args = self.text_detector.preprocess(im)
det_img = self.det_preprocess(im) return feed, fetch
_, self.new_h, self.new_w = det_img.shape
print(det_img)
return {"image": det_img}, ["concat_1.tmp_0"]
def postprocess(self, feed={}, fetch=[], fetch_map=None): def postprocess(self, feed={}, fetch=[], fetch_map=None):
det_out = fetch_map["concat_1.tmp_0"] outputs = [fetch_map[x] for x in fetch]
ratio_list = [ res = self.text_detector.postprocess(outputs, self.tmp_args)
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w return {"boxes": res.tolist()}
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
return {"dt_boxes": dt_boxes.tolist()}
ocr_service = OCRService(name="ocr") if __name__ == "__main__":
ocr_service.load_model_config("ocr_det_model") ocr_service = DetService(name="ocr")
if sys.argv[1] == 'gpu': ocr_service.load_model_config(global_args.det_model_dir)
ocr_service.set_gpus("0") ocr_service.init_det()
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) if global_args.use_gpu:
elif sys.argv[1] == 'cpu': ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
else:
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.init_det() ocr_service.run_rpc_service()
ocr_service.run_rpc_service() ocr_service.run_web_service()
ocr_service.run_web_service()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import requests
import json
import cv2
import base64
import os, sys
import time
def cv2_to_base64(image):
#data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(image).decode(
'utf8') #data.tostring()).decode('utf8')
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:9292/ocr/prediction"
test_img_dir = "../../doc/imgs/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
data = {"feed": [{"image": image}], "fetch": ["res"]}
r = requests.post(url=url, headers=headers, data=json.dumps(data))
rjson = r.json()
print(rjson)
...@@ -18,97 +18,104 @@ import cv2 ...@@ -18,97 +18,104 @@ import cv2
import sys import sys
import numpy as np import numpy as np
import os import os
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
from paddle_serving_app.local_predict import Debugger
import time import time
import re import re
import base64 import base64
from clas_local_server import TextClassifierHelper
from det_local_server import TextDetectorHelper
from rec_local_server import TextRecognizerHelper
from tools.infer.predict_system import TextSystem, sorted_boxes
from paddle_serving_app.local_predict import Debugger
import copy
from params import read_params
global_args = read_params()
class OCRService(WebService): if global_args.use_gpu:
def init_det_debugger(self, det_model_config): from paddle_serving_server_gpu.web_service import WebService
self.det_preprocess = Sequential([ else:
ResizeByFactor(32, 960), Div(255), from paddle_serving_server.web_service import WebService
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
]) class TextSystemHelper(TextSystem):
def __init__(self, args):
self.text_detector = TextDetectorHelper(args)
self.text_recognizer = TextRecognizerHelper(args)
self.use_angle_cls = args.use_angle_cls
if self.use_angle_cls:
self.clas_client = Debugger()
self.clas_client.load_model_config(
global_args.cls_model_dir, gpu=True, profile=False)
self.text_classifier = TextClassifierHelper(args)
self.det_client = Debugger() self.det_client = Debugger()
if sys.argv[1] == 'gpu':
self.det_client.load_model_config(
det_model_config, gpu=True, profile=False)
elif sys.argv[1] == 'cpu':
self.det_client.load_model_config( self.det_client.load_model_config(
det_model_config, gpu=False, profile=False) global_args.det_model_dir, gpu=True, profile=False)
self.ocr_reader = OCRReader() self.fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
def preprocess(self, img):
feed, fetch, self.tmp_args = self.text_detector.preprocess(img)
fetch_map = self.det_client.predict(feed, fetch)
outputs = [fetch_map[x] for x in fetch]
dt_boxes = self.text_detector.postprocess(outputs, self.tmp_args)
if dt_boxes is None:
return None, None
img_crop_list = []
dt_boxes = sorted_boxes(dt_boxes)
for bno in range(len(dt_boxes)):
tmp_box = copy.deepcopy(dt_boxes[bno])
img_crop = self.get_rotate_crop_image(img, tmp_box)
img_crop_list.append(img_crop)
if self.use_angle_cls:
feed, fetch, self.tmp_args = self.text_classifier.preprocess(
img_crop_list)
fetch_map = self.clas_client.predict(feed, fetch)
outputs = [fetch_map[x] for x in self.text_classifier.fetch]
for x in fetch_map.keys():
if ".lod" in x:
self.tmp_args[x] = fetch_map[x]
img_crop_list, _ = self.text_classifier.postprocess(outputs,
self.tmp_args)
feed, fetch, self.tmp_args = self.text_recognizer.preprocess(
img_crop_list)
return feed, self.fetch, self.tmp_args
def postprocess(self, outputs, args):
return self.text_recognizer.postprocess(outputs, args)
class OCRService(WebService):
def init_rec(self):
self.text_system = TextSystemHelper(global_args)
def preprocess(self, feed=[], fetch=[]): def preprocess(self, feed=[], fetch=[]):
# TODO: to handle batch rec images
data = base64.b64decode(feed[0]["image"].encode('utf8')) data = base64.b64decode(feed[0]["image"].encode('utf8'))
data = np.fromstring(data, np.uint8) data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR) im = cv2.imdecode(data, cv2.IMREAD_COLOR)
ori_h, ori_w, _ = im.shape feed, fetch, self.tmp_args = self.text_system.preprocess(im)
det_img = self.det_preprocess(im)
_, new_h, new_w = det_img.shape
det_img = det_img[np.newaxis, :]
det_img = det_img.copy()
det_out = self.det_client.predict(
feed={"image": det_img}, fetch=["concat_1.tmp_0"])
filter_func = FilterBoxes(10, 10)
post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
sorted_boxes = SortedBoxes()
ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list])
dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
dt_boxes = sorted_boxes(dt_boxes)
get_rotate_crop_image = GetRotateCropImage()
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
if len(img_list) == 0:
return [], []
_, w, h = self.ocr_reader.resize_norm_img(img_list[0],
max_wh_ratio).shape
imgs = np.zeros((len(img_list), 3, w, h)).astype('float32')
for id, img in enumerate(img_list):
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
imgs[id] = norm_img
feed = {"image": imgs.copy()}
fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
return feed, fetch return feed, fetch
def postprocess(self, feed={}, fetch=[], fetch_map=None): def postprocess(self, feed={}, fetch=[], fetch_map=None):
rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) outputs = [fetch_map[x] for x in self.text_system.fetch]
res_lst = [] for x in fetch_map.keys():
for res in rec_res: if ".lod" in x:
res_lst.append(res[0]) self.tmp_args[x] = fetch_map[x]
res = {"res": res_lst} rec_res = self.text_system.postprocess(outputs, self.tmp_args)
res = {
"pred_text": [x[0] for x in rec_res],
"score": [str(x[1]) for x in rec_res]
}
return res return res
ocr_service = OCRService(name="ocr") if __name__ == "__main__":
ocr_service.load_model_config("ocr_rec_model") ocr_service = OCRService(name="ocr")
ocr_service.init_det_debugger(det_model_config="ocr_det_model") ocr_service.load_model_config(global_args.rec_model_dir)
if sys.argv[1] == 'gpu': ocr_service.init_rec()
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) if global_args.use_gpu:
ocr_service.run_debugger_service(gpu=True) ocr_service.prepare_server(
elif sys.argv[1] == 'cpu': workdir="workdir", port=9292, device="gpu", gpuid=0)
else:
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_debugger_service() ocr_service.run_debugger_service()
ocr_service.run_web_service() ocr_service.run_web_service()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import OCRReader
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from clas_rpc_server import TextClassifierHelper
from det_rpc_server import TextDetectorHelper
from rec_rpc_server import TextRecognizerHelper
from tools.infer.predict_system import TextSystem, sorted_boxes
import copy
from params import read_params
global_args = read_params()
if global_args.use_gpu:
from paddle_serving_server_gpu.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class TextSystemHelper(TextSystem):
def __init__(self, args):
self.text_detector = TextDetectorHelper(args)
self.text_recognizer = TextRecognizerHelper(args)
self.use_angle_cls = args.use_angle_cls
if self.use_angle_cls:
self.clas_client = Client()
self.clas_client.load_client_config(
"ocr_clas_client/serving_client_conf.prototxt")
self.clas_client.connect(["127.0.0.1:9294"])
self.text_classifier = TextClassifierHelper(args)
self.det_client = Client()
self.det_client.load_client_config(
"det_db_client/serving_client_conf.prototxt")
self.det_client.connect(["127.0.0.1:9293"])
self.fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
def preprocess(self, img):
feed, fetch, self.tmp_args = self.text_detector.preprocess(img)
fetch_map = self.det_client.predict(feed, fetch)
outputs = [fetch_map[x] for x in fetch]
dt_boxes = self.text_detector.postprocess(outputs, self.tmp_args)
print(dt_boxes)
if dt_boxes is None:
return None, None
img_crop_list = []
dt_boxes = sorted_boxes(dt_boxes)
for bno in range(len(dt_boxes)):
tmp_box = copy.deepcopy(dt_boxes[bno])
img_crop = self.get_rotate_crop_image(img, tmp_box)
img_crop_list.append(img_crop)
if self.use_angle_cls:
feed, fetch, self.tmp_args = self.text_classifier.preprocess(
img_crop_list)
fetch_map = self.clas_client.predict(feed, fetch)
print(fetch_map)
outputs = [fetch_map[x] for x in self.text_classifier.fetch]
for x in fetch_map.keys():
if ".lod" in x:
self.tmp_args[x] = fetch_map[x]
img_crop_list, _ = self.text_classifier.postprocess(outputs,
self.tmp_args)
feed, fetch, self.tmp_args = self.text_recognizer.preprocess(
img_crop_list)
return feed, self.fetch, self.tmp_args
def postprocess(self, outputs, args):
return self.text_recognizer.postprocess(outputs, args)
class OCRService(WebService):
def init_rec(self):
self.text_system = TextSystemHelper(global_args)
def preprocess(self, feed=[], fetch=[]):
# TODO: to handle batch rec images
data = base64.b64decode(feed[0]["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
feed, fetch, self.tmp_args = self.text_system.preprocess(im)
return feed, fetch
def postprocess(self, feed={}, fetch=[], fetch_map=None):
outputs = [fetch_map[x] for x in self.text_system.fetch]
for x in fetch_map.keys():
if ".lod" in x:
self.tmp_args[x] = fetch_map[x]
rec_res = self.text_system.postprocess(outputs, self.tmp_args)
res = {
"pred_text": [x[0] for x in rec_res],
"score": [str(x[1]) for x in rec_res]
}
return res
if __name__ == "__main__":
ocr_service = OCRService(name="ocr")
ocr_service.load_model_config(global_args.rec_model_dir)
ocr_service.init_rec()
if global_args.use_gpu:
ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
else:
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_rpc_service()
ocr_service.run_web_service()
...@@ -20,11 +20,13 @@ import base64 ...@@ -20,11 +20,13 @@ import base64
import os, sys import os, sys
import time import time
def cv2_to_base64(image): def cv2_to_base64(image):
#data = cv2.imencode('.jpg', image)[1] #data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(image).decode( return base64.b64encode(image).decode(
'utf8') #data.tostring()).decode('utf8') 'utf8') #data.tostring()).decode('utf8')
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:9292/ocr/prediction" url = "http://127.0.0.1:9292/ocr/prediction"
test_img_dir = "../../doc/imgs/" test_img_dir = "../../doc/imgs/"
...@@ -34,4 +36,5 @@ for img_file in os.listdir(test_img_dir): ...@@ -34,4 +36,5 @@ for img_file in os.listdir(test_img_dir):
image = cv2_to_base64(image_data1) image = cv2_to_base64(image_data1)
data = {"feed": [{"image": image}], "fetch": ["res"]} data = {"feed": [{"image": image}], "fetch": ["res"]}
r = requests.post(url=url, headers=headers, data=json.dumps(data)) r = requests.post(url=url, headers=headers, data=json.dumps(data))
print(r.json()) rjson = r.json()
print(rjson)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import OCRReader
import cv2
import sys
import numpy as np
import os
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time
import re
import base64
class OCRService(WebService):
def init_det_client(self, det_port, det_client_config):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.det_client = Client()
self.det_client.load_client_config(det_client_config)
self.det_client.connect(["127.0.0.1:{}".format(det_port)])
self.ocr_reader = OCRReader()
def preprocess(self, feed=[], fetch=[]):
data = base64.b64decode(feed[0]["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
ori_h, ori_w, _ = im.shape
det_img = self.det_preprocess(im)
det_out = self.det_client.predict(
feed={"image": det_img}, fetch=["concat_1.tmp_0"])
_, new_h, new_w = det_img.shape
filter_func = FilterBoxes(10, 10)
post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
sorted_boxes = SortedBoxes()
ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list])
dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
dt_boxes = sorted_boxes(dt_boxes)
get_rotate_crop_image = GetRotateCropImage()
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
return feed_list, fetch
def postprocess(self, feed={}, fetch=[], fetch_map=None):
rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": res_lst}
return res
ocr_service = OCRService(name="ocr")
ocr_service.load_model_config("ocr_rec_model")
if sys.argv[1] == 'gpu':
ocr_service.set_gpus("0")
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0)
elif sys.argv[1] == 'cpu':
ocr_service.prepare_server(workdir="workdir", port=9292)
ocr_service.init_det_client(
det_port=9293,
det_client_config="ocr_det_client/serving_client_conf.prototxt")
ocr_service.run_rpc_service()
ocr_service.run_web_service()
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Config(object):
pass
def read_params():
cfg = Config()
#use gpu
cfg.use_gpu = False
cfg.use_pdserving = True
#params for text detector
cfg.det_algorithm = "DB"
cfg.det_model_dir = "./det_infer_server/"
cfg.det_max_side_len = 960
#DB parmas
cfg.det_db_thresh =0.3
cfg.det_db_box_thresh =0.5
cfg.det_db_unclip_ratio =2.0
#EAST parmas
cfg.det_east_score_thresh = 0.8
cfg.det_east_cover_thresh = 0.1
cfg.det_east_nms_thresh = 0.2
#params for text recognizer
cfg.rec_algorithm = "CRNN"
cfg.rec_model_dir = "./rec_infer_server/"
cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch'
cfg.rec_batch_num = 30
cfg.max_text_length = 25
cfg.rec_char_dict_path = "./ppocr_keys_v1.txt"
cfg.use_space_char = True
#params for text classifier
cfg.use_angle_cls = True
cfg.cls_model_dir = "./cls_infer_server/"
cfg.cls_image_shape = "3, 48, 192"
cfg.label_list = ['0', '180']
cfg.cls_batch_num = 30
cfg.cls_thresh = 0.9
return cfg
[English](readme_en.md) | 简体中文
PaddleOCR提供2种服务部署方式:
- 基于PaddleHub Serving的部署:代码路径为"`./deploy/hubserving`",使用方法参考[文档](../hubserving/readme.md)
- 基于PaddleServing的部署:代码路径为"`./deploy/pdserving`",按照本教程使用。
# Paddle Serving 服务部署
本教程将介绍基于[Paddle Serving](https://github.com/PaddlePaddle/Serving)部署PaddleOCR在线预测服务的详细步骤。
## 快速启动服务
### 1. 准备环境
我们先安装Paddle Serving相关组件
我们推荐用户使用GPU来做Paddle Serving的OCR服务部署
**CUDA版本:9.0**
**CUDNN版本:7.0**
**操作系统版本:CentOS 6以上**
**Python版本: 2.7/3.6/3.7**
**Python操作指南:**
```
#CPU/GPU版本选择一个
#GPU版本服务端
python -m pip install paddle_serving_server_gpu
#CPU版本服务端
python -m pip install paddle_serving_server
#客户端和App包使用以下链接(CPU,GPU通用)
python -m pip install paddle_serving_app paddle_serving_client
```
### 2. 模型转换
可以使用`paddle_serving_app`提供的模型,执行下列命令
```
python -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz
python -m paddle_serving_app.package --get_model ocr_det
tar -xzvf ocr_det.tar.gz
```
执行上述命令会下载`db_crnn_mobile`的模型,如果想要下载规模更大的`db_crnn_server`模型,可以在下载预测模型并解压之后。参考[如何从Paddle保存的预测模型转为Paddle Serving格式可部署的模型](https://github.com/PaddlePaddle/Serving/blob/develop/doc/INFERENCE_TO_SERVING_CN.md)
我们以`ch_rec_r34_vd_crnn`模型作为例子,下载链接在:
```
wget --no-check-certificate https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar
tar xf ch_rec_r34_vd_crnn_infer.tar
```
因此我们按照Serving模型转换教程,运行下列python文件。
```
from paddle_serving_client.io import inference_model_to_serving
inference_model_dir = "ch_rec_r34_vd_crnn"
serving_client_dir = "serving_client_dir"
serving_server_dir = "serving_server_dir"
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir, model_filename="model", params_filename="params")
```
最终会在`serving_client_dir``serving_server_dir`生成客户端和服务端的模型配置。
### 3. 启动服务
启动服务可以根据实际需求选择启动`标准版`或者`快速版`,两种方式的对比如下表:
|版本|特点|适用场景|
|-|-|-|
|标准版|稳定性高,分布式部署|适用于吞吐量大,需要跨机房部署的情况|
|快速版|部署方便,预测速度快|适用于对预测速度要求高,迭代速度快的场景|
#### 方式1. 启动标准版服务
```
# cpu,gpu启动二选一,以下是cpu启动
python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu
# gpu启动
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py gpu
```
#### 方式2. 启动快速版服务
```
# cpu,gpu启动二选一,以下是cpu启动
python ocr_local_server.py cpu
# gpu启动
python ocr_local_server.py gpu
```
## 发送预测请求
```
python ocr_web_client.py
```
## 返回结果格式说明
返回结果是json格式
```
{u'result': {u'res': [u'\u571f\u5730\u6574\u6cbb\u4e0e\u571f\u58e4\u4fee\u590d\u7814\u7a76\u4e2d\u5fc3', u'\u534e\u5357\u519c\u4e1a\u5927\u5b661\u7d20\u56fe']}}
```
我们也可以打印结果json串中`res`字段的每一句话
```
土地整治与土壤修复研究中心
华南农业大学1素图
```
## 自定义修改服务逻辑
`ocr_web_server.py`或是`ocr_local_server.py`当中的`preprocess`函数里面做了检测服务和识别服务的前处理,`postprocess`函数里面做了识别的后处理服务,可以在相应的函数中做修改。调用了`paddle_serving_app`库提供的常见CV模型的前处理/后处理库。
如果想要单独启动Paddle Serving的检测服务和识别服务,参见下列表格, 执行对应的脚本即可,并且在命令行参数注明用的CPU或是GPU来提供服务。
| 模型 | 标准版 | 快速版 |
| ---- | ----------------- | ------------------- |
| 检测 | det_web_server.py | det_local_server.py |
| 识别 | rec_web_server.py | rec_local_server.py |
更多信息参见[Paddle Serving](https://github.com/PaddlePaddle/Serving)
English | [简体中文](readme.md)
PaddleOCR provides 2 service deployment methods:
- Based on **PaddleHub Serving**: Code path is "`./deploy/hubserving`". Please refer to the [tutorial](../hubserving/readme_en.md) for usage.
- Based on **PaddleServing**: Code path is "`./deploy/pdserving`". Please follow this tutorial.
# Service deployment based on Paddle Serving
This tutorial will introduce the detail steps of deploying PaddleOCR online prediction service based on [Paddle Serving](https://github.com/PaddlePaddle/Serving).
## Quick start service
### 1. Prepare the environment
Let's first install the relevant components of Paddle Serving. GPU is recommended for service deployment with Paddle Serving.
**Requirements:**
- **CUDA version: 9.0**
- **CUDNN version: 7.0**
- **Operating system version: >= CentOS 6**
- **Python version: 2.7/3.6/3.7**
**Installation:**
```
# install GPU server
python -m pip install paddle_serving_server_gpu
# or, install CPU server
python -m pip install paddle_serving_server
# install client and App package (CPU/GPU)
python -m pip install paddle_serving_app paddle_serving_client
```
### 2. Model transformation
You can directly use converted model provided by `paddle_serving_app` for convenience. Execute the following command to obtain:
```
python -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz
python -m paddle_serving_app.package --get_model ocr_det
tar -xzvf ocr_det.tar.gz
```
Executing the above command will download the `db_crnn_mobile` model, which is in different format with inference model. If you want to use other models for deployment, you can refer to the [tutorial](https://github.com/PaddlePaddle/Serving/blob/develop/doc/INFERENCE_TO_SERVING_CN.md) to convert your inference model to a model which is deployable for Paddle Serving.
We take `ch_rec_r34_vd_crnn` model as example. Download the inference model by executing the following command:
```
wget --no-check-certificate https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar
tar xf ch_rec_r34_vd_crnn_infer.tar
```
Convert the downloaded model by executing the following python script:
```
from paddle_serving_client.io import inference_model_to_serving
inference_model_dir = "ch_rec_r34_vd_crnn"
serving_client_dir = "serving_client_dir"
serving_server_dir = "serving_server_dir"
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir, model_filename="model", params_filename="params")
```
Finally, model configuration of client and server will be generated in `serving_client_dir` and `serving_server_dir`.
### 3. Start service
Start the standard version or the fast version service according to your actual needs. The comparison of the two versions is shown in the table below:
|version|characteristics|recommended scenarios|
|-|-|-|
|standard version|High stability, suitable for distributed deployment|Large throughput and cross regional deployment|
|fast version|Easy to deploy and fast to predict|Suitable for scenarios which requires high prediction speed and fast iteration speed|
#### Mode 1. Start the standard mode service
```
# start with CPU
python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu
# or, with GPU
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py gpu
```
#### Mode 2. Start the fast mode service
```
# start with CPU
python ocr_local_server.py cpu
# or, with GPU
python ocr_local_server.py gpu
```
## Send prediction requests
```
python ocr_web_client.py
```
## Returned result format
The returned result is a JSON string, eg.
```
{u'result': {u'res': [u'\u571f\u5730\u6574\u6cbb\u4e0e\u571f\u58e4\u4fee\u590d\u7814\u7a76\u4e2d\u5fc3', u'\u534e\u5357\u519c\u4e1a\u5927\u5b661\u7d20\u56fe']}}
```
You can also print the readable result in `res`:
```
土地整治与土壤修复研究中心
华南农业大学1素图
```
## User defined service module modification
The pre-processing and post-processing process, can be found in the `preprocess` and `postprocess` function in `ocr_web_server.py` or `ocr_local_server.py`. The pre-processing/post-processing library for common CV models provided by `paddle_serving_app` is called.
You can modify the corresponding code as actual needs.
If you only want to start the detection service or the recognition service, execute the corresponding script reffering to the following table. Indicate the CPU or GPU is used in the start command parameters.
| task | standard | fast |
| ---- | ----------------- | ------------------- |
| detection | det_web_server.py | det_local_server.py |
| recognition | rec_web_server.py | rec_local_server.py |
More info can be found in [Paddle Serving](https://github.com/PaddlePaddle/Serving).
...@@ -18,62 +18,158 @@ import cv2 ...@@ -18,62 +18,158 @@ import cv2
import sys import sys
import numpy as np import numpy as np
import os import os
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time import time
import re import re
import base64 import base64
from tools.infer.predict_rec import TextRecognizer
from params import read_params
global_args = read_params()
if global_args.use_gpu:
from paddle_serving_server_gpu.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class TextRecognizerHelper(TextRecognizer):
def __init__(self, args):
super(TextRecognizerHelper, self).__init__(args)
if self.loss_type == "ctc":
self.fetch = ["save_infer_model/scale_0.tmp_0", "save_infer_model/scale_1.tmp_0"]
def preprocess(self, img_list):
img_num = len(img_list)
args = {}
# Calculate the aspect ratio of all text bars
width_list = []
for img in img_list:
width_list.append(img.shape[1] / float(img.shape[0]))
indices = np.argsort(np.array(width_list))
args["indices"] = indices
predict_time = 0
beg_img_no = 0
end_img_no = img_num
norm_img_batch = []
max_wh_ratio = 0
for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no):
if self.loss_type != "srn":
norm_img = self.resize_norm_img(img_list[indices[ino]],
max_wh_ratio)
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
else:
norm_img = self.process_image_srn(img_list[indices[ino]],
self.rec_image_shape, 8, 25,
self.char_ops)
encoder_word_pos_list = []
gsrm_word_pos_list = []
gsrm_slf_attn_bias1_list = []
gsrm_slf_attn_bias2_list = []
encoder_word_pos_list.append(norm_img[1])
gsrm_word_pos_list.append(norm_img[2])
gsrm_slf_attn_bias1_list.append(norm_img[3])
gsrm_slf_attn_bias2_list.append(norm_img[4])
norm_img_batch.append(norm_img[0])
norm_img_batch = np.concatenate(norm_img_batch, axis=0).copy()
feed = {"image": norm_img_batch.copy()}
return feed, self.fetch, args
def postprocess(self, outputs, args):
if self.loss_type == "ctc":
rec_idx_batch = outputs[0]
predict_batch = outputs[1]
rec_idx_lod = args["save_infer_model/scale_0.tmp_0.lod"]
predict_lod = args["save_infer_model/scale_1.tmp_0.lod"]
indices = args["indices"]
rec_res = [['', 0.0]] * (len(rec_idx_lod) - 1)
for rno in range(len(rec_idx_lod) - 1):
beg = rec_idx_lod[rno]
end = rec_idx_lod[rno + 1]
rec_idx_tmp = rec_idx_batch[beg:end, 0]
preds_text = self.char_ops.decode(rec_idx_tmp)
beg = predict_lod[rno]
end = predict_lod[rno + 1]
probs = predict_batch[beg:end, :]
ind = np.argmax(probs, axis=1)
blank = probs.shape[1]
valid_ind = np.where(ind != (blank - 1))[0]
if len(valid_ind) == 0:
continue
score = np.mean(probs[valid_ind, ind[valid_ind]])
rec_res[indices[rno]] = [preds_text, score]
elif self.loss_type == 'srn':
char_num = self.char_ops.get_char_num()
preds = rec_idx_batch.reshape(-1)
elapse = time.time() - starttime
predict_time += elapse
total_preds = preds.copy()
for ino in range(int(len(rec_idx_batch) / self.text_len)):
preds = total_preds[ino * self.text_len:(ino + 1) *
self.text_len]
ind = np.argmax(probs, axis=1)
valid_ind = np.where(preds != int(char_num - 1))[0]
if len(valid_ind) == 0:
continue
score = np.mean(probs[valid_ind, ind[valid_ind]])
preds = preds[:valid_ind[-1] + 1]
preds_text = self.char_ops.decode(preds)
rec_res[indices[ino]] = [preds_text, score]
else:
for rno in range(len(rec_idx_batch)):
end_pos = np.where(rec_idx_batch[rno, :] == 1)[0]
if len(end_pos) <= 1:
preds = rec_idx_batch[rno, 1:]
score = np.mean(predict_batch[rno, 1:])
else:
preds = rec_idx_batch[rno, 1:end_pos[1]]
score = np.mean(predict_batch[rno, 1:end_pos[1]])
preds_text = self.char_ops.decode(preds)
rec_res[indices[rno]] = [preds_text, score]
return rec_res
class OCRService(WebService): class OCRService(WebService):
def init_rec(self): def init_rec(self):
self.ocr_reader = OCRReader() self.ocr_reader = OCRReader()
self.text_recognizer = TextRecognizerHelper(global_args)
def preprocess(self, feed=[], fetch=[]): def preprocess(self, feed=[], fetch=[]):
# TODO: to handle batch rec images
img_list = [] img_list = []
for feed_data in feed: for feed_data in feed:
data = base64.b64decode(feed_data["image"].encode('utf8')) data = base64.b64decode(feed_data["image"].encode('utf8'))
data = np.fromstring(data, np.uint8) data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR) im = cv2.imdecode(data, cv2.IMREAD_COLOR)
img_list.append(im) img_list.append(im)
max_wh_ratio = 0 feed, fetch, self.tmp_args = self.text_recognizer.preprocess(img_list)
for i, boximg in enumerate(img_list):
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
_, w, h = self.ocr_reader.resize_norm_img(img_list[0],
max_wh_ratio).shape
imgs = np.zeros((len(img_list), 3, w, h)).astype('float32')
for i, img in enumerate(img_list):
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
imgs[i] = norm_img
feed = {"image": imgs.copy()}
fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
return feed, fetch return feed, fetch
def postprocess(self, feed={}, fetch=[], fetch_map=None): def postprocess(self, feed={}, fetch=[], fetch_map=None):
rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) outputs = [fetch_map[x] for x in self.text_recognizer.fetch]
res_lst = [] for x in fetch_map.keys():
for res in rec_res: if ".lod" in x:
res_lst.append(res[0]) self.tmp_args[x] = fetch_map[x]
res = {"res": res_lst} rec_res = self.text_recognizer.postprocess(outputs, self.tmp_args)
res = {
"pred_text": [x[0] for x in rec_res],
"score": [str(x[1]) for x in rec_res]
}
return res return res
ocr_service = OCRService(name="ocr") if __name__ == "__main__":
ocr_service.load_model_config("ocr_rec_model") ocr_service = OCRService(name="ocr")
ocr_service.init_rec() ocr_service.load_model_config(global_args.rec_model_dir)
if sys.argv[1] == 'gpu': ocr_service.init_rec()
ocr_service.set_gpus("0") if global_args.use_gpu:
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) ocr_service.prepare_server(
ocr_service.run_debugger_service(gpu=True) workdir="workdir", port=9292, device="gpu", gpuid=0)
elif sys.argv[1] == 'cpu': else:
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_debugger_service() ocr_service.run_debugger_service()
ocr_service.run_web_service() ocr_service.run_web_service()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import OCRReader
import cv2
import sys
import numpy as np
import os
import time
import re
import base64
from tools.infer.predict_rec import TextRecognizer
from params import read_params
global_args = read_params()
if global_args.use_gpu:
from paddle_serving_server_gpu.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class TextRecognizerHelper(TextRecognizer):
def __init__(self, args):
super(TextRecognizerHelper, self).__init__(args)
if self.loss_type == "ctc":
self.fetch = ["save_infer_model/scale_0.tmp_0", "save_infer_model/scale_1.tmp_0"]
def preprocess(self, img_list):
img_num = len(img_list)
args = {}
# Calculate the aspect ratio of all text bars
width_list = []
for img in img_list:
width_list.append(img.shape[1] / float(img.shape[0]))
indices = np.argsort(np.array(width_list))
args["indices"] = indices
predict_time = 0
beg_img_no = 0
end_img_no = img_num
norm_img_batch = []
max_wh_ratio = 0
for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no):
if self.loss_type != "srn":
norm_img = self.resize_norm_img(img_list[indices[ino]],
max_wh_ratio)
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
else:
norm_img = self.process_image_srn(img_list[indices[ino]],
self.rec_image_shape, 8, 25,
self.char_ops)
encoder_word_pos_list = []
gsrm_word_pos_list = []
gsrm_slf_attn_bias1_list = []
gsrm_slf_attn_bias2_list = []
encoder_word_pos_list.append(norm_img[1])
gsrm_word_pos_list.append(norm_img[2])
gsrm_slf_attn_bias1_list.append(norm_img[3])
gsrm_slf_attn_bias2_list.append(norm_img[4])
norm_img_batch.append(norm_img[0])
norm_img_batch = np.concatenate(norm_img_batch, axis=0)
if img_num > 1:
feed = [{
"image": norm_img_batch[x]
} for x in range(norm_img_batch.shape[0])]
else:
feed = {"image": norm_img_batch[0]}
return feed, self.fetch, args
def postprocess(self, outputs, args):
if self.loss_type == "ctc":
rec_idx_batch = outputs[0]
predict_batch = outputs[1]
rec_idx_lod = args["save_infer_model/scale_0.tmp_0.lod"]
predict_lod = args["save_infer_model/scale_1.tmp_0.lod"]
indices = args["indices"]
rec_res = [['', 0.0]] * (len(rec_idx_lod) - 1)
for rno in range(len(rec_idx_lod) - 1):
beg = rec_idx_lod[rno]
end = rec_idx_lod[rno + 1]
rec_idx_tmp = rec_idx_batch[beg:end, 0]
preds_text = self.char_ops.decode(rec_idx_tmp)
beg = predict_lod[rno]
end = predict_lod[rno + 1]
probs = predict_batch[beg:end, :]
ind = np.argmax(probs, axis=1)
blank = probs.shape[1]
valid_ind = np.where(ind != (blank - 1))[0]
if len(valid_ind) == 0:
continue
score = np.mean(probs[valid_ind, ind[valid_ind]])
rec_res[indices[rno]] = [preds_text, score]
elif self.loss_type == 'srn':
char_num = self.char_ops.get_char_num()
preds = rec_idx_batch.reshape(-1)
elapse = time.time() - starttime
predict_time += elapse
total_preds = preds.copy()
for ino in range(int(len(rec_idx_batch) / self.text_len)):
preds = total_preds[ino * self.text_len:(ino + 1) *
self.text_len]
ind = np.argmax(probs, axis=1)
valid_ind = np.where(preds != int(char_num - 1))[0]
if len(valid_ind) == 0:
continue
score = np.mean(probs[valid_ind, ind[valid_ind]])
preds = preds[:valid_ind[-1] + 1]
preds_text = self.char_ops.decode(preds)
rec_res[indices[ino]] = [preds_text, score]
else:
for rno in range(len(rec_idx_batch)):
end_pos = np.where(rec_idx_batch[rno, :] == 1)[0]
if len(end_pos) <= 1:
preds = rec_idx_batch[rno, 1:]
score = np.mean(predict_batch[rno, 1:])
else:
preds = rec_idx_batch[rno, 1:end_pos[1]]
score = np.mean(predict_batch[rno, 1:end_pos[1]])
preds_text = self.char_ops.decode(preds)
rec_res[indices[rno]] = [preds_text, score]
return rec_res
class OCRService(WebService):
def init_rec(self):
self.ocr_reader = OCRReader()
self.text_recognizer = TextRecognizerHelper(global_args)
def preprocess(self, feed=[], fetch=[]):
# TODO: to handle batch rec images
img_list = []
for feed_data in feed:
data = base64.b64decode(feed_data["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
img_list.append(im)
feed, fetch, self.tmp_args = self.text_recognizer.preprocess(img_list)
return feed, fetch
def postprocess(self, feed={}, fetch=[], fetch_map=None):
outputs = [fetch_map[x] for x in self.text_recognizer.fetch]
for x in fetch_map.keys():
if ".lod" in x:
self.tmp_args[x] = fetch_map[x]
rec_res = self.text_recognizer.postprocess(outputs, self.tmp_args)
res = {
"pred_text": [x[0] for x in rec_res],
"score": [str(x[1]) for x in rec_res]
}
return res
if __name__ == "__main__":
ocr_service = OCRService(name="ocr")
ocr_service.load_model_config(global_args.rec_model_dir)
ocr_service.init_rec()
if global_args.use_gpu:
ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
else:
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_rpc_service()
ocr_service.run_web_service()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import requests
import json
import cv2
import base64
import os, sys
import time
def cv2_to_base64(image):
#data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(image).decode(
'utf8') #data.tostring()).decode('utf8')
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:9292/ocr/prediction"
test_img_dir = "../../doc/imgs_words/ch/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
data = {"feed": [{"image": image}], "fetch": ["res"]}
r = requests.post(url=url, headers=headers, data=json.dumps(data))
print(r.json())
# 使用Paddle Serving预测推理
阅读本文档之前,请先阅读文档 [基于Python预测引擎推理](./inference.md)
同本地执行预测一样,我们需要保存一份可以用于Paddle Serving的模型。
接下来首先介绍如何将训练的模型转换成Paddle Serving模型,然后将依次介绍文本检测、文本识别以及两者串联基于预测引擎推理。
### 一、 准备环境
我们先安装Paddle Serving相关组件
我们推荐用户使用GPU来做Paddle Serving的OCR服务部署
**CUDA版本:9.X/10.X**
**CUDNN版本:7.X**
**操作系统版本:Linux/Windows**
**Python版本: 2.7/3.6/3.7**
**Python操作指南:**
```
#CPU/GPU版本选择一个
#GPU版本服务端
python -m pip install paddle_serving_server_gpu
#CPU版本服务端
python -m pip install paddle_serving_server
#客户端和App包使用以下链接(CPU,GPU通用)
python -m pip install paddle_serving_app paddle_serving_client
```
## 二、训练模型转Serving模型
在前序文档 [基于Python预测引擎推理](./inference.md) 中,我们提供了如何把训练的checkpoint转换成Paddle模型。Paddle模型通常由一个文件夹构成,内含模型结构描述文件`model`和模型参数文件`params`。Serving模型由两个文件夹构成,用于存放客户端和服务端的配置。
我们以`ch_rec_r34_vd_crnn`模型作为例子,下载链接在:
```
wget --no-check-certificate https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar
tar xf ch_rec_r34_vd_crnn_infer.tar
```
因此我们按照Serving模型转换教程,运行下列python文件。
```
python tools/inference_to_serving.py --model_dir ch_rec_r34_vd_crnn
```
最终会在`serving_client_dir``serving_server_dir`生成客户端和服务端的模型配置。其中`serving_server_dir``serving_client_dir`的名字可以自定义。最终文件结构如下
```
/ch_rec_r34_vd_crnn/
├── serving_client_dir # 客户端配置文件夹
└── serving_server_dir # 服务端配置文件夹
```
## 三、文本检测模型Serving推理
启动服务可以根据实际需求选择启动`标准版`或者`快速版`,两种方式的对比如下表:
|版本|特点|适用场景|
|-|-|-|
|标准版|稳定性高,分布式部署|适用于吞吐量大,需要跨机房部署的情况|
|快速版|部署方便,预测速度快|适用于对预测速度要求高,迭代速度快的场景,Windows用户只能选择快速版|
接下来的命令中,我们会指定快速版和标准版的命令。需要说明的是,标准版只能用Linux平台,快速版可以支持Linux/Windows。
文本检测模型推理,默认使用DB模型的配置参数,识别默认为CRNN。
配置文件在`params.py`中,我们贴出配置部分,如果需要做改动,也在这个文件内部进行修改。
```
def read_params():
cfg = Config()
#use gpu
cfg.use_gpu = False # 是否使用GPU
cfg.use_pdserving = True # 是否使用paddleserving,必须为True
#params for text detector
cfg.det_algorithm = "DB" # 检测算法, DB/EAST等
cfg.det_model_dir = "./det_mv_server/" # 检测算法模型路径
cfg.det_max_side_len = 960
#DB params
cfg.det_db_thresh =0.3
cfg.det_db_box_thresh =0.5
cfg.det_db_unclip_ratio =2.0
#EAST params
cfg.det_east_score_thresh = 0.8
cfg.det_east_cover_thresh = 0.1
cfg.det_east_nms_thresh = 0.2
#params for text recognizer
cfg.rec_algorithm = "CRNN" # 识别算法, CRNN/RARE等
cfg.rec_model_dir = "./ocr_rec_server/" # 识别算法模型路径
cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch'
cfg.rec_batch_num = 30
cfg.max_text_length = 25
cfg.rec_char_dict_path = "./ppocr_keys_v1.txt" # 识别算法字典文件
cfg.use_space_char = True
#params for text classifier
cfg.use_angle_cls = True # 是否启用分类算法
cfg.cls_model_dir = "./ocr_clas_server/" # 分类算法模型路径
cfg.cls_image_shape = "3, 48, 192"
cfg.label_list = ['0', '180']
cfg.cls_batch_num = 30
cfg.cls_thresh = 0.9
return cfg
```
与本地预测不同的是,Serving预测需要一个客户端和一个服务端,因此接下来的教程都是两行代码。
在正式执行服务端启动命令之前,先export PYTHONPATH到工程主目录下。
```
export PYTHONPATH=$PWD:$PYTHONPATH
cd deploy/pdserving
```
为了方便用户复现Demo程序,我们提供了Chinese and English ultra-lightweight OCR model (8.1M)版本的Serving模型
```
wget --no-check-certificate https://paddleocr.bj.bcebos.com/deploy/pdserving/ocr_pdserving_suite.tar.gz
tar xf ocr_pdserving_suite.tar.gz
```
### 1. 超轻量中文检测模型推理
超轻量中文检测模型推理,可以执行如下命令启动服务端:
```
#根据环境只需要启动其中一个就可以
python det_rpc_server.py #标准版,Linux用户
python det_local_server.py #快速版,Windows/Linux用户
```
客户端
```
python det_web_client.py
```
Serving的推测和本地预测不同点在于,客户端发送请求到服务端,服务端需要检测到文字框之后返回框的坐标,此处没有后处理的图片,只能看到坐标值。
## 四、文本识别模型Serving推理
下面将介绍超轻量中文识别模型推理、基于CTC损失的识别模型推理和基于Attention损失的识别模型推理。对于中文文本识别,建议优先选择基于CTC损失的识别模型,实践中也发现基于Attention损失的效果不如基于CTC损失的识别模型。此外,如果训练时修改了文本的字典,请参考下面的自定义文本识别字典的推理。
### 1. 超轻量中文识别模型推理
超轻量中文识别模型推理,可以执行如下命令启动服务端:
需要注意params.py中的`--use_gpu`的值
```
#根据环境只需要启动其中一个就可以
python rec_rpc_server.py #标准版,Linux用户
python rec_local_server.py #快速版,Windows/Linux用户
```
如果需要使用CPU版本,还需增加 `--use_gpu False`
客户端
```
python rec_web_client.py
```
![](../imgs_words/ch/word_4.jpg)
执行命令后,上面图像的预测结果(识别的文本和得分)会打印到屏幕上,示例如下:
```
{u'result': {u'score': [u'0.89547354'], u'pred_text': ['实力活力']}}
```
## 五、方向分类模型推理
下面将介绍方向分类模型推理。
### 1. 方向分类模型推理
方向分类模型推理, 可以执行如下命令启动服务端:
需要注意params.py中的`--use_gpu`的值
```
#根据环境只需要启动其中一个就可以
python clas_rpc_server.py #标准版,Linux用户
python clas_local_server.py #快速版,Windows/Linux用户
```
客户端
```
python rec_web_client.py
```
![](../imgs_words/ch/word_4.jpg)
执行命令后,上面图像的预测结果(分类的方向和得分)会打印到屏幕上,示例如下:
```
{u'result': {u'direction': [u'0'], u'score': [u'0.9999963']}}
```
## 六、文本检测、方向分类和文字识别串联Serving推理
### 1. 超轻量中文OCR模型推理
在执行预测时,需要通过参数`image_dir`指定单张图像或者图像集合的路径、参数`det_model_dir`,`cls_model_dir``rec_model_dir`分别指定检测,方向分类和识别的inference模型路径。参数`use_angle_cls`用于控制是否启用方向分类模型。与本地预测不同的是,为了减少网络传输耗时,可视化识别结果目前不做处理,用户收到的是推理得到的文字字段。
执行如下命令启动服务端:
需要注意params.py中的`--use_gpu`的值
```
#标准版,Linux用户
#GPU用户
python -m paddle_serving_server_gpu.serve --model det_mv_server --port 9293 --gpu_id 0
python -m paddle_serving_server_gpu.serve --model ocr_cls_server --port 9294 --gpu_id 0
python ocr_rpc_server.py
#CPU用户
python -m paddle_serving_server.serve --model det_mv_server --port 9293
python -m paddle_serving_server.serve --model ocr_cls_server --port 9294
python ocr_rpc_server.py
#快速版,Windows/Linux用户
python ocr_local_server.py
```
客户端
```
python rec_web_client.py
```
...@@ -33,12 +33,13 @@ from paddle import fluid ...@@ -33,12 +33,13 @@ from paddle import fluid
class TextClassifier(object): class TextClassifier(object):
def __init__(self, args): def __init__(self, args):
if args.use_pdserving is False:
self.predictor, self.input_tensor, self.output_tensors = \ self.predictor, self.input_tensor, self.output_tensors = \
utility.create_predictor(args, mode="cls") utility.create_predictor(args, mode="cls")
self.use_zero_copy_run = args.use_zero_copy_run
self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
self.cls_batch_num = args.rec_batch_num self.cls_batch_num = args.rec_batch_num
self.label_list = args.label_list self.label_list = args.label_list
self.use_zero_copy_run = args.use_zero_copy_run
self.cls_thresh = args.cls_thresh self.cls_thresh = args.cls_thresh
def resize_norm_img(self, img): def resize_norm_img(self, img):
...@@ -103,7 +104,6 @@ class TextClassifier(object): ...@@ -103,7 +104,6 @@ class TextClassifier(object):
label_out = self.output_tensors[1].copy_to_cpu() label_out = self.output_tensors[1].copy_to_cpu()
if len(label_out.shape) != 1: if len(label_out.shape) != 1:
prob_out, label_out = label_out, prob_out prob_out, label_out = label_out, prob_out
elapse = time.time() - starttime elapse = time.time() - starttime
predict_time += elapse predict_time += elapse
for rno in range(len(label_out)): for rno in range(len(label_out)):
......
...@@ -42,7 +42,6 @@ class TextDetector(object): ...@@ -42,7 +42,6 @@ class TextDetector(object):
def __init__(self, args): def __init__(self, args):
max_side_len = args.det_max_side_len max_side_len = args.det_max_side_len
self.det_algorithm = args.det_algorithm self.det_algorithm = args.det_algorithm
self.use_zero_copy_run = args.use_zero_copy_run
preprocess_params = {'max_side_len': max_side_len} preprocess_params = {'max_side_len': max_side_len}
postprocess_params = {} postprocess_params = {}
if self.det_algorithm == "DB": if self.det_algorithm == "DB":
...@@ -75,7 +74,8 @@ class TextDetector(object): ...@@ -75,7 +74,8 @@ class TextDetector(object):
else: else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0) sys.exit(0)
if args.use_pdserving is False:
self.use_zero_copy_run = args.use_zero_copy_run
self.predictor, self.input_tensor, self.output_tensors =\ self.predictor, self.input_tensor, self.output_tensors =\
utility.create_predictor(args, mode="det") utility.create_predictor(args, mode="det")
......
...@@ -34,14 +34,15 @@ from ppocr.utils.character import CharacterOps ...@@ -34,14 +34,15 @@ from ppocr.utils.character import CharacterOps
class TextRecognizer(object): class TextRecognizer(object):
def __init__(self, args): def __init__(self, args):
if args.use_pdserving is False:
self.predictor, self.input_tensor, self.output_tensors =\ self.predictor, self.input_tensor, self.output_tensors =\
utility.create_predictor(args, mode="rec") utility.create_predictor(args, mode="rec")
self.use_zero_copy_run = args.use_zero_copy_run
self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
self.character_type = args.rec_char_type self.character_type = args.rec_char_type
self.rec_batch_num = args.rec_batch_num self.rec_batch_num = args.rec_batch_num
self.rec_algorithm = args.rec_algorithm self.rec_algorithm = args.rec_algorithm
self.text_len = args.max_text_length self.text_len = args.max_text_length
self.use_zero_copy_run = args.use_zero_copy_run
char_ops_params = { char_ops_params = {
"character_type": args.rec_char_type, "character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
...@@ -320,7 +321,7 @@ def main(args): ...@@ -320,7 +321,7 @@ def main(args):
print(e) print(e)
logger.info( logger.info(
"ERROR!!!! \n" "ERROR!!!! \n"
"Please read the FAQhttps://github.com/PaddlePaddle/PaddleOCR#faq \n" "Please read the FAQ: https://github.com/PaddlePaddle/PaddleOCR#faq \n"
"If your model has tps module: " "If your model has tps module: "
"TPS does not support variable shape.\n" "TPS does not support variable shape.\n"
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ") "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from paddle_serving_client.io import inference_model_to_serving
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", type=str)
parser.add_argument("--server_dir", type=str, default="serving_server_dir")
parser.add_argument("--client_dir", type=str, default="serving_client_dir")
return parser.parse_args()
args = parse_args()
inference_model_dir = args.model_dir
serving_client_dir = args.server_dir
serving_server_dir = args.client_dir
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir, model_filename="model", params_filename="params")
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册