diff --git a/deploy/paddle2onnx/predict_cls.py b/deploy/paddle2onnx/predict_cls.py deleted file mode 100644 index ea70e0f84f3973ae162a5081482126b82d4a80d4..0000000000000000000000000000000000000000 --- a/deploy/paddle2onnx/predict_cls.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import copy -import numpy as np -import math -import time -import traceback - -import utility -from ppocr.postprocess import build_post_process -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif - -logger = get_logger() - - -class TextClassifier(object): - def __init__(self, args): - self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] - self.cls_batch_num = args.cls_batch_num - self.cls_thresh = args.cls_thresh - postprocess_params = { - 'name': 'ClsPostProcess', - "label_list": args.label_list, - } - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors = \ - utility.create_predictor(args, 'cls', logger) - - def resize_norm_img(self, img): - imgC, imgH, imgW = self.cls_image_shape - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if self.cls_image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def __call__(self, img_list): - img_list = copy.deepcopy(img_list) - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the cls process - indices = np.argsort(np.array(width_list)) - - cls_res = [['', 0.0]] * img_num - batch_num = self.cls_batch_num - elapse = 0 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img(img_list[indices[ino]]) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - 
norm_img_batch = norm_img_batch.copy() - starttime = time.time() - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, input_dict) - prob_out = outputs[0] - cls_result = self.postprocess_op(prob_out) - elapse += time.time() - starttime - for rno in range(len(cls_result)): - label, score = cls_result[rno] - cls_res[indices[beg_img_no + rno]] = [label, score] - if '180' in label and score > self.cls_thresh: - img_list[indices[beg_img_no + rno]] = cv2.rotate( - img_list[indices[beg_img_no + rno]], 1) - return img_list, cls_res, elapse - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - text_classifier = TextClassifier(args) - valid_image_file_list = [] - img_list = [] - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - valid_image_file_list.append(image_file) - img_list.append(img) - try: - img_list, cls_res, predict_time = text_classifier(img_list) - except: - logger.info(traceback.format_exc()) - logger.info( - "ERROR!!!! \n" - "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n" - "If your model has tps module: " - "TPS does not support variable shape.\n" - "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ") - exit() - for ino in range(len(img_list)): - logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], - cls_res[ino])) - logger.info("Total predict time for {} images, cost: {:.3f}".format( - len(img_list), predict_time)) - - -if __name__ == "__main__": - main(utility.parse_args()) diff --git a/deploy/paddle2onnx/predict_det.py b/deploy/paddle2onnx/predict_det.py deleted file mode 100644 index 48ab0ec75a82dee944a9e59bd7419eea40e20973..0000000000000000000000000000000000000000 --- a/deploy/paddle2onnx/predict_det.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import numpy as np -import time -import sys - -import utility as utility -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.data import create_operators, transform -from ppocr.postprocess import build_post_process - -logger = get_logger() - - -class TextDetector(object): - def __init__(self, args): - self.args = args - self.det_algorithm = args.det_algorithm - pre_process_list = [{ - 'DetResizeForTest': { - 'image_shape': [640, 640] - } - }, { - 'NormalizeImage': { - 'std': [0.229, 0.224, 0.225], - 'mean': [0.485, 0.456, 0.406], - 'scale': '1./255.', - 'order': 'hwc' - } - }, { - 'ToCHWImage': None - }, { - 'KeepKeys': { - 'keep_keys': ['image', 'shape'] - } - }] - postprocess_params = {} - if self.det_algorithm == "DB": - postprocess_params['name'] = 'DBPostProcess' - postprocess_params["thresh"] = args.det_db_thresh - postprocess_params["box_thresh"] = args.det_db_box_thresh - postprocess_params["max_candidates"] = 1000 - postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio - postprocess_params["use_dilation"] = True - elif self.det_algorithm == "EAST": - postprocess_params['name'] = 'EASTPostProcess' - postprocess_params["score_thresh"] = args.det_east_score_thresh - postprocess_params["cover_thresh"] = args.det_east_cover_thresh - postprocess_params["nms_thresh"] = args.det_east_nms_thresh - elif self.det_algorithm == "SAST": - pre_process_list[0] = { - 'DetResizeForTest': { - 'resize_long': args.det_limit_side_len - } - } - postprocess_params['name'] = 'SASTPostProcess' - postprocess_params["score_thresh"] = args.det_sast_score_thresh - postprocess_params["nms_thresh"] = args.det_sast_nms_thresh - self.det_sast_polygon = args.det_sast_polygon - if self.det_sast_polygon: - postprocess_params["sample_pts_num"] = 6 - postprocess_params["expand_scale"] = 1.2 - postprocess_params["shrink_ratio_of_width"] = 0.2 - else: - postprocess_params["sample_pts_num"] = 2 - postprocess_params["expand_scale"] = 1.0 - postprocess_params["shrink_ratio_of_width"] = 0.3 - else: - logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) - sys.exit(0) - - self.preprocess_op = create_operators(pre_process_list) - self.postprocess_op = build_post_process(postprocess_params) - print() - self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor( - args, 'det', logger) # paddle.jit.load(args.det_model_dir) - # self.predictor.eval() - - def order_points_clockwise(self, pts): - """ - reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py - # sort the points based on their x-coordinates - """ - xSorted = pts[np.argsort(pts[:, 0]), :] - - # grab the left-most and right-most points from the sorted - # x-roodinate points - leftMost = xSorted[:2, :] - rightMost = xSorted[2:, :] - - # now, sort the left-most coordinates according to their - # y-coordinates so we can grab the top-left and bottom-left - # points, respectively - leftMost = leftMost[np.argsort(leftMost[:, 1]), :] - (tl, bl) = leftMost - - rightMost = rightMost[np.argsort(rightMost[:, 1]), :] - (tr, br) = rightMost - - rect = np.array([tl, tr, br, bl], dtype="float32") - return rect - - def clip_det_res(self, points, img_height, img_width): - for pno in 
range(points.shape[0]): - points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) - points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) - return points - - def filter_tag_det_res(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.order_points_clockwise(box) - box = self.clip_det_res(box, img_height, img_width) - rect_width = int(np.linalg.norm(box[0] - box[1])) - rect_height = int(np.linalg.norm(box[0] - box[3])) - if rect_width <= 3 or rect_height <= 3: - continue - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.clip_det_res(box, img_height, img_width) - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def __call__(self, img): - ori_im = img.copy() - data = {'image': img} - data = transform(data, self.preprocess_op) - img, shape_list = data - if img is None: - return None, 0 - img = np.expand_dims(img, axis=0) - shape_list = np.expand_dims(shape_list, axis=0) - img = img.copy() - starttime = time.time() - - input_dict = {} - input_dict[self.input_tensor.name] = img - - outputs = self.predictor.run(self.output_tensors, input_dict) - - preds = {} - if self.det_algorithm == "EAST": - preds['f_geo'] = outputs[0] - preds['f_score'] = outputs[1] - elif self.det_algorithm == 'SAST': - preds['f_border'] = outputs[0] - preds['f_score'] = outputs[1] - preds['f_tco'] = outputs[2] - preds['f_tvo'] = outputs[3] - elif self.det_algorithm == 'DB': - preds['maps'] = outputs[0] - else: - raise NotImplementedError - - post_result = self.postprocess_op(preds, shape_list) - dt_boxes = post_result[0]['points'] - if self.det_algorithm == "SAST" and self.det_sast_polygon: - dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape) - else: - dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) - elapse = time.time() - starttime - return dt_boxes, elapse - - -if __name__ == "__main__": - args = utility.parse_args() - image_file_list = get_image_file_list(args.image_dir) - text_detector = TextDetector(args) - count = 0 - total_time = 0 - draw_img_save = "./inference_results" - if not os.path.exists(draw_img_save): - os.makedirs(draw_img_save) - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - dt_boxes, elapse = text_detector(img) - if count > 0: - total_time += elapse - count += 1 - logger.info("Predict time of {}: {}".format(image_file, elapse)) - src_im = utility.draw_text_det_res(dt_boxes, image_file) - img_name_pure = os.path.split(image_file)[-1] - img_path = os.path.join(draw_img_save, - "det_res_{}".format(img_name_pure)) - cv2.imwrite(img_path, src_im) - logger.info("The visualized image saved in {}".format(img_path)) - if count > 1: - logger.info("Avg Time: {}".format(total_time / (count - 1))) diff --git a/deploy/paddle2onnx/predict_rec.py b/deploy/paddle2onnx/predict_rec.py deleted file mode 100644 index 648cca2c90dbf10979f1a17583455b86885e2e69..0000000000000000000000000000000000000000 --- a/deploy/paddle2onnx/predict_rec.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import numpy as np -import math -import time -import traceback - -import utility -from ppocr.postprocess import build_post_process -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif - -logger = get_logger() - - -class TextRecognizer(object): - def __init__(self, args): - self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] - self.character_type = args.rec_char_type - self.rec_batch_num = args.rec_batch_num - self.rec_algorithm = args.rec_algorithm - postprocess_params = { - 'name': 'CTCLabelDecode', - "character_type": args.rec_char_type, - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors = \ - utility.create_predictor(args, 'rec', logger) - - def resize_norm_img(self, img, max_wh_ratio): - imgC, imgH, imgW = self.rec_image_shape - assert imgC == img.shape[2] - # if self.character_type == "ch": - # imgW = int((32 * max_wh_ratio)) - h, w = img.shape[:2] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def __call__(self, img_list): - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the recognition process - indices = np.argsort(np.array(width_list)) - - # rec_res = [] - rec_res = [['', 0.0]] * img_num - batch_num = self.rec_batch_num - elapse = 0 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - # h, w = img_list[ino].shape[0:2] - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - # norm_img = self.resize_norm_img(img_list[ino], max_wh_ratio) - norm_img = self.resize_norm_img(img_list[indices[ino]], - max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - starttime = time.time() - input_dict = {} - input_dict[self.input_tensor.name] = 
norm_img_batch - outputs = self.predictor.run(self.output_tensors, input_dict) - preds = outputs[0] - rec_result = self.postprocess_op(preds) - for rno in range(len(rec_result)): - rec_res[indices[beg_img_no + rno]] = rec_result[rno] - elapse += time.time() - starttime - return rec_res, elapse - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - text_recognizer = TextRecognizer(args) - valid_image_file_list = [] - img_list = [] - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - valid_image_file_list.append(image_file) - img_list.append(img) - try: - rec_res, predict_time = text_recognizer(img_list) - except: - logger.info(traceback.format_exc()) - logger.info( - "ERROR!!!! \n" - "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n" - "If your model has tps module: " - "TPS does not support variable shape.\n" - "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ") - exit() - for ino in range(len(img_list)): - logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], - rec_res[ino])) - logger.info("Total predict time for {} images, cost: {:.3f}".format( - len(img_list), predict_time)) - - -if __name__ == "__main__": - main(utility.parse_args()) diff --git a/deploy/paddle2onnx/predict_system.py b/deploy/paddle2onnx/predict_system.py deleted file mode 100644 index bebb47269f943ed5bb8bacd68212bbebacb01a11..0000000000000000000000000000000000000000 --- a/deploy/paddle2onnx/predict_system.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import copy -import numpy as np -import time -from PIL import Image -import utility as utility -import predict_rec as predict_rec -import predict_det as predict_det -import predict_cls as predict_cls -from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.utils.logging import get_logger -from utility import draw_ocr_box_txt - -logger = get_logger() - - -class TextSystem(object): - def __init__(self, args): - self.text_detector = predict_det.TextDetector(args) - self.text_recognizer = predict_rec.TextRecognizer(args) - self.use_angle_cls = args.use_angle_cls - self.drop_score = args.drop_score - if self.use_angle_cls: - self.text_classifier = predict_cls.TextClassifier(args) - - def get_rotate_crop_image(self, img, points): - ''' - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - ''' - img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), - np.linalg.norm(points[2] - points[3]))) - img_crop_height = int( - max( - np.linalg.norm(points[0] - points[3]), - np.linalg.norm(points[1] - points[2]))) - pts_std = np.float32([[0, 0], [img_crop_width, 0], - [img_crop_width, img_crop_height], - [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE, - flags=cv2.INTER_CUBIC) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) - return dst_img - - def print_draw_crop_rec_res(self, img_crop_list, rec_res): - bbox_num = len(img_crop_list) - for bno in range(bbox_num): - cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno]) - logger.info(bno, rec_res[bno]) - - def __call__(self, img): - ori_im = img.copy() - dt_boxes, elapse = self.text_detector(img) - logger.info("dt_boxes num : {}, elapse : {}".format( - len(dt_boxes), elapse)) - if dt_boxes is None: - return None, None - img_crop_list = [] - - dt_boxes = sorted_boxes(dt_boxes) - - for bno in range(len(dt_boxes)): - tmp_box = copy.deepcopy(dt_boxes[bno]) - img_crop = self.get_rotate_crop_image(ori_im, tmp_box) - img_crop_list.append(img_crop) - if self.use_angle_cls: - img_crop_list, angle_list, elapse = self.text_classifier( - img_crop_list) - logger.info("cls num : {}, elapse : {}".format( - len(img_crop_list), elapse)) - - rec_res, elapse = self.text_recognizer(img_crop_list) - logger.info("rec_res num : {}, elapse : {}".format( - len(rec_res), elapse)) - # self.print_draw_crop_rec_res(img_crop_list, rec_res) - filter_boxes, filter_rec_res = [], [] - for box, rec_reuslt in zip(dt_boxes, rec_res): - text, score = rec_reuslt - if score >= self.drop_score: - filter_boxes.append(box) - filter_rec_res.append(rec_reuslt) - return filter_boxes, filter_rec_res - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape [4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = 
dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): - tmp = _boxes[i] - _boxes[i] = _boxes[i + 1] - _boxes[i + 1] = tmp - return _boxes - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - text_sys = TextSystem(args) - is_visualize = True - font_path = args.vis_font_path - drop_score = args.drop_score - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - #img = utility.resize_img(img, 640) - #img = utility.padding_img(img, [640,640]) - print(img.shape) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - starttime = time.time() - dt_boxes, rec_res = text_sys(img) - elapse = time.time() - starttime - logger.info("Predict time of %s: %.3fs" % (image_file, elapse)) - - for text, score in rec_res: - logger.info("{}, {:.3f}".format(text, score)) - - if is_visualize: - image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) - boxes = dt_boxes - txts = [rec_res[i][0] for i in range(len(rec_res))] - scores = [rec_res[i][1] for i in range(len(rec_res))] - - draw_img = draw_ocr_box_txt( - image, - boxes, - txts, - scores, - drop_score=drop_score, - font_path=font_path) - draw_img_save = "./results/" - if not os.path.exists(draw_img_save): - os.makedirs(draw_img_save) - cv2.imwrite( - os.path.join(draw_img_save, os.path.basename(image_file)), - draw_img[:, :, ::-1]) - logger.info("The visualized image saved in {}".format( - os.path.join(draw_img_save, os.path.basename(image_file)))) - - -if __name__ == "__main__": - main(utility.parse_args()) \ No newline at end of file diff --git a/deploy/paddle2onnx/readme.md b/deploy/paddle2onnx/readme.md new file mode 100644 index 0000000000000000000000000000000000000000..0350c27ed983dc1cf379c8efaedb521e1ae2e8c7 --- /dev/null +++ b/deploy/paddle2onnx/readme.md @@ -0,0 +1,68 @@ +# Paddle2ONNX model conversion and prediction + +This section describes how to convert a PaddleOCR model into an ONNX model and run prediction on the ONNX engine. + +## 1. Environment setup + +Both the Paddle2ONNX conversion environment and the ONNX inference environment need to be prepared. + +### Paddle2ONNX +Paddle2ONNX converts models from the PaddlePaddle format to the ONNX format. Operator export is currently stable for ONNX Opset 9~11, and some Paddle operators can be converted to lower ONNX Opsets. +For more details, see [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/README_zh.md). + +- Install Paddle2ONNX +``` +python3.7 -m pip install paddle2onnx +``` + +- Install ONNX Runtime +``` +# version 1.4.0 is recommended; change the version number to fit your environment +python3.7 -m pip install onnxruntime==1.4.0 +``` +- Download a Paddle model + +There are two ways to obtain a Paddle static-graph model: download an inference model provided by PaddleOCR from the [model_list](../../doc/doc_ch/models_list.md), +or follow the [model export guide](../../doc/doc_ch/inference.md#训练模型转inference模型) to convert trained weights into an inference model. + +Take the ppocr detection model as an example: + +``` +wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar +cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && cd .. +``` + +## 2. Model conversion + +Use Paddle2ONNX to convert the Paddle static-graph model to the ONNX format: + +``` +paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \ +--model_filename=inference.pdmodel \ +--params_filename=inference.pdiparams \ +--save_file=./inference/det_mobile_onnx/model.onnx \ +--opset_version=10 \ +--enable_onnx_checker=True +``` + +After the command finishes, the ONNX model is saved under `./inference/det_mobile_onnx/`. + +## 3. ONNX inference + +Taking the detection model as an example, ONNX prediction can be run with the following command: + +``` +python3.7 ../../tools/infer/predict_det.py --use_gpu=False --use_onnx=True \ +--det_model_dir=./inference/det_mobile_onnx/model.onnx \ +--image_dir=../../doc/imgs/1.jpg +``` + +After the command runs, the predicted detection box coordinates are printed in the terminal, and the visualized result is saved under `./inference_results/`. + +``` +root INFO: 1.jpg [[[291, 295], [334, 292], [348, 844], [305, 847]], [[344, 296], [379, 294], [387, 669], [353, 671]]] +The predict time of ../../doc/imgs/1.jpg: 0.06162881851196289 +The visualized image saved in ./inference_results/det_res_1.jpg +``` + +* Note: ONNX does not yet support variable-shape prediction, because the input has to be resized to a fixed shape; the results may therefore differ slightly from direct Paddle inference. diff --git a/deploy/paddle2onnx/utility.py b/deploy/paddle2onnx/utility.py deleted file mode 100644 index c770d8db82c6baabecabafe91b0167100b848eea..0000000000000000000000000000000000000000 --- a/deploy/paddle2onnx/utility.py +++ /dev/null @@ -1,375 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import sys -import cv2 -import numpy as np -import json -from PIL import Image, ImageDraw, ImageFont -import math -import onnxruntime as ort - - -def parse_args(): - def str2bool(v): - return v.lower() in ("true", "t", "1") - - parser = argparse.ArgumentParser() - # params for prediction engine - parser.add_argument("--use_gpu", type=str2bool, default=True) - parser.add_argument("--ir_optim", type=str2bool, default=True) - parser.add_argument("--use_tensorrt", type=str2bool, default=False) - parser.add_argument("--use_fp16", type=str2bool, default=False) - parser.add_argument("--gpu_mem", type=int, default=500) - - # params for text detector - parser.add_argument("--image_dir", type=str) - parser.add_argument("--det_algorithm", type=str, default='DB') - parser.add_argument("--det_model_dir", type=str) - parser.add_argument("--det_limit_side_len", type=float, default=960) - parser.add_argument("--det_limit_type", type=str, default='max') - - # DB parmas - parser.add_argument("--det_db_thresh", type=float, default=0.3) - parser.add_argument("--det_db_box_thresh", type=float, default=0.5) - parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6) - parser.add_argument("--max_batch_size", type=int, default=10) - # EAST parmas - parser.add_argument("--det_east_score_thresh", type=float, default=0.8) - parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) - parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) - - # SAST parmas - parser.add_argument("--det_sast_score_thresh", type=float, default=0.5) - parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2) - parser.add_argument("--det_sast_polygon", type=bool, default=False) - - # params for text recognizer - parser.add_argument("--rec_algorithm", type=str, default='CRNN') - parser.add_argument("--rec_model_dir", type=str) - parser.add_argument("--rec_image_shape", type=str, default="3, 32, 100") - parser.add_argument("--rec_char_type", type=str, default='ch') - parser.add_argument("--rec_batch_num", 
type=int, default=6) - parser.add_argument("--max_text_length", type=int, default=25) - parser.add_argument( - "--rec_char_dict_path", - type=str, - default="./ppocr/utils/ppocr_keys_v1.txt") - parser.add_argument("--use_space_char", type=str2bool, default=True) - parser.add_argument( - "--vis_font_path", type=str, default="./doc/simfang.ttf") - parser.add_argument("--drop_score", type=float, default=0.5) - - # params for text classifier - parser.add_argument("--use_angle_cls", type=str2bool, default=False) - parser.add_argument("--cls_model_dir", type=str) - parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") - parser.add_argument("--label_list", type=list, default=['0', '180']) - parser.add_argument("--cls_batch_num", type=int, default=6) - parser.add_argument("--cls_thresh", type=float, default=0.9) - - parser.add_argument("--enable_mkldnn", type=str2bool, default=False) - parser.add_argument("--use_pdserving", type=str2bool, default=False) - - return parser.parse_args() - - -def create_predictor(args, mode, logger): - if mode == "det": - model_dir = args.det_model_dir - elif mode == 'cls': - model_dir = args.cls_model_dir - else: - model_dir = args.rec_model_dir - - if model_dir is None: - logger.info("not find {} model file path {}".format(mode, model_dir)) - sys.exit(0) - model_file_path = model_dir - - if not os.path.exists(model_file_path): - logger.info("not find model file path {}".format(model_file_path)) - sys.exit(0) - - sess = ort.InferenceSession(model_file_path) - - return sess, sess.get_inputs()[0], None - - -def draw_text_det_res(dt_boxes, img_path): - src_im = cv2.imread(img_path) - for box in dt_boxes: - box = np.array(box).astype(np.int32).reshape(-1, 2) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - return src_im - - -def resize_img(img, input_size=600): - """ - resize img and limit the longest side of the image to input_size - """ - img = np.array(img) - im_shape = img.shape - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) - return img - - -def padding_img(img, input_size=[640, 640]): - padding_im_h = input_size[0] - padding_im_w = input_size[1] - im_c = img.shape[2] - im_h = img.shape[0] - im_w = img.shape[1] - padding_im = np.zeros( - (padding_im_h, padding_im_w, im_c), dtype=np.float32) + 255 - padding_im[:im_h, :im_w, :] = img - return padding_im - - -def draw_ocr(image, - boxes, - txts=None, - scores=None, - drop_score=0.5, - font_path="./doc/simfang.ttf"): - """ - Visualize the results of OCR detection and recognition - args: - image(Image|array): RGB image - boxes(list): boxes with shape(N, 4, 2) - txts(list): the texts - scores(list): txxs corresponding scores - drop_score(float): only scores greater than drop_threshold will be visualized - font_path: the path of font which is used to draw text - return(array): - the visualized img - """ - if scores is None: - scores = [1] * len(boxes) - box_num = len(boxes) - for i in range(box_num): - if scores is not None and (scores[i] < drop_score or - math.isnan(scores[i])): - continue - box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - if txts is not None: - img = np.array(resize_img(image, input_size=600)) - txt_img = text_visual( - txts, - scores, - img_h=img.shape[0], - img_w=600, - threshold=drop_score, - font_path=font_path) - img = np.concatenate([np.array(img), 
np.array(txt_img)], axis=1) - return img - return image - - -def draw_ocr_box_txt(image, - boxes, - txts, - scores=None, - drop_score=0.5, - font_path="./doc/fonts/simfang.ttf"): - h, w = image.height, image.width - img_left = image.copy() - img_right = Image.new('RGB', (w, h), (255, 255, 255)) - - import random - - random.seed(0) - draw_left = ImageDraw.Draw(img_left) - draw_right = ImageDraw.Draw(img_right) - for idx, (box, txt) in enumerate(zip(boxes, txts)): - if scores is not None and scores[idx] < drop_score: - continue - color = (random.randint(0, 255), random.randint(0, 255), - random.randint(0, 255)) - draw_left.polygon(box, fill=color) - draw_right.polygon( - [ - box[0][0], box[0][1], box[1][0], box[1][1], box[2][0], - box[2][1], box[3][0], box[3][1] - ], - outline=color) - box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][ - 1])**2) - box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][ - 1])**2) - if box_height > 2 * box_width: - font_size = max(int(box_width * 0.9), 10) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - cur_y = box[0][1] - for c in txt: - char_size = font.getsize(c) - draw_right.text( - (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font) - cur_y += char_size[1] - else: - font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - draw_right.text( - [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font) - img_left = Image.blend(image, img_left, 0.5) - img_show = Image.new('RGB', (w * 2, h), (255, 255, 255)) - img_show.paste(img_left, (0, 0, w, h)) - img_show.paste(img_right, (w, 0, w * 2, h)) - return np.array(img_show) - - -def str_count(s): - """ - Count the number of Chinese characters, - a single English character and a single number - equal to half the length of Chinese characters. 
- args: - s(string): the input of string - return(int): - the number of Chinese characters - """ - import string - count_zh = count_pu = 0 - s_len = len(s) - en_dg_count = 0 - for c in s: - if c in string.ascii_letters or c.isdigit() or c.isspace(): - en_dg_count += 1 - elif c.isalpha(): - count_zh += 1 - else: - count_pu += 1 - return s_len - math.ceil(en_dg_count / 2) - - -def text_visual(texts, - scores, - img_h=400, - img_w=600, - threshold=0., - font_path="./doc/simfang.ttf"): - """ - create new blank img and draw txt on it - args: - texts(list): the text will be draw - scores(list|None): corresponding score of each txt - img_h(int): the height of blank img - img_w(int): the width of blank img - font_path: the path of font which is used to draw text - return(array): - """ - if scores is not None: - assert len(texts) == len( - scores), "The number of txts and corresponding scores must match" - - def create_blank_img(): - blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 - blank_img[:, img_w - 1:] = 0 - blank_img = Image.fromarray(blank_img).convert("RGB") - draw_txt = ImageDraw.Draw(blank_img) - return blank_img, draw_txt - - blank_img, draw_txt = create_blank_img() - - font_size = 20 - txt_color = (0, 0, 0) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - - gap = font_size + 5 - txt_img_list = [] - count, index = 1, 0 - for idx, txt in enumerate(texts): - index += 1 - if scores[idx] < threshold or math.isnan(scores[idx]): - index -= 1 - continue - first_line = True - while str_count(txt) >= img_w // font_size - 4: - tmp = txt - txt = tmp[:img_w // font_size - 4] - if first_line: - new_txt = str(index) + ': ' + txt - first_line = False - else: - new_txt = ' ' + txt - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - txt = tmp[img_w // font_size - 4:] - if count >= img_h // gap - 1: - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - if first_line: - new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) - else: - new_txt = " " + txt + " " + '%.3f' % (scores[idx]) - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - # whether add new blank img or not - if count >= img_h // gap - 1 and idx + 1 < len(texts): - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - txt_img_list.append(np.array(blank_img)) - if len(txt_img_list) == 1: - blank_img = np.array(txt_img_list[0]) - else: - blank_img = np.concatenate(txt_img_list, axis=1) - return np.array(blank_img) - - -def base64_to_cv2(b64str): - import base64 - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def draw_boxes(image, boxes, scores=None, drop_score=0.5): - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score: - continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - return image - - -if __name__ == '__main__': - test_img = "./doc/test_v2" - predict_txt = "./doc/predict.txt" - f = open(predict_txt, 'r') - data = f.readlines() - img_path, anno = data[0].strip().split('\t') - img_name = os.path.basename(img_path) - img_path = os.path.join(test_img, img_name) - image = Image.open(img_path) - - data = json.loads(anno) - boxes, txts, scores = [], [], [] - for dic in data: - boxes.append(dic['points']) - 
txts.append(dic['transcription']) - scores.append(round(dic['scores'], 3)) - - new_img = draw_ocr(image, boxes, txts, scores) - - cv2.imwrite(img_name, new_img) diff --git a/test_tipc/configs/ppocr_det_mobile_params.txt b/test_tipc/configs/ppocr_det_mobile_params.txt index 6ea3539d8715771e7a92c83220484077a9460f99..62f8e5dd120181267367701f5881672c9b093611 100644 --- a/test_tipc/configs/ppocr_det_mobile_params.txt +++ b/test_tipc/configs/ppocr_det_mobile_params.txt @@ -116,7 +116,7 @@ infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2 --save_file:./inference/det_mobile_onnx/model.onnx --opset_version:10 --enable_onnx_checker:True -inference:deploy/paddle2onnx/predict_det.py +inference:tools/infer/predict_det.py --use_gpu:False --det_model_dir: --image_dir:./inference/ch_det_data_50/all-sum-510/ \ No newline at end of file diff --git a/test_tipc/test_paddle2onnx.sh b/test_tipc/test_paddle2onnx.sh index d21b4888337cfd2caa87ada0f30ee4f95e0cda49..5dc6e65ec81e6b8674877fc686c8b3650ce93a59 100644 --- a/test_tipc/test_paddle2onnx.sh +++ b/test_tipc/test_paddle2onnx.sh @@ -11,7 +11,7 @@ python=$(func_parser_value "${lines[2]}") # parser params -dataline=$(awk 'NR==111, NR==122{print}' $FILENAME) +dataline=$(awk 'NR==111, NR==123{print}' $FILENAME) IFS=$'\n' lines=(${dataline}) @@ -63,7 +63,7 @@ function func_paddle2onnx(){ set_gpu=$(func_set_params "${use_gpu_key}" "${use_gpu_value}") set_model_dir=$(func_set_params "${det_model_key}" "${save_file_value}") set_img_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}") - infer_model_cmd="${python} ${inference_py} ${set_gpu} ${set_img_dir} ${set_model_dir} > ${_save_log_path} 2>&1 " + infer_model_cmd="${python} ${inference_py} ${set_gpu} ${set_img_dir} ${set_model_dir} --use_onnx=True > ${_save_log_path} 2>&1 " eval $infer_model_cmd status_check $last_status "${infer_model_cmd}" "${status_log}" } diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py index 1c68494861e60b4aaef541a4e247071944cf420c..a25cac2600e67667badc76c648c1fcda12981a0f 100755 --- a/tools/infer/predict_cls.py +++ b/tools/infer/predict_cls.py @@ -47,6 +47,7 @@ class TextClassifier(object): self.postprocess_op = build_post_process(postprocess_params) self.predictor, self.input_tensor, self.output_tensors, _ = \ utility.create_predictor(args, 'cls', logger) + self.use_onnx = args.use_onnx def resize_norm_img(self, img): imgC, imgH, imgW = self.cls_image_shape @@ -100,10 +101,16 @@ class TextClassifier(object): norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = norm_img_batch.copy() - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.run() - prob_out = self.output_tensors[0].copy_to_cpu() - self.predictor.try_shrink_memory() + if self.use_onnx: + input_dict = {} + input_dict[self.input_tensor.name] = norm_img_batch + outputs = self.predictor.run(self.output_tensors, input_dict) + prob_out = outputs[0] + else: + self.input_tensor.copy_from_cpu(norm_img_batch) + self.predictor.run() + prob_out = self.output_tensors[0].copy_to_cpu() + self.predictor.try_shrink_memory() cls_result = self.postprocess_op(prob_out) elapse += time.time() - starttime for rno in range(len(cls_result)): diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py index b24ad2bbb504caf1f262b4e47625348ce32d6fce..5dfe8d648f06f6382e8e101a6002f7f1b7441323 100755 --- a/tools/infer/predict_det.py +++ b/tools/infer/predict_det.py @@ -38,6 +38,7 @@ class TextDetector(object): def __init__(self, args): self.args = args self.det_algorithm 
= args.det_algorithm + self.use_onnx = args.use_onnx pre_process_list = [{ 'DetResizeForTest': { 'limit_side_len': args.det_limit_side_len, @@ -100,7 +101,12 @@ class TextDetector(object): else: logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) sys.exit(0) - + if self.use_onnx: + pre_process_list[0] = { + 'DetResizeForTest': { + 'image_shape': [640, 640] + } + } self.preprocess_op = create_operators(pre_process_list) self.postprocess_op = build_post_process(postprocess_params) self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor( @@ -198,15 +204,19 @@ class TextDetector(object): if self.args.benchmark: self.autolog.times.stamp() - - self.input_tensor.copy_from_cpu(img) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.args.benchmark: - self.autolog.times.stamp() + if self.use_onnx: + input_dict = {} + input_dict[self.input_tensor.name] = img + outputs = self.predictor.run(self.output_tensors, input_dict) + else: + self.input_tensor.copy_from_cpu(img) + self.predictor.run() + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + if self.args.benchmark: + self.autolog.times.stamp() preds = {} if self.det_algorithm == "EAST": diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index 936994a215d10d543537b29cb41bfa42b42590c7..591885c384701784724980b06787c52171f8cc35 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -73,6 +73,7 @@ class TextRecognizer(object): self.predictor, self.input_tensor, self.output_tensors, self.config = \ utility.create_predictor(args, 'rec', logger) self.benchmark = args.benchmark + self.use_onnx = args.use_onnx if args.benchmark: import auto_log pid = os.getpid() @@ -108,6 +109,8 @@ class TextRecognizer(object): assert imgC == img.shape[2] max_wh_ratio = max(max_wh_ratio, imgW / imgH) imgW = int((32 * max_wh_ratio)) + if self.use_onnx: + imgW = 100 h, w = img.shape[:2] ratio = w / float(h) if math.ceil(imgH * ratio) > imgW: @@ -297,51 +300,72 @@ class TextRecognizer(object): gsrm_slf_attn_bias1_list, gsrm_slf_attn_bias2_list, ] - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle(input_names[ - i]) - input_tensor.copy_from_cpu(inputs[i]) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - preds = {"predict": outputs[2]} + if self.use_onnx: + input_dict = {} + input_dict[self.input_tensor.name] = norm_img_batch + outputs = self.predictor.run(self.output_tensors, + input_dict) + preds = {"predict": outputs[2]} + else: + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle( + input_names[i]) + input_tensor.copy_from_cpu(inputs[i]) + self.predictor.run() + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + if self.benchmark: + self.autolog.times.stamp() + preds = {"predict": outputs[2]} elif self.rec_algorithm == "SAR": valid_ratios = np.concatenate(valid_ratios) inputs = [ norm_img_batch, valid_ratios, ] - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = 
self.predictor.get_input_handle(input_names[ - i]) - input_tensor.copy_from_cpu(inputs[i]) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - preds = outputs[0] - else: - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - if len(outputs) != 1: - preds = outputs + if self.use_onnx: + input_dict = {} + input_dict[self.input_tensor.name] = norm_img_batch + outputs = self.predictor.run(self.output_tensors, + input_dict) + preds = outputs[0] else: + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle( + input_names[i]) + input_tensor.copy_from_cpu(inputs[i]) + self.predictor.run() + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + if self.benchmark: + self.autolog.times.stamp() preds = outputs[0] + else: + if self.use_onnx: + input_dict = {} + input_dict[self.input_tensor.name] = norm_img_batch + outputs = self.predictor.run(self.output_tensors, + input_dict) + preds = outputs[0] + else: + self.input_tensor.copy_from_cpu(norm_img_batch) + self.predictor.run() + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + if self.benchmark: + self.autolog.times.stamp() + if len(outputs) != 1: + preds = outputs + else: + preds = outputs[0] rec_result = self.postprocess_op(preds) for rno in range(len(rec_result)): rec_res[indices[beg_img_no + rno]] = rec_result[rno] diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 41a3c0f14b6378751a367a3709ad7943ee981a4e..a3cac647982b77f5ee54d8681b07e677987d9ccb 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -121,6 +121,7 @@ def init_args(): parser.add_argument("--save_log_path", type=str, default="./log_output/") parser.add_argument("--show_log", type=str2bool, default=True) + parser.add_argument("--use_onnx", type=str2bool, default=False) return parser @@ -144,152 +145,163 @@ def create_predictor(args, mode, logger): if model_dir is None: logger.info("not find {} model file path {}".format(mode, model_dir)) sys.exit(0) - model_file_path = model_dir + "/inference.pdmodel" - params_file_path = model_dir + "/inference.pdiparams" - if not os.path.exists(model_file_path): - raise ValueError("not find model file path {}".format(model_file_path)) - if not os.path.exists(params_file_path): - raise ValueError("not find params file path {}".format( - params_file_path)) - - config = inference.Config(model_file_path, params_file_path) - - if hasattr(args, 'precision'): - if args.precision == "fp16" and args.use_tensorrt: - precision = inference.PrecisionType.Half - elif args.precision == "int8": - precision = inference.PrecisionType.Int8 - else: - precision = inference.PrecisionType.Float32 + if args.use_onnx: + import onnxruntime as ort + model_file_path = model_dir + if not os.path.exists(model_file_path): + raise ValueError("not find model file path {}".format( + model_file_path)) + sess = ort.InferenceSession(model_file_path) + return sess, sess.get_inputs()[0], None, None + else: - precision = inference.PrecisionType.Float32 - - if args.use_gpu: - gpu_id = get_infer_gpuid() - if 
gpu_id is None: - raise ValueError( - "Not found GPU in current device. Please check your device or set args.use_gpu as False" - ) - config.enable_use_gpu(args.gpu_mem, 0) - if args.use_tensorrt: - config.enable_tensorrt_engine( - precision_mode=precision, - max_batch_size=args.max_batch_size, - min_subgraph_size=args.min_subgraph_size) - # skip the minmum trt subgraph - if mode == "det": - min_input_shape = { - "x": [1, 3, 50, 50], - "conv2d_92.tmp_0": [1, 120, 20, 20], - "conv2d_91.tmp_0": [1, 24, 10, 10], - "conv2d_59.tmp_0": [1, 96, 20, 20], - "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10], - "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20], - "conv2d_124.tmp_0": [1, 256, 20, 20], - "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20], - "elementwise_add_7": [1, 56, 2, 2], - "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2] - } - max_input_shape = { - "x": [1, 3, 2000, 2000], - "conv2d_92.tmp_0": [1, 120, 400, 400], - "conv2d_91.tmp_0": [1, 24, 200, 200], - "conv2d_59.tmp_0": [1, 96, 400, 400], - "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200], - "conv2d_124.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400], - "elementwise_add_7": [1, 56, 400, 400], - "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400] - } - opt_input_shape = { - "x": [1, 3, 640, 640], - "conv2d_92.tmp_0": [1, 120, 160, 160], - "conv2d_91.tmp_0": [1, 24, 80, 80], - "conv2d_59.tmp_0": [1, 96, 160, 160], - "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80], - "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160], - "conv2d_124.tmp_0": [1, 256, 160, 160], - "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160], - "elementwise_add_7": [1, 56, 40, 40], - "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40] - } - min_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20], - "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20] - } - max_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400] - } - opt_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160], - "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160] - } - min_input_shape.update(min_pact_shape) - max_input_shape.update(max_pact_shape) - opt_input_shape.update(opt_pact_shape) - elif mode == "rec": - min_input_shape = {"x": [1, 3, 32, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]} - opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]} - elif mode == "cls": - min_input_shape = {"x": [1, 3, 48, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]} - opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]} + model_file_path = model_dir + "/inference.pdmodel" + params_file_path = model_dir + "/inference.pdiparams" + if not os.path.exists(model_file_path): + raise ValueError("not find model file path {}".format( + model_file_path)) + if not os.path.exists(params_file_path): + raise ValueError("not find params file path {}".format( + params_file_path)) 
+ + config = inference.Config(model_file_path, params_file_path) + + if hasattr(args, 'precision'): + if args.precision == "fp16" and args.use_tensorrt: + precision = inference.PrecisionType.Half + elif args.precision == "int8": + precision = inference.PrecisionType.Int8 + else: + precision = inference.PrecisionType.Float32 + else: + precision = inference.PrecisionType.Float32 + + if args.use_gpu: + gpu_id = get_infer_gpuid() + if gpu_id is None: + raise ValueError( + "Not found GPU in current device. Please check your device or set args.use_gpu as False" + ) + config.enable_use_gpu(args.gpu_mem, 0) + if args.use_tensorrt: + config.enable_tensorrt_engine( + precision_mode=precision, + max_batch_size=args.max_batch_size, + min_subgraph_size=args.min_subgraph_size) + # skip the minimum trt subgraph + if mode == "det": + min_input_shape = { + "x": [1, 3, 50, 50], + "conv2d_92.tmp_0": [1, 120, 20, 20], + "conv2d_91.tmp_0": [1, 24, 10, 10], + "conv2d_59.tmp_0": [1, 96, 20, 20], + "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10], + "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20], + "conv2d_124.tmp_0": [1, 256, 20, 20], + "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20], + "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20], + "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20], + "elementwise_add_7": [1, 56, 2, 2], + "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2] + } + max_input_shape = { + "x": [1, 3, 2000, 2000], + "conv2d_92.tmp_0": [1, 120, 400, 400], + "conv2d_91.tmp_0": [1, 24, 200, 200], + "conv2d_59.tmp_0": [1, 96, 400, 400], + "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200], + "conv2d_124.tmp_0": [1, 256, 400, 400], + "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400], + "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400], + "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400], + "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400], + "elementwise_add_7": [1, 56, 400, 400], + "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400] + } + opt_input_shape = { + "x": [1, 3, 640, 640], + "conv2d_92.tmp_0": [1, 120, 160, 160], + "conv2d_91.tmp_0": [1, 24, 80, 80], + "conv2d_59.tmp_0": [1, 96, 160, 160], + "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80], + "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160], + "conv2d_124.tmp_0": [1, 256, 160, 160], + "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160], + "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160], + "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160], + "elementwise_add_7": [1, 56, 40, 40], + "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40] + } + min_pact_shape = { + "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20], + "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20], + "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20], + "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20] + } + max_pact_shape = { + "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400], + "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400], + "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400], + "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400] + } + opt_pact_shape = { + "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160], + "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160], + "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160], + "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160] + } + min_input_shape.update(min_pact_shape) + max_input_shape.update(max_pact_shape) + opt_input_shape.update(opt_pact_shape) + elif mode == "rec": + min_input_shape = {"x": [1, 3, 32, 10]} + 
max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]} + opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]} + elif mode == "cls": + min_input_shape = {"x": [1, 3, 48, 10]} + max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]} + opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]} + else: + min_input_shape = {"x": [1, 3, 10, 10]} + max_input_shape = {"x": [1, 3, 1000, 1000]} + opt_input_shape = {"x": [1, 3, 500, 500]} + config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, + opt_input_shape) - else: - config.disable_gpu() - if hasattr(args, "cpu_threads"): - config.set_cpu_math_library_num_threads(args.cpu_threads) else: - # default cpu threads as 10 - config.set_cpu_math_library_num_threads(10) - if args.enable_mkldnn: - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) - config.enable_mkldnn() - if args.precision == "fp16": - config.enable_mkldnn_bfloat16() - # enable memory optim - config.enable_memory_optim() - config.disable_glog_info() - - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - if mode == 'table': - config.delete_pass("fc_fuse_pass") # not supported for table - config.switch_use_feed_fetch_ops(False) - config.switch_ir_optim(True) - - # create predictor - predictor = inference.create_predictor(config) - input_names = predictor.get_input_names() - for name in input_names: - input_tensor = predictor.get_input_handle(name) - output_names = predictor.get_output_names() - output_tensors = [] - for output_name in output_names: - output_tensor = predictor.get_output_handle(output_name) - output_tensors.append(output_tensor) - return predictor, input_tensor, output_tensors, config + config.disable_gpu() + if hasattr(args, "cpu_threads"): + config.set_cpu_math_library_num_threads(args.cpu_threads) + else: + # default cpu threads as 10 + config.set_cpu_math_library_num_threads(10) + if args.enable_mkldnn: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() + if args.precision == "fp16": + config.enable_mkldnn_bfloat16() + # enable memory optim + config.enable_memory_optim() + config.disable_glog_info() + + config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + if mode == 'table': + config.delete_pass("fc_fuse_pass") # not supported for table + config.switch_use_feed_fetch_ops(False) + config.switch_ir_optim(True) + + # create predictor + predictor = inference.create_predictor(config) + input_names = predictor.get_input_names() + for name in input_names: + input_tensor = predictor.get_input_handle(name) + output_names = predictor.get_output_names() + output_tensors = [] + for output_name in output_names: + output_tensor = predictor.get_output_handle(output_name) + output_tensors.append(output_tensor) + return predictor, input_tensor, output_tensors, config def get_infer_gpuid():