From 681467d4ea4efd3d690951ec594820a5e49ca4a1 Mon Sep 17 00:00:00 2001 From: andyj <87074272+andyjpaddle@users.noreply.github.com> Date: Thu, 10 Aug 2023 16:55:26 +0800 Subject: [PATCH] [bug fix] fix none res in recovery (#10603) * add finetune en doc & test=document_fix * fix dead link & test=document_fix * fix dead link & test=document_fix * update check img * fix det res dtype * update args default type & test=document_fix * fix numpy version * support numpy1.24.0 * fix doc & test=document_fix * update doc * update doc, test=document_fix * fix pdf2word in whl, test=document_fix * fix none res in recovery * update version * format code --- paddleocr.py | 2 +- ppstructure/recovery/recovery_to_doc.py | 2 ++ ppstructure/utility.py | 44 ++++++++++++++----------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/paddleocr.py b/paddleocr.py index 21559567..549419a1 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -59,7 +59,7 @@ __all__ = [ ] SUPPORT_DET_MODEL = ['DB'] -VERSION = '2.7.0.1' +VERSION = '2.7.0.2' SUPPORT_REC_MODEL = ['CRNN', 'SVTR_LCNet'] BASE_DIR = os.path.expanduser("~/.paddleocr/") diff --git a/ppstructure/recovery/recovery_to_doc.py b/ppstructure/recovery/recovery_to_doc.py index 05018120..cd1728b6 100644 --- a/ppstructure/recovery/recovery_to_doc.py +++ b/ppstructure/recovery/recovery_to_doc.py @@ -36,6 +36,8 @@ def convert_info_docx(img, res, save_folder, img_name): flag = 1 for i, region in enumerate(res): + if len(region['res']) == 0: + continue img_idx = region['img_idx'] if flag == 2 and region['layout'] == 'single': section = doc.add_section(WD_SECTION.CONTINUOUS) diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 7892376a..a1e29344 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -16,13 +16,9 @@ import ast import PIL from PIL import Image, ImageDraw, ImageFont import numpy as np -<<<<<<< HEAD -from tools.infer.utility import draw_ocr_box_txt, str2bool, init_args as infer_args - -======= from tools.infer.utility import draw_ocr_box_txt, str2bool, str2int_tuple, init_args as infer_args import math ->>>>>>> 1e11f254 (CV套件建设专项活动 - 文字识别返回单字识别坐标 (#10515)) + def init_args(): parser = infer_args() @@ -138,7 +134,7 @@ def draw_structure_result(image, result, font_path): [(box_layout[0], box_layout[1]), (box_layout[2], box_layout[3])], outline=box_color, width=3) - + if int(PIL.__version__.split('.')[0]) < 10: text_w, text_h = font.getsize(region['type']) else: @@ -167,9 +163,11 @@ def draw_structure_result(image, result, font_path): for word_region in text_result['text_word_region']: char_box = word_region box_height = int( - math.sqrt((char_box[0][0] - char_box[3][0])**2 + (char_box[0][1] - char_box[3][1])**2)) + math.sqrt((char_box[0][0] - char_box[3][0])**2 + ( + char_box[0][1] - char_box[3][1])**2)) box_width = int( - math.sqrt((char_box[0][0] - char_box[1][0])**2 + (char_box[0][1] - char_box[1][1])**2)) + math.sqrt((char_box[0][0] - char_box[1][0])**2 + ( + char_box[0][1] - char_box[1][1])**2)) if box_height == 0 or box_width == 0: continue boxes.append(word_region) @@ -180,9 +178,10 @@ def draw_structure_result(image, result, font_path): img_layout, boxes, txts, scores, font_path=font_path, drop_score=0) return im_show + def cal_ocr_word_box(rec_str, box, rec_word_info): ''' Calculate the detection frame for each word based on the results of recognition and detection of ocr''' - + col_num, word_list, word_col_list, state_list = rec_word_info box = box.tolist() bbox_x_start = box[0][0] @@ -190,7 +189,7 @@ def cal_ocr_word_box(rec_str, box, rec_word_info): bbox_y_start = box[0][1] bbox_y_end = box[2][1] - cell_width = (bbox_x_end - bbox_x_start)/col_num + cell_width = (bbox_x_end - bbox_x_start) / col_num word_box_list = [] word_box_content_list = [] @@ -200,26 +199,31 @@ def cal_ocr_word_box(rec_str, box, rec_word_info): if state == 'cn': if len(word_col) != 1: char_seq_length = (word_col[-1] - word_col[0] + 1) * cell_width - char_width = char_seq_length/(len(word_col)-1) + char_width = char_seq_length / (len(word_col) - 1) cn_width_list.append(char_width) cn_col_list += word_col word_box_content_list += word else: cell_x_start = bbox_x_start + int(word_col[0] * cell_width) - cell_x_end = bbox_x_start + int((word_col[-1]+1) * cell_width) - cell = ((cell_x_start, bbox_y_start), (cell_x_end, bbox_y_start), (cell_x_end, bbox_y_end), (cell_x_start, bbox_y_end)) + cell_x_end = bbox_x_start + int((word_col[-1] + 1) * cell_width) + cell = ((cell_x_start, bbox_y_start), (cell_x_end, bbox_y_start), + (cell_x_end, bbox_y_end), (cell_x_start, bbox_y_end)) word_box_list.append(cell) word_box_content_list.append("".join(word)) if len(cn_col_list) != 0: if len(cn_width_list) != 0: avg_char_width = np.mean(cn_width_list) else: - avg_char_width = (bbox_x_end - bbox_x_start)/len(rec_str) + avg_char_width = (bbox_x_end - bbox_x_start) / len(rec_str) for center_idx in cn_col_list: - center_x = (center_idx+0.5)*cell_width - cell_x_start = max(int(center_x - avg_char_width/2), 0) + bbox_x_start - cell_x_end = min(int(center_x + avg_char_width/2), bbox_x_end-bbox_x_start) + bbox_x_start - cell = ((cell_x_start, bbox_y_start), (cell_x_end, bbox_y_start), (cell_x_end, bbox_y_end), (cell_x_start, bbox_y_end)) + center_x = (center_idx + 0.5) * cell_width + cell_x_start = max(int(center_x - avg_char_width / 2), + 0) + bbox_x_start + cell_x_end = min( + int(center_x + avg_char_width / 2), bbox_x_end - + bbox_x_start) + bbox_x_start + cell = ((cell_x_start, bbox_y_start), (cell_x_end, bbox_y_start), + (cell_x_end, bbox_y_end), (cell_x_start, bbox_y_end)) word_box_list.append(cell) - - return word_box_content_list, word_box_list \ No newline at end of file + + return word_box_content_list, word_box_list -- GitLab