From 1e27820f59c0234d5db8c591f7701bf33be3d346 Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Wed, 10 Aug 2022 10:52:34 +0000 Subject: [PATCH] add merge flag --- ppstructure/table/predict_structure.py | 4 ++- ppstructure/table/predict_table.py | 38 ++++++++++++++++++++++++-- ppstructure/utility.py | 2 ++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py index 01d46759..c4a816fd 100755 --- a/ppstructure/table/predict_structure.py +++ b/ppstructure/table/predict_structure.py @@ -73,12 +73,14 @@ class TableStructurer(object): postprocess_params = { 'name': 'TableLabelDecode', "character_dict_path": args.table_char_dict_path, + 'merge_no_span_structure': args.merge_no_span_structure } else: postprocess_params = { 'name': 'TableMasterLabelDecode', "character_dict_path": args.table_char_dict_path, - 'box_shape': 'pad' + 'box_shape': 'pad', + 'merge_no_span_structure': args.merge_no_span_structure } self.preprocess_op = create_operators(pre_process_list) diff --git a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py index b0c7ef58..35ce8890 100644 --- a/ppstructure/table/predict_table.py +++ b/ppstructure/table/predict_table.py @@ -101,6 +101,7 @@ class TableSystem(object): start = time.time() structure_res, elapse = self._structure(copy.deepcopy(img)) + result['cell_bbox'] = structure_res[1] time_dict['table'] = elapse dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr( @@ -175,8 +176,23 @@ def main(args): image_file_list = image_file_list[args.process_id::args.total_process_num] os.makedirs(args.output, exist_ok=True) - text_sys = TableSystem(args) + table_sys = TableSystem(args) img_num = len(image_file_list) + + f_html = open( + os.path.join(args.output, 'show.html'), mode='w', encoding='utf-8') + f_html.write('\n\n') + f_html.write('\n') + f_html.write( + "" + ) + f_html.write("\n") + f_html.write('') + f_html.write('') + f_html.write('') + f_html.write("\n") + for i, image_file in enumerate(image_file_list): logger.info("[{}/{}] {}".format(i, img_num, image_file)) img, flag = check_and_read_gif(image_file) @@ -188,13 +204,31 @@ def main(args): logger.error("error in loading image:{}".format(image_file)) continue starttime = time.time() - pred_res, _ = text_sys(img) + pred_res, _ = table_sys(img) pred_html = pred_res['html'] logger.info(pred_html) to_excel(pred_html, excel_path) logger.info('excel saved to {}'.format(excel_path)) elapse = time.time() - starttime logger.info("Predict time : {:.3f}s".format(elapse)) + + # img = predict_strture.draw_rectangle(image_file, pred_res['cell_bbox'], use_xywh) + img = utility.draw_boxes(cv2.imread(image_file), pred_res['cell_bbox']) + img_save_path = os.path.join(args.output, os.path.basename(image_file)) + cv2.imwrite(img_save_path, img) + + f_html.write("\n") + f_html.write(f'\n') + f_html.write('
img name\n') + f_html.write('ori imagetable htmlcell box
{os.path.basename(image_file)}
\n') + f_html.write(f'
' + pred_html.replace( + '
', '').replace('
', '') + + '
\n') + f_html.write( + f'\n') + f_html.write("\n") + f_html.write("\n") + f_html.close() + if args.benchmark: text_sys.autolog.report() diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 767c5704..1c77cecd 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -27,6 +27,8 @@ def init_args(): parser.add_argument("--table_max_len", type=int, default=488) parser.add_argument("--table_algorithm", type=str, default='TableAttn') parser.add_argument("--table_model_dir", type=str) + parser.add_argument( + "--merge_no_span_structure", type=str2bool, default=False) parser.add_argument( "--table_char_dict_path", type=str, -- GitLab