diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py index 01d4675943ee1cae1a793aa65869f3ad28937c63..c4a816fd87d5608460ad62041d994ec744f1b6f1 100755 --- a/ppstructure/table/predict_structure.py +++ b/ppstructure/table/predict_structure.py @@ -73,12 +73,14 @@ class TableStructurer(object): postprocess_params = { 'name': 'TableLabelDecode', "character_dict_path": args.table_char_dict_path, + 'merge_no_span_structure': args.merge_no_span_structure } else: postprocess_params = { 'name': 'TableMasterLabelDecode', "character_dict_path": args.table_char_dict_path, - 'box_shape': 'pad' + 'box_shape': 'pad', + 'merge_no_span_structure': args.merge_no_span_structure } self.preprocess_op = create_operators(pre_process_list) diff --git a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py index b0c7ef589ffcfe4c3c3ec1fd43550813c9d27dcc..35ce8890cf3bafbeb02002773ebf5a4e22cb175a 100644 --- a/ppstructure/table/predict_table.py +++ b/ppstructure/table/predict_table.py @@ -101,6 +101,7 @@ class TableSystem(object): start = time.time() structure_res, elapse = self._structure(copy.deepcopy(img)) + result['cell_bbox'] = structure_res[1] time_dict['table'] = elapse dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr( @@ -175,8 +176,23 @@ def main(args): image_file_list = image_file_list[args.process_id::args.total_process_num] os.makedirs(args.output, exist_ok=True) - text_sys = TableSystem(args) + table_sys = TableSystem(args) img_num = len(image_file_list) + + f_html = open( + os.path.join(args.output, 'show.html'), mode='w', encoding='utf-8') + f_html.write('\n\n') + f_html.write('\n') + f_html.write( + "" + ) + f_html.write("\n") + f_html.write('') + f_html.write('') + f_html.write('') + f_html.write("\n") + for i, image_file in enumerate(image_file_list): logger.info("[{}/{}] {}".format(i, img_num, image_file)) img, flag = check_and_read_gif(image_file) @@ -188,13 +204,31 @@ def main(args): logger.error("error in loading image:{}".format(image_file)) continue starttime = time.time() - pred_res, _ = text_sys(img) + pred_res, _ = table_sys(img) pred_html = pred_res['html'] logger.info(pred_html) to_excel(pred_html, excel_path) logger.info('excel saved to {}'.format(excel_path)) elapse = time.time() - starttime logger.info("Predict time : {:.3f}s".format(elapse)) + + # img = predict_strture.draw_rectangle(image_file, pred_res['cell_bbox'], use_xywh) + img = utility.draw_boxes(cv2.imread(image_file), pred_res['cell_bbox']) + img_save_path = os.path.join(args.output, os.path.basename(image_file)) + cv2.imwrite(img_save_path, img) + + f_html.write("\n") + f_html.write(f'\n') + f_html.write('
img name\n') + f_html.write('ori imagetable htmlcell box
{os.path.basename(image_file)}
\n') + f_html.write(f'
' + pred_html.replace( + '
', '').replace('
', '') + + '
\n') + f_html.write( + f'\n') + f_html.write("\n") + f_html.write("\n") + f_html.close() + if args.benchmark: text_sys.autolog.report() diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 767c5704fac6b59fe61285ce237d3f3e402480ac..1c77cecd5cc85010a1e10b7a07d9817ee37a9d37 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -27,6 +27,8 @@ def init_args(): parser.add_argument("--table_max_len", type=int, default=488) parser.add_argument("--table_algorithm", type=str, default='TableAttn') parser.add_argument("--table_model_dir", type=str) + parser.add_argument( + "--merge_no_span_structure", type=str2bool, default=False) parser.add_argument( "--table_char_dict_path", type=str,