# Excerpt of ppstructure/table/predict_table.py as modified by commit
# e753481471b2223f3b7c0feb4b5437088e2f7635
# ("filter ocr result by table box (#6996)", zhoujun, 2022-07-26).
#
# Only the TableSystem methods fully visible in that patch hunk are
# reproduced here. `match_result` (which starts right after
# `filter_ocr_result` in the hunk) and `get_pred_html` are defined in the
# part of the class that lies outside the hunk.

import numpy as np  # used below; the import line itself is outside the hunk


class TableSystem(object):
    def rebuild_table(self, structure_res, dt_boxes, rec_res):
        """Combine table-structure predictions with OCR results into HTML.

        Args:
            structure_res: pair ``(pred_structures, pred_bboxes)``.
            dt_boxes: detected text boxes, one per OCR result.
            rec_res: recognition results, parallel to ``dt_boxes``.

        Returns:
            The ``(pred_html, pred)`` pair produced by ``get_pred_html``.
        """
        pred_structures, pred_bboxes = structure_res
        # Discard OCR results located above the table before matching them
        # against the predicted cell boxes.
        dt_boxes, rec_res = self.filter_ocr_result(pred_bboxes, dt_boxes,
                                                   rec_res)
        matched_index = self.match_result(dt_boxes, pred_bboxes)
        pred_html, pred = self.get_pred_html(pred_structures, matched_index,
                                             rec_res)
        return pred_html, pred

    def filter_ocr_result(self, pred_bboxes, dt_boxes, rec_res):
        """Drop OCR results that lie entirely above the predicted table cells.

        The smallest value found at the odd column indices of ``pred_bboxes``
        is taken as the top edge of the table. An OCR box is discarded when
        the largest of its own odd-index values is strictly smaller than that
        edge.
        NOTE(review): this presumes flat boxes whose odd indices hold
        y-coordinates (e.g. ``[x1, y1, x2, y2]``) -- confirm against callers.

        Args:
            pred_bboxes: 2-D array-like of predicted cell boxes.
            dt_boxes: iterable of detected text boxes.
            rec_res: iterable of recognition results, parallel to
                ``dt_boxes``.

        Returns:
            ``(new_dt_boxes, new_rec_res)``: two lists with the surviving
            boxes and their recognition results, in the original order.
        """
        cell_boxes = np.asarray(pred_bboxes)
        if cell_boxes.size == 0:
            # No cell boxes means there is no table edge to filter against;
            # ``min()`` on an empty array would raise ValueError, so keep
            # every OCR result instead.
            return list(dt_boxes), list(rec_res)

        table_top = cell_boxes[:, 1::2].min()

        new_dt_boxes = []
        new_rec_res = []
        for box, rec in zip(dt_boxes, rec_res):
            if np.max(box[1::2]) < table_top:
                # Entire box sits above the table: not table content.
                continue
            new_dt_boxes.append(box)
            new_rec_res.append(rec)
        return new_dt_boxes, new_rec_res