Auto stash before merge of "table" and "origin/new"

f1d0c8a2 · qq_25193841 · 1e6af3bb · f1d0c8a2 · f1d0c8a2
隐藏空白更改
内联并排

Showing 2 changed files with 59 additions and 4 deletions

PPOCRLabel/PPOCRLabel.py PPOCRLabel/PPOCRLabel.py +15 -4

PPOCRLabel/libs/utils.py PPOCRLabel/libs/utils.py +44 -0

未找到文件。
--- a/PPOCRLabel/PPOCRLabel.py
+++ b/PPOCRLabel/PPOCRLabel.py
@@ -21,6 +21,7 @@ import os.path
 import platform
 import subprocess
 import sys
+import xlrd
 from functools import partial

 from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess
@@ -2349,9 +2350,19 @@ class MainWindow(QMainWindow):
                return

            # read xlsx file, convert to HTML
-            xd = pd.ExcelFile(csv_path)
-            df = xd.parse()
-            structure = df.to_html(index = False)
+            # xd = pd.ExcelFile(csv_path)
+            # df = xd.parse()
+            # structure = df.to_html(index = False)
+            excel = xlrd.open_workbook(csv_path)
+            sheet0 = excel.sheet_by_index(0)  # only sheet 0
+            merged_cells = sheet0.merged_cells # (0,1,1,3) start row, end row, start col, end col
+
+            html_list = [['td'] * sheet0.ncols for i in range(sheet0.nrows)]
+
+            for merged in merged_cells:
+                html_list = expand_list(merged, html_list)
+
+            token_list = convert_token(html_list)

            # load box annotations
            cells = []
@@ -2370,7 +2381,7 @@ class MainWindow(QMainWindow):
                split = 'test'

            #  save dict
-            html = {'structure': {'tokens': structure}, 'cell': cells}
+            html = {'structure': {'tokens': token_list}, 'cell': cells}
            json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html})
            imgid += 1


--- a/PPOCRLabel/libs/utils.py
+++ b/PPOCRLabel/libs/utils.py
@@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array:
    return hbb


+def expand_list(merged, html_list):
+    '''
+    Record one merged-cell range in the table grid.
+
+    Args:
+        merged: 4-item sequence (start row, end row, start col, end col).
+            The end indices are exclusive: the range is walked with
+            range(start, end).
+        html_list: row-major grid (list of row lists), modified in place.
+
+    Returns:
+        The same html_list object. Every cell inside the range is set to
+        None, except the top-left cell, which becomes a string holding
+        " colspan=N" and/or " rowspan=N" for each span greater than 1
+        (an empty string when neither span exceeds 1).
+    '''
+    row_start, row_end, col_start, col_end = merged
+
+    # Blank out the whole merged area first; the anchor is restored below.
+    for row_idx in range(row_start, row_end):
+        for col_idx in range(col_start, col_end):
+            html_list[row_idx][col_idx] = None
+
+    # Only spans wider/taller than a single cell are written as attributes.
+    attrs = []
+    col_span = col_end - col_start
+    if col_span > 1:
+        attrs.append(" colspan={}".format(col_span))
+    row_span = row_end - row_start
+    if row_span > 1:
+        attrs.append(" rowspan={}".format(row_span))
+
+    html_list[row_start][col_start] = "".join(attrs)
+    return html_list
+
+
+def convert_token(html_list):
+    '''
+    Convert a table grid into the flat structure-token list used as label.
+
+    Args:
+        html_list: row-major grid (list of row lists). Each cell is one of:
+            None   - cell covered by a merged cell anchored elsewhere; it
+                     produces no tokens.
+            'td'   - ordinary cell.
+            other string - anchor of a merged cell, optionally carrying
+                     " colspan=N" and/or " rowspan=N" as written by
+                     expand_list.
+
+    Returns:
+        List of tokens: "<tbody>", then for every row "<tr>", the cell
+        tokens and "</tr>", and finally "</tbody>". An ordinary cell yields
+        "<td>", "</td>"; a merged anchor yields "<td", one token per span
+        attribute (e.g. ' colspan="2"'), ">" and "</td>".
+    '''
+    token_list = ["<tbody>"]
+    for row in html_list:
+        token_list.append("<tr>")
+        for col in row:
+            if col is None:
+                continue
+            if col == 'td':
+                token_list.extend(["<td>", "</td>"])
+                continue
+
+            # Parse every "name=value" pair on its own so that a cell with
+            # both spans does not leak " rowspan=N" into the colspan value.
+            attrs = dict(
+                part.split('=', 1) for part in col.split() if '=' in part
+            )
+            token_list.append("<td")
+            for name in ('colspan', 'rowspan'):
+                if name in attrs:
+                    token_list.append(" {}=\"{}\"".format(name, attrs[name]))
+            # A merged cell must be closed just like an ordinary one.
+            token_list.extend([">", "</td>"])
+        token_list.append("</tr>")
+    token_list.append("</tbody>")
+
+    return token_list
+
+
 def stepsInfo(lang='en'):
    if lang == 'ch':
        msg = "1. 安装与运行：使用上述命令安装与运行程序。\n" \