diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py index 703c4e40cd8c222ebd50c3cf2f086a3c9a878858..022e5aae47da8cdb22b6f5167dd75e05986e5b62 100644 --- a/PPOCRLabel/PPOCRLabel.py +++ b/PPOCRLabel/PPOCRLabel.py @@ -21,6 +21,7 @@ import os.path import platform import subprocess import sys +import xlrd from functools import partial from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess @@ -611,7 +612,7 @@ class MainWindow(QMainWindow): zoomIn, zoomOut, zoomOrg, None, fitWindow, fitWidth)) - addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) + addActions(self.menus.autolabel, (AutoRec, reRec, cellreRec, alcm, None, help)) self.menus.file.aboutToShow.connect(self.updateFileMenu) @@ -2131,7 +2132,8 @@ class MainWindow(QMainWindow): TableRec_excel_dir = self.lastOpenDir + '/tableRec_excel_output/' os.makedirs(TableRec_excel_dir, exist_ok=True) - filename = os.path.basename(self.filePath) + filename, _ = os.path.splitext(os.path.basename(self.filePath)) + excel_path = TableRec_excel_dir + '{}.xlsx'.format(filename) if res is None: @@ -2203,19 +2205,26 @@ class MainWindow(QMainWindow): return # automatically open excel annotation file - try: - import win32com.client - except: - print("CANNOT OPEN .xlsx. It could be one of the following reasons: " \ - "Only support Windows | No python win32com") + if platform.system() == 'Windows': + try: + import win32com.client + except: + print("CANNOT OPEN .xlsx. It could be one of the following reasons: " \ + "Only support Windows | No python win32com") - try: - xl = win32com.client.Dispatch("Excel.Application") - xl.Visible = True - xl.Workbooks.Open(excel_path) - except: - print("CANNOT OPEN .xlsx. It could be the following reasons: " \ - ".xlsx is not existed") + try: + xl = win32com.client.Dispatch("Excel.Application") + xl.Visible = True + xl.Workbooks.Open(excel_path) + # excelEx = "You need to show the excel executable at this point" + # subprocess.Popen([excelEx, excel_path]) + + # os.startfile(excel_path) + except: + print("CANNOT OPEN .xlsx. It could be the following reasons: " \ + ".xlsx is not existed") + else: + os.system('open ' + os.path.normpath(excel_path)) print('time cost: ', time.time() - start) @@ -2313,8 +2322,6 @@ class MainWindow(QMainWindow): # 'Please check the label.txt and tableRec_excel_output\n' # QMessageBox.information(self, "Information", msg) # return - - train_split, val_split, test_split = partitionDialog.getDataPartition() # check validate if train_split + val_split + test_split > 100: @@ -2334,7 +2341,7 @@ class MainWindow(QMainWindow): imgid = 0 for image_path in labeldict.keys(): # load csv annotations - filename = os.path.basename(image_path) + filename, _ = os.path.splitext(os.path.basename(image_path)) csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx') if not os.path.exists(csv_path): msg = 'ERROR, Can not find ' + csv_path @@ -2342,9 +2349,20 @@ class MainWindow(QMainWindow): return # read xlsx file, convert to HTML - xd = pd.ExcelFile(csv_path) - df = xd.parse() - structure = df.to_html() + # xd = pd.ExcelFile(csv_path) + # df = xd.parse() + # structure = df.to_html(index = False) + excel = xlrd.open_workbook(csv_path) + sheet0 = excel.sheet_by_index(0) # only sheet 0 + merged_cells = sheet0.merged_cells # (0,1,1,3) start row, end row, start col, end col + + html_list = [['td'] * sheet0.ncols for i in range(sheet0.nrows)] + + for merged in merged_cells: + html_list = expand_list(merged, html_list) + + token_list = convert_token(html_list) + # load box annotations cells = [] @@ -2363,7 +2381,7 @@ class MainWindow(QMainWindow): split = 'test' # save dict - html = {'structure': {'tokens': structure}, 'cell': cells} + html = {'structure': {'tokens': token_list}, 'cell': cells} json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html}) imgid += 1 diff --git a/PPOCRLabel/libs/utils.py b/PPOCRLabel/libs/utils.py index c49b5068822ecd253ec4d719c1a01bf990ceaf29..86a0336d450f97ada673662ccad63e746fd27125 100644 --- a/PPOCRLabel/libs/utils.py +++ b/PPOCRLabel/libs/utils.py @@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array: return hbb +def expand_list(merged, html_list): + ''' + Fill blanks according to merged cells + ''' + sr, er, sc, ec = merged + for i in range(sr, er): + for j in range(sc, ec): + html_list[i][j] = None + html_list[sr][sc] = '' + if ec - sc > 1: + html_list[sr][sc] += " colspan={}".format(ec - sc) + if er - sr > 1: + html_list[sr][sc] += " rowspan={}".format(er - sr) + return html_list + + +def convert_token(html_list): + ''' + Convert raw html to label format + ''' + token_list = ["
"] + # final html list: + for row in html_list: + token_list.append("