未验证 提交 2838ac70 编写于 作者: W whjdark 提交者: GitHub

Merge pull request #1 from Evezerest/table2

add excel2html 
...@@ -21,6 +21,7 @@ import os.path ...@@ -21,6 +21,7 @@ import os.path
import platform import platform
import subprocess import subprocess
import sys import sys
import xlrd
from functools import partial from functools import partial
from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess
...@@ -611,7 +612,7 @@ class MainWindow(QMainWindow): ...@@ -611,7 +612,7 @@ class MainWindow(QMainWindow):
zoomIn, zoomOut, zoomOrg, None, zoomIn, zoomOut, zoomOrg, None,
fitWindow, fitWidth)) fitWindow, fitWidth))
addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) addActions(self.menus.autolabel, (AutoRec, reRec, cellreRec, alcm, None, help))
self.menus.file.aboutToShow.connect(self.updateFileMenu) self.menus.file.aboutToShow.connect(self.updateFileMenu)
...@@ -2131,7 +2132,8 @@ class MainWindow(QMainWindow): ...@@ -2131,7 +2132,8 @@ class MainWindow(QMainWindow):
TableRec_excel_dir = self.lastOpenDir + '/tableRec_excel_output/' TableRec_excel_dir = self.lastOpenDir + '/tableRec_excel_output/'
os.makedirs(TableRec_excel_dir, exist_ok=True) os.makedirs(TableRec_excel_dir, exist_ok=True)
filename = os.path.basename(self.filePath) filename, _ = os.path.splitext(os.path.basename(self.filePath))
excel_path = TableRec_excel_dir + '{}.xlsx'.format(filename) excel_path = TableRec_excel_dir + '{}.xlsx'.format(filename)
if res is None: if res is None:
...@@ -2203,19 +2205,26 @@ class MainWindow(QMainWindow): ...@@ -2203,19 +2205,26 @@ class MainWindow(QMainWindow):
return return
# automatically open excel annotation file # automatically open excel annotation file
try: if platform.system() == 'Windows':
import win32com.client try:
except: import win32com.client
print("CANNOT OPEN .xlsx. It could be one of the following reasons: " \ except:
"Only support Windows | No python win32com") print("CANNOT OPEN .xlsx. It could be one of the following reasons: " \
"Only support Windows | No python win32com")
try: try:
xl = win32com.client.Dispatch("Excel.Application") xl = win32com.client.Dispatch("Excel.Application")
xl.Visible = True xl.Visible = True
xl.Workbooks.Open(excel_path) xl.Workbooks.Open(excel_path)
except: # excelEx = "You need to show the excel executable at this point"
print("CANNOT OPEN .xlsx. It could be the following reasons: " \ # subprocess.Popen([excelEx, excel_path])
".xlsx is not existed")
# os.startfile(excel_path)
except:
print("CANNOT OPEN .xlsx. It could be the following reasons: " \
".xlsx is not existed")
else:
os.system('open ' + os.path.normpath(excel_path))
print('time cost: ', time.time() - start) print('time cost: ', time.time() - start)
...@@ -2313,8 +2322,6 @@ class MainWindow(QMainWindow): ...@@ -2313,8 +2322,6 @@ class MainWindow(QMainWindow):
# 'Please check the label.txt and tableRec_excel_output\n' # 'Please check the label.txt and tableRec_excel_output\n'
# QMessageBox.information(self, "Information", msg) # QMessageBox.information(self, "Information", msg)
# return # return
train_split, val_split, test_split = partitionDialog.getDataPartition() train_split, val_split, test_split = partitionDialog.getDataPartition()
# check validate # check validate
if train_split + val_split + test_split > 100: if train_split + val_split + test_split > 100:
...@@ -2334,7 +2341,7 @@ class MainWindow(QMainWindow): ...@@ -2334,7 +2341,7 @@ class MainWindow(QMainWindow):
imgid = 0 imgid = 0
for image_path in labeldict.keys(): for image_path in labeldict.keys():
# load csv annotations # load csv annotations
filename = os.path.basename(image_path) filename, _ = os.path.splitext(os.path.basename(image_path))
csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx') csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx')
if not os.path.exists(csv_path): if not os.path.exists(csv_path):
msg = 'ERROR, Can not find ' + csv_path msg = 'ERROR, Can not find ' + csv_path
...@@ -2342,9 +2349,20 @@ class MainWindow(QMainWindow): ...@@ -2342,9 +2349,20 @@ class MainWindow(QMainWindow):
return return
# read xlsx file, convert to HTML # read xlsx file, convert to HTML
xd = pd.ExcelFile(csv_path) # xd = pd.ExcelFile(csv_path)
df = xd.parse() # df = xd.parse()
structure = df.to_html() # structure = df.to_html(index = False)
excel = xlrd.open_workbook(csv_path)
sheet0 = excel.sheet_by_index(0) # only sheet 0
merged_cells = sheet0.merged_cells # (0,1,1,3) start row, end row, start col, end col
html_list = [['td'] * sheet0.ncols for i in range(sheet0.nrows)]
for merged in merged_cells:
html_list = expand_list(merged, html_list)
token_list = convert_token(html_list)
# load box annotations # load box annotations
cells = [] cells = []
...@@ -2363,7 +2381,7 @@ class MainWindow(QMainWindow): ...@@ -2363,7 +2381,7 @@ class MainWindow(QMainWindow):
split = 'test' split = 'test'
# save dict # save dict
html = {'structure': {'tokens': structure}, 'cell': cells} html = {'structure': {'tokens': token_list}, 'cell': cells}
json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html}) json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html})
imgid += 1 imgid += 1
......
...@@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array: ...@@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array:
return hbb return hbb
def expand_list(merged, html_list):
    """Mark one merged-cell range in the table grid.

    Every cell covered by the range is set to ``None``, and the top-left
    (anchor) cell is replaced by a string of span attributes such as
    ``" colspan=2 rowspan=3"``. An attribute is only added when the span
    in that direction is greater than one.

    Args:
        merged: ``(start_row, end_row, start_col, end_col)``; the end
            indices are exclusive (they are used directly as ``range``
            bounds).
        html_list: grid (list of row lists) of cell markers; modified in
            place.

    Returns:
        The same ``html_list`` object, for call-site convenience.
    """
    sr, er, sc, ec = merged
    height = er - sr
    width = ec - sc

    # A range that is empty or covers a single cell merges nothing. Leave
    # the grid alone so the cell keeps its ordinary marker instead of being
    # replaced by an attribute-less (or bogus) span marker.
    if height < 1 or width < 1 or (height == 1 and width == 1):
        return html_list

    # Hide every covered cell first; the anchor is restored just below.
    for i in range(sr, er):
        for j in range(sc, ec):
            html_list[i][j] = None

    span = ''
    if width > 1:
        span += " colspan={}".format(width)
    if height > 1:
        span += " rowspan={}".format(height)
    html_list[sr][sc] = span
    return html_list
def convert_token(html_list):
    """Convert a grid of cell markers into a flat list of HTML tokens.

    Each grid entry is handled as follows:

    * ``None`` -- emits nothing (the cell is skipped);
    * ``'td'`` -- emits ``"<td>"``, ``"</td>"``;
    * any other string -- treated as whitespace-separated ``name=value``
      span attributes (e.g. ``" colspan=2 rowspan=3"``) and emitted as
      ``"<td"``, one quoted token per attribute present (colspan before
      rowspan), ``">"``, ``"</td>"``.

    Args:
        html_list: grid (list of row lists) of cell markers.

    Returns:
        List of token strings wrapped in ``"<tbody>"`` / ``"</tbody>"``,
        with each row wrapped in ``"<tr>"`` / ``"</tr>"``.
    """
    token_list = ["<tbody>"]
    for row in html_list:
        token_list.append("<tr>")
        for col in row:
            if col is None:
                continue
            if col == 'td':
                token_list.extend(["<td>", "</td>"])
                continue

            # Parse each attribute on its own so that a cell carrying both
            # colspan and rowspan does not leak one value into the other.
            spans = dict(attr.partition('=')[::2] for attr in col.split())
            token_list.append("<td")
            for name in ('colspan', 'rowspan'):
                if name in spans:
                    token_list.append(' {}="{}"'.format(name, spans[name]))
            # Spanning cells need their closing tag just like plain cells.
            token_list.extend([">", "</td>"])
        token_list.append("</tr>")
    token_list.append("</tbody>")
    return token_list
def stepsInfo(lang='en'): def stepsInfo(lang='en'):
if lang == 'ch': if lang == 'ch':
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \ msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册