未验证 提交 2838ac70 编写于 作者: W whjdark 提交者: GitHub

Merge pull request #1 from Evezerest/table2

add excel2html 
......@@ -21,6 +21,7 @@ import os.path
import platform
import subprocess
import sys
import xlrd
from functools import partial
from PyQt5.QtCore import QSize, Qt, QPoint, QByteArray, QTimer, QFileInfo, QPointF, QProcess
......@@ -611,7 +612,7 @@ class MainWindow(QMainWindow):
zoomIn, zoomOut, zoomOrg, None,
fitWindow, fitWidth))
addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help))
addActions(self.menus.autolabel, (AutoRec, reRec, cellreRec, alcm, None, help))
self.menus.file.aboutToShow.connect(self.updateFileMenu)
......@@ -2131,7 +2132,8 @@ class MainWindow(QMainWindow):
TableRec_excel_dir = self.lastOpenDir + '/tableRec_excel_output/'
os.makedirs(TableRec_excel_dir, exist_ok=True)
filename = os.path.basename(self.filePath)
filename, _ = os.path.splitext(os.path.basename(self.filePath))
excel_path = TableRec_excel_dir + '{}.xlsx'.format(filename)
if res is None:
......@@ -2203,19 +2205,26 @@ class MainWindow(QMainWindow):
return
# automatically open excel annotation file
try:
import win32com.client
except:
print("CANNOT OPEN .xlsx. It could be one of the following reasons: " \
"Only support Windows | No python win32com")
if platform.system() == 'Windows':
try:
import win32com.client
except:
print("CANNOT OPEN .xlsx. It could be one of the following reasons: " \
"Only support Windows | No python win32com")
try:
xl = win32com.client.Dispatch("Excel.Application")
xl.Visible = True
xl.Workbooks.Open(excel_path)
except:
print("CANNOT OPEN .xlsx. It could be the following reasons: " \
".xlsx is not existed")
try:
xl = win32com.client.Dispatch("Excel.Application")
xl.Visible = True
xl.Workbooks.Open(excel_path)
# excelEx = "You need to show the excel executable at this point"
# subprocess.Popen([excelEx, excel_path])
# os.startfile(excel_path)
except:
print("CANNOT OPEN .xlsx. It could be the following reasons: " \
".xlsx is not existed")
else:
os.system('open ' + os.path.normpath(excel_path))
print('time cost: ', time.time() - start)
......@@ -2313,8 +2322,6 @@ class MainWindow(QMainWindow):
# 'Please check the label.txt and tableRec_excel_output\n'
# QMessageBox.information(self, "Information", msg)
# return
train_split, val_split, test_split = partitionDialog.getDataPartition()
# check validate
if train_split + val_split + test_split > 100:
......@@ -2334,7 +2341,7 @@ class MainWindow(QMainWindow):
imgid = 0
for image_path in labeldict.keys():
# load csv annotations
filename = os.path.basename(image_path)
filename, _ = os.path.splitext(os.path.basename(image_path))
csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx')
if not os.path.exists(csv_path):
msg = 'ERROR, Can not find ' + csv_path
......@@ -2342,9 +2349,20 @@ class MainWindow(QMainWindow):
return
# read xlsx file, convert to HTML
xd = pd.ExcelFile(csv_path)
df = xd.parse()
structure = df.to_html()
# xd = pd.ExcelFile(csv_path)
# df = xd.parse()
# structure = df.to_html(index = False)
excel = xlrd.open_workbook(csv_path)
sheet0 = excel.sheet_by_index(0) # only sheet 0
merged_cells = sheet0.merged_cells # (0,1,1,3) start row, end row, start col, end col
html_list = [['td'] * sheet0.ncols for i in range(sheet0.nrows)]
for merged in merged_cells:
html_list = expand_list(merged, html_list)
token_list = convert_token(html_list)
# load box annotations
cells = []
......@@ -2363,7 +2381,7 @@ class MainWindow(QMainWindow):
split = 'test'
# save dict
html = {'structure': {'tokens': structure}, 'cell': cells}
html = {'structure': {'tokens': token_list}, 'cell': cells}
json_results.append({'filename': filename, 'split': split, 'imgid': imgid, 'html': html})
imgid += 1
......
......@@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array:
return hbb
def expand_list(merged, html_list):
'''
Fill blanks according to merged cells
'''
sr, er, sc, ec = merged
for i in range(sr, er):
for j in range(sc, ec):
html_list[i][j] = None
html_list[sr][sc] = ''
if ec - sc > 1:
html_list[sr][sc] += " colspan={}".format(ec - sc)
if er - sr > 1:
html_list[sr][sc] += " rowspan={}".format(er - sr)
return html_list
def convert_token(html_list):
'''
Convert raw html to label format
'''
token_list = ["<tbody>"]
# final html list:
for row in html_list:
token_list.append("<tr>")
for col in row:
if col == None:
continue
elif col == 'td':
token_list.extend(["<td>", "</td>"])
else:
token_list.append("<td")
if 'colspan' in col:
_, n = col.split('colspan=')
token_list.append(" colspan=\"{}\"".format(n))
if 'rowspan' in col:
_, n = col.split('rowspan=')
token_list.append(" rowspan=\"{}\"".format(n))
token_list.append(">")
token_list.append("</tr>")
token_list.append("</tbody>")
return token_list
def stepsInfo(lang='en'):
if lang == 'ch':
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册