Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
2838ac70
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2838ac70
编写于
5月 06, 2022
作者:
W
whjdark
提交者:
GitHub
5月 06, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1 from Evezerest/table2
add excel2html
上级
6413fb1f
650bad88
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
83 additions
and
21 deletions
+83
-21
PPOCRLabel/PPOCRLabel.py
PPOCRLabel/PPOCRLabel.py
+39
-21
PPOCRLabel/libs/utils.py
PPOCRLabel/libs/utils.py
+44
-0
未找到文件。
PPOCRLabel/PPOCRLabel.py
浏览文件 @
2838ac70
...
...
@@ -21,6 +21,7 @@ import os.path
import
platform
import
subprocess
import
sys
import
xlrd
from
functools
import
partial
from
PyQt5.QtCore
import
QSize
,
Qt
,
QPoint
,
QByteArray
,
QTimer
,
QFileInfo
,
QPointF
,
QProcess
...
...
@@ -611,7 +612,7 @@ class MainWindow(QMainWindow):
zoomIn
,
zoomOut
,
zoomOrg
,
None
,
fitWindow
,
fitWidth
))
addActions
(
self
.
menus
.
autolabel
,
(
AutoRec
,
reRec
,
alcm
,
None
,
help
))
addActions
(
self
.
menus
.
autolabel
,
(
AutoRec
,
reRec
,
cellreRec
,
alcm
,
None
,
help
))
self
.
menus
.
file
.
aboutToShow
.
connect
(
self
.
updateFileMenu
)
...
...
@@ -2131,7 +2132,8 @@ class MainWindow(QMainWindow):
TableRec_excel_dir
=
self
.
lastOpenDir
+
'/tableRec_excel_output/'
os
.
makedirs
(
TableRec_excel_dir
,
exist_ok
=
True
)
filename
=
os
.
path
.
basename
(
self
.
filePath
)
filename
,
_
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
self
.
filePath
))
excel_path
=
TableRec_excel_dir
+
'{}.xlsx'
.
format
(
filename
)
if
res
is
None
:
...
...
@@ -2203,6 +2205,7 @@ class MainWindow(QMainWindow):
return
# automatically open excel annotation file
if
platform
.
system
()
==
'Windows'
:
try
:
import
win32com.client
except
:
...
...
@@ -2213,9 +2216,15 @@ class MainWindow(QMainWindow):
xl
=
win32com
.
client
.
Dispatch
(
"Excel.Application"
)
xl
.
Visible
=
True
xl
.
Workbooks
.
Open
(
excel_path
)
# excelEx = "You need to show the excel executable at this point"
# subprocess.Popen([excelEx, excel_path])
# os.startfile(excel_path)
except
:
print
(
"CANNOT OPEN .xlsx. It could be the following reasons: "
\
".xlsx is not existed"
)
else
:
os
.
system
(
'open '
+
os
.
path
.
normpath
(
excel_path
))
print
(
'time cost: '
,
time
.
time
()
-
start
)
...
...
@@ -2313,8 +2322,6 @@ class MainWindow(QMainWindow):
# 'Please check the label.txt and tableRec_excel_output\n'
# QMessageBox.information(self, "Information", msg)
# return
train_split
,
val_split
,
test_split
=
partitionDialog
.
getDataPartition
()
# check validate
if
train_split
+
val_split
+
test_split
>
100
:
...
...
@@ -2334,7 +2341,7 @@ class MainWindow(QMainWindow):
imgid
=
0
for
image_path
in
labeldict
.
keys
():
# load csv annotations
filename
=
os
.
path
.
basename
(
image_path
)
filename
,
_
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
image_path
)
)
csv_path
=
os
.
path
.
join
(
TableRec_excel_dir
,
filename
+
'.xlsx'
)
if
not
os
.
path
.
exists
(
csv_path
):
msg
=
'ERROR, Can not find '
+
csv_path
...
...
@@ -2342,9 +2349,20 @@ class MainWindow(QMainWindow):
return
# read xlsx file, convert to HTML
xd
=
pd
.
ExcelFile
(
csv_path
)
df
=
xd
.
parse
()
structure
=
df
.
to_html
()
# xd = pd.ExcelFile(csv_path)
# df = xd.parse()
# structure = df.to_html(index = False)
excel
=
xlrd
.
open_workbook
(
csv_path
)
sheet0
=
excel
.
sheet_by_index
(
0
)
# only sheet 0
merged_cells
=
sheet0
.
merged_cells
# (0,1,1,3) start row, end row, start col, end col
html_list
=
[[
'td'
]
*
sheet0
.
ncols
for
i
in
range
(
sheet0
.
nrows
)]
for
merged
in
merged_cells
:
html_list
=
expand_list
(
merged
,
html_list
)
token_list
=
convert_token
(
html_list
)
# load box annotations
cells
=
[]
...
...
@@ -2363,7 +2381,7 @@ class MainWindow(QMainWindow):
split
=
'test'
# save dict
html
=
{
'structure'
:
{
'tokens'
:
structure
},
'cell'
:
cells
}
html
=
{
'structure'
:
{
'tokens'
:
token_list
},
'cell'
:
cells
}
json_results
.
append
({
'filename'
:
filename
,
'split'
:
split
,
'imgid'
:
imgid
,
'html'
:
html
})
imgid
+=
1
...
...
PPOCRLabel/libs/utils.py
浏览文件 @
2838ac70
...
...
@@ -188,6 +188,50 @@ def OBB2HBB(obb) -> np.array:
return
hbb
def expand_list(merged, html_list):
    """Mark one merged-cell region inside the table grid.

    Args:
        merged: A 4-item sequence ``(start_row, end_row, start_col, end_col)``.
            The end indices are exclusive (they are used as ``range`` stops).
        html_list: Row-major grid (list of lists) of cell markers; it is
            modified in place.

    Returns:
        The same ``html_list`` object. Every cell of the region is set to
        ``None`` except the top-left anchor, which holds an attribute string
        such as ``" colspan=2 rowspan=3"`` (empty when neither span exceeds 1).
    """
    row_start, row_end, col_start, col_end = merged
    width = col_end - col_start
    height = row_end - row_start

    # Blank out the whole region first; the anchor is rewritten afterwards.
    covered = ((r, c)
               for r in range(row_start, row_end)
               for c in range(col_start, col_end))
    for r, c in covered:
        html_list[r][c] = None

    # Build the anchor's attribute text, colspan before rowspan.
    pieces = []
    if width > 1:
        pieces.append(" colspan={}".format(width))
    if height > 1:
        pieces.append(" rowspan={}".format(height))
    html_list[row_start][col_start] = ''.join(pieces)
    return html_list
def convert_token(html_list):
    """Convert the cell grid into the flat structure-token list used as label.

    Args:
        html_list: Row-major grid (list of lists). Each cell is one of:
            ``None``  - cell is covered by a merged region and emits nothing;
            ``'td'``  - plain cell;
            any other string - attribute text for a spanning cell, e.g.
            ``" colspan=2"``, ``" rowspan=3"`` or ``" colspan=2 rowspan=3"``.

    Returns:
        A list of string tokens wrapped in ``<tbody>`` / ``</tbody>``, with
        ``<tr>`` / ``</tr>`` around every row. A plain cell yields
        ``"<td>", "</td>"``; a spanning cell yields ``"<td"``, one token per
        span attribute (`` colspan="N"`` / `` rowspan="N"``), ``">"`` and
        ``"</td>"``.
    """
    token_list = ["<tbody>"]
    for row in html_list:
        token_list.append("<tr>")
        for col in row:
            if col is None:
                # Swallowed by a merged region anchored elsewhere.
                continue
            if col == 'td':
                token_list.extend(["<td>", "</td>"])
                continue

            token_list.append("<td")
            for attr in ('colspan', 'rowspan'):
                _, found, rest = col.partition(attr + '=')
                if not found:
                    continue
                # Keep only this attribute's own value: when both spans are
                # present the text after "colspan=" also contains the
                # " rowspan=..." part, which must not leak into the value.
                parts = rest.split()
                value = parts[0] if parts else ''
                token_list.append(' {}="{}"'.format(attr, value))
            # Close the opening tag and the cell itself so spanning cells are
            # balanced exactly like plain ones.
            token_list.extend([">", "</td>"])
        token_list.append("</tr>")
    token_list.append("</tbody>")
    return token_list
def
stepsInfo
(
lang
=
'en'
):
if
lang
==
'ch'
:
msg
=
"1. 安装与运行:使用上述命令安装与运行程序。
\n
"
\
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录