Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleOCR
提交
2acde6c5
P
PaddleOCR
项目概览
s920243400
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2acde6c5
编写于
5月 08, 2022
作者:
E
Evezerest
提交者:
GitHub
5月 08, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6126 from whjdark/new
add table recognition
上级
8a8c9975
1f3ca7a5
变更
5
展开全部
隐藏空白更改
内联
并排
Showing
5 changed files
with
525 additions
and
19 deletions
+525
-19
PPOCRLabel/PPOCRLabel.py
PPOCRLabel/PPOCRLabel.py
+332
-16
PPOCRLabel/libs/dataPartitionDialog.py
PPOCRLabel/libs/dataPartitionDialog.py
+113
-0
PPOCRLabel/libs/utils.py
PPOCRLabel/libs/utils.py
+71
-0
PPOCRLabel/resources/strings/strings-en.properties
PPOCRLabel/resources/strings/strings-en.properties
+4
-1
PPOCRLabel/resources/strings/strings-zh-CN.properties
PPOCRLabel/resources/strings/strings-zh-CN.properties
+5
-2
未找到文件。
PPOCRLabel/PPOCRLabel.py
浏览文件 @
2acde6c5
此差异已折叠。
点击以展开。
PPOCRLabel/libs/dataPartitionDialog.py
0 → 100644
浏览文件 @
2acde6c5
try
:
from
PyQt5.QtGui
import
*
from
PyQt5.QtCore
import
*
from
PyQt5.QtWidgets
import
*
except
ImportError
:
from
PyQt4.QtGui
import
*
from
PyQt4.QtCore
import
*
from
libs.utils
import
newIcon
import
time
import
datetime
import
json
import
cv2
import
numpy
as
np
# Short alias for QDialogButtonBox; the dialog below uses it to build and
# address its OK / Cancel buttons.
BB = QDialogButtonBox
class DataPartitionDialog(QDialog):
    """Modal dialog that asks for train / validation / test split percentages.

    The dialog shows a warning message, three integer input fields
    (pre-filled with 70 / 15 / 15) and an OK / Cancel button box.  After it
    closes, ``getStatus()`` tells whether the user confirmed and
    ``getDataPartition()`` returns the three entered integers.
    """

    def __init__(self, parent=None):
        """Create the dialog and show it immediately.

        Args:
            parent: optional object whose ``lang`` attribute selects the
                language of the warning message (``'ch'`` for Chinese,
                anything else for English).  It is only stored as a plain
                reference; it is not passed to ``QDialog`` as the Qt parent.
        """
        super().__init__()
        # The misspelled attribute name is kept so that any code that already
        # reads ``parnet`` keeps working.
        self.parnet = parent
        self.title = 'DATA PARTITION'
        self.train_ratio = 70
        self.val_ratio = 15
        self.test_ratio = 15
        self.initUI()

    def initUI(self):
        """Build all widgets, lay them out in a grid and show the dialog."""
        self.setWindowTitle(self.title)
        self.setWindowModality(Qt.ApplicationModal)
        self.flag_accept = True

        # ``parent`` defaults to None, so do not assume it has a ``lang``
        # attribute; fall back to the English message in that case.
        lang = getattr(self.parnet, 'lang', 'en')
        if lang == 'ch':
            msg = "导出JSON前请保存所有图像的标注且关闭EXCEL!"
        else:
            msg = "Please save all the annotations and close the EXCEL before exporting JSON!"

        info_msg = QLabel(msg, self)
        info_msg.setWordWrap(True)
        info_msg.setStyleSheet("color: red")
        info_msg.setFont(QFont('Arial', 12))

        train_lbl = QLabel('Train split: ', self)
        train_lbl.setFont(QFont('Arial', 15))
        val_lbl = QLabel('Valid split: ', self)
        val_lbl.setFont(QFont('Arial', 15))
        test_lbl = QLabel('Test split: ', self)
        test_lbl.setFont(QFont('Arial', 15))

        self.train_input = QLineEdit(self)
        self.train_input.setFont(QFont('Arial', 15))
        self.val_input = QLineEdit(self)
        self.val_input.setFont(QFont('Arial', 15))
        self.test_input = QLineEdit(self)
        self.test_input.setFont(QFont('Arial', 15))

        self.train_input.setText(str(self.train_ratio))
        self.val_input.setText(str(self.val_ratio))
        self.test_input.setText(str(self.test_ratio))

        # One validator restricts all three fields to integers in [0, 100].
        # It is parented to the dialog so that its lifetime does not depend
        # on this local variable.
        validator = QIntValidator(0, 100, self)
        self.train_input.setValidator(validator)
        self.val_input.setValidator(validator)
        self.test_input.setValidator(validator)

        gridlayout = QGridLayout()
        gridlayout.addWidget(info_msg, 0, 0, 1, 2)
        gridlayout.addWidget(train_lbl, 1, 0)
        gridlayout.addWidget(val_lbl, 2, 0)
        gridlayout.addWidget(test_lbl, 3, 0)
        gridlayout.addWidget(self.train_input, 1, 1)
        gridlayout.addWidget(self.val_input, 2, 1)
        gridlayout.addWidget(self.test_input, 3, 1)

        bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
        bb.button(BB.Ok).setIcon(newIcon('done'))
        bb.button(BB.Cancel).setIcon(newIcon('undo'))
        bb.accepted.connect(self.validate)
        bb.rejected.connect(self.cancel)
        gridlayout.addWidget(bb, 4, 0, 1, 2)

        self.setLayout(gridlayout)
        self.show()

    def validate(self):
        """Slot for OK: accept the dialog if every field holds a valid value.

        An empty field passes the validator only as intermediate input, so
        it is checked here; otherwise ``getDataPartition`` would later be
        asked to convert an empty string.  When a field is not acceptable
        the dialog stays open and that field receives focus.
        """
        for field in (self.train_input, self.val_input, self.test_input):
            if not field.hasAcceptableInput():
                field.setFocus()
                return
        self.flag_accept = True
        self.accept()

    def cancel(self):
        """Slot for Cancel: record the refusal and reject the dialog."""
        self.flag_accept = False
        self.reject()

    def getStatus(self):
        """Return True if the dialog was confirmed, False if cancelled/closed."""
        return self.flag_accept

    @staticmethod
    def _read_ratio(field, fallback):
        """Return the integer typed into ``field`` or ``fallback`` if it is not one."""
        try:
            return int(field.text())
        except ValueError:
            return fallback

    def getDataPartition(self):
        """Read the three input fields and return them as integers.

        A field whose text cannot be parsed as an integer (for example an
        empty field) keeps the previously stored ratio instead of raising.

        Returns:
            tuple: ``(train_ratio, val_ratio, test_ratio)``.
        """
        self.train_ratio = self._read_ratio(self.train_input, self.train_ratio)
        self.val_ratio = self._read_ratio(self.val_input, self.val_ratio)
        self.test_ratio = self._read_ratio(self.test_input, self.test_ratio)
        return self.train_ratio, self.val_ratio, self.test_ratio

    def closeEvent(self, event):
        """Treat closing the window the same way as pressing Cancel."""
        self.flag_accept = False
        self.reject()
PPOCRLabel/libs/utils.py
浏览文件 @
2acde6c5
...
...
@@ -161,6 +161,77 @@ def get_rotate_crop_image(img, points):
print
(
e
)
def boxPad(box, imgShape, pad: int) -> np.ndarray:
    """
    Pad a box with [pad] pixels on each side.

    The first four points of ``box`` are moved outwards: point 0 by
    (-pad, -pad), point 1 by (+pad, -pad), point 2 by (+pad, +pad) and
    point 3 by (-pad, +pad), i.e. the box is expected to list its corners as
    top-left, top-right, bottom-right, bottom-left (with y growing
    downwards).  The result is then clipped to the image extent.

    Args:
        box: sequence of at least four (x, y) points; it is copied, the
            argument itself is not modified.
        imgShape: image shape whose first two entries are (height, width).
            Both ``(h, w)`` and ``(h, w, channels)`` are accepted.
        pad: number of pixels to grow the box by on every side.

    Returns:
        ``np.ndarray`` of dtype int32 with the padded points, x clipped to
        ``[0, w]`` and y clipped to ``[0, h]``.
    """
    box = np.array(box, dtype=np.int32)

    # Outward (x, y) direction of each of the four corners.
    directions = np.array([[-1, -1], [1, -1], [1, 1], [-1, 1]])
    box[:4, :2] = box[:4, :2] + directions * pad

    # Only height and width are needed, so a 2-D (grayscale) shape works too.
    h, w = imgShape[:2]
    box[:, 0] = np.clip(box[:, 0], 0, w)
    box[:, 1] = np.clip(box[:, 1], 0, h)
    return box
def OBB2HBB(obb) -> np.ndarray:
    """
    Convert Oriented Bounding Box to Horizontal Bounding Box.

    Args:
        obb: array-like of (x, y) points (an ``np.ndarray`` or a plain
            list/tuple of points).

    Returns:
        ``np.ndarray`` of dtype int32 laid out as
        ``[min_x, min_y, max_x, max_y]``.
    """
    # asarray lets callers pass plain lists without copying real arrays.
    points = np.asarray(obb)
    xs = points[:, 0]
    ys = points[:, 1]
    hbb = np.zeros(4, dtype=np.int32)
    hbb[0] = xs.min()
    hbb[1] = ys.min()
    hbb[2] = xs.max()
    hbb[3] = ys.max()
    return hbb
def expand_list(merged, html_list):
    '''
    Mark one merged cell region inside a 2-D cell grid.

    ``merged`` unpacks to (start_row, end_row, start_col, end_col) with the
    end indices exclusive.  Every cell of that region is set to None, then
    the region's first cell is replaced by a string that carries
    " colspan=N" and/or " rowspan=N" for each span larger than one (an empty
    string when neither applies).  ``html_list`` is modified in place and
    also returned.
    '''
    row_start, row_end, col_start, col_end = merged

    # Blank out the whole region first.
    for row_idx in range(row_start, row_end):
        for col_idx in range(col_start, col_end):
            html_list[row_idx][col_idx] = None

    # The anchor cell describes the spans of the merged region.
    html_list[row_start][col_start] = ''
    col_span = col_end - col_start
    row_span = row_end - row_start
    if col_span > 1:
        html_list[row_start][col_start] += " colspan={}".format(col_span)
    if row_span > 1:
        html_list[row_start][col_start] += " rowspan={}".format(row_span)

    return html_list
def convert_token(html_list):
    '''
    Convert raw html to label format

    ``html_list`` is a 2-D list of cells.  Each cell is one of:

    * ``None``  - skipped, no token is produced;
    * ``'td'``  - a plain cell, produces ``"<td>", "</td>"``;
    * any other string - a cell with optional span attributes written as
      whitespace separated ``colspan=N`` / ``rowspan=N`` items; produces
      ``"<td"``, one token per attribute (colspan first, then rowspan) and
      ``">", "</td>"``.

    Returns the flat token list wrapped in ``<tbody>`` ... ``</tbody>`` with
    ``<tr>`` / ``</tr>`` around every row.
    '''
    token_list = ["<tbody>"]

    for row in html_list:
        token_list.append("<tr>")
        for col in row:
            if col is None:
                continue
            if col == 'td':
                token_list.extend(["<td>", "</td>"])
                continue

            # Parse every "name=value" item separately.  Splitting the whole
            # string on 'colspan=' would drag a following rowspan item into
            # the colspan value when a cell carries both attributes.
            spans = dict(
                item.split('=', 1) for item in col.split() if '=' in item)

            token_list.append("<td")
            for name in ('colspan', 'rowspan'):
                if name in spans:
                    token_list.append(" {}=\"{}\"".format(name, spans[name]))
            token_list.extend([">", "</td>"])
        token_list.append("</tr>")

    token_list.append("</tbody>")
    return token_list
def
stepsInfo
(
lang
=
'en'
):
if
lang
==
'ch'
:
msg
=
"1. 安装与运行:使用上述命令安装与运行程序。
\n
"
\
...
...
PPOCRLabel/resources/strings/strings-en.properties
浏览文件 @
2acde6c5
...
...
@@ -84,7 +84,7 @@ mhelp=Help
iconList
=
Icon List
detectionBoxposition
=
Detection box position
recognitionResult
=
Recognition result
creatPolygon
=
Create
Quadrilateral
creatPolygon
=
Create
PolygonBox
rotateLeft
=
Left turn 90 degrees
rotateRight
=
Right turn 90 degrees
drawSquares
=
Draw Squares
...
...
@@ -110,3 +110,6 @@ lockBoxDetail=Lock selected box/Unlock all box
keyListTitle
=
Key List
keyDialogTip
=
Enter object label
keyChange
=
Change Box Key
TableRecognition
=
Table Recognition
cellreRecognition
=
Cell Re-Recognition
exportJSON
=
export JSON(PubTabNet)
PPOCRLabel/resources/strings/strings-zh-CN.properties
浏览文件 @
2acde6c5
...
...
@@ -84,7 +84,7 @@ mhelp=帮助
iconList
=
缩略图
detectionBoxposition
=
检测框位置
recognitionResult
=
识别结果
creatPolygon
=
四点
标注
creatPolygon
=
多边形
标注
drawSquares
=
正方形标注
rotateLeft
=
图片左旋转90度
rotateRight
=
图片右旋转90度
...
...
@@ -109,4 +109,7 @@ lockBox=锁定框/解除锁定框
lockBoxDetail
=
若当前没有框处于锁定状态则锁定选中的框,若存在锁定框则解除所有锁定框的锁定状态
keyListTitle
=
关键词列表
keyDialogTip
=
请输入类型名称
keyChange
=
更改Box关键字类别
\ No newline at end of file
keyChange
=
更改Box关键字类别
TableRecognition
=
表格识别
cellreRecognition
=
单元格重识别
exportJSON
=
导出表格JSON标注
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录