Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
e15cf0d1
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e15cf0d1
编写于
5月 05, 2022
作者:
qq_25193841
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/new' into table
上级
320cb381
8b228a1f
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
159 addition
and
91 deletion
+159
-91
PPOCRLabel/PPOCRLabel.py
PPOCRLabel/PPOCRLabel.py
+56
-89
PPOCRLabel/libs/dataPartitionDialog.py
PPOCRLabel/libs/dataPartitionDialog.py
+101
-0
PPOCRLabel/resources/strings/strings-en.properties
PPOCRLabel/resources/strings/strings-en.properties
+1
-1
PPOCRLabel/resources/strings/strings-zh-CN.properties
PPOCRLabel/resources/strings/strings-zh-CN.properties
+1
-1
未找到文件。
PPOCRLabel/PPOCRLabel.py
浏览文件 @
e15cf0d1
...
...
@@ -202,11 +202,8 @@ class MainWindow(QMainWindow):
self
.
reRecogButton
.
setIcon
(
newIcon
(
'reRec'
,
30
))
self
.
reRecogButton
.
setToolButtonStyle
(
Qt
.
ToolButtonTextBesideIcon
)
self
.
cellreRecButton
=
QToolButton
()
self
.
cellreRecButton
.
setToolButtonStyle
(
Qt
.
ToolButtonTextBesideIcon
)
self
.
tableRecButton
=
QToolButton
()
self
.
tableRecButton
.
setToolButtonStyle
(
Qt
.
ToolButtonTextBesideIcon
)
self
.
newButton
=
QToolButton
()
self
.
newButton
.
setToolButtonStyle
(
Qt
.
ToolButtonTextBesideIcon
)
...
...
@@ -221,9 +218,9 @@ class MainWindow(QMainWindow):
leftTopToolBox
=
QGridLayout
()
leftTopToolBox
.
addWidget
(
self
.
newButton
,
0
,
0
,
1
,
1
)
leftTopToolBox
.
addWidget
(
self
.
createpolyButton
,
0
,
1
,
1
,
1
)
leftTopToolBox
.
addWidget
(
self
.
reRecogButton
,
0
,
2
,
1
,
1
)
leftTopToolBox
.
addWidget
(
self
.
tableRecButton
,
1
,
0
,
1
,
1
)
leftTopToolBox
.
addWidget
(
self
.
cellreRecButton
,
1
,
1
,
1
,
1
)
leftTopToolBox
.
addWidget
(
self
.
reRecogButton
,
1
,
0
,
1
,
1
)
leftTopToolBox
.
addWidget
(
self
.
tableRecButton
,
1
,
1
,
1
,
1
)
leftTopToolBoxContainer
=
QWidget
()
leftTopToolBoxContainer
.
setLayout
(
leftTopToolBox
)
listLayout
.
addWidget
(
leftTopToolBoxContainer
)
...
...
@@ -507,7 +504,6 @@ class MainWindow(QMainWindow):
self
.
AutoRecognition
.
setDefaultAction
(
AutoRec
)
self
.
reRecogButton
.
setDefaultAction
(
reRec
)
self
.
tableRecButton
.
setDefaultAction
(
tableRec
)
self
.
cellreRecButton
.
setDefaultAction
(
cellreRec
)
# self.preButton.setDefaultAction(openPrevImg)
# self.nextButton.setDefaultAction(openNextImg)
...
...
@@ -564,11 +560,11 @@ class MainWindow(QMainWindow):
rotateLeft
=
rotateLeft
,
rotateRight
=
rotateRight
,
lock
=
lock
,
exportJSON
=
exportJSON
,
fileMenuActions
=
(
opendir
,
open_dataset_dir
,
saveLabel
,
exportJSON
,
resetAll
,
quit
),
beginner
=
(),
advanced
=
(),
editMenu
=
(
createpoly
,
edit
,
copy
,
delete
,
singleRere
,
None
,
undo
,
undoLastPoint
,
editMenu
=
(
createpoly
,
edit
,
copy
,
delete
,
singleRere
,
cellreRec
,
None
,
undo
,
undoLastPoint
,
None
,
rotateLeft
,
rotateRight
,
None
,
color1
,
self
.
drawSquaresOption
,
lock
,
None
,
change_cls
),
beginnerContext
=
(
create
,
createpoly
,
edit
,
copy
,
delete
,
singleRere
,
rotateLeft
,
rotateRight
,
lock
,
change_cls
),
create
,
createpoly
,
edit
,
copy
,
delete
,
singleRere
,
cellreRec
,
rotateLeft
,
rotateRight
,
lock
,
change_cls
),
advancedContext
=
(
createMode
,
editMode
,
edit
,
copy
,
delete
,
shapeLineColor
,
shapeFillColor
),
onLoadActive
=
(
create
,
createpoly
,
createMode
,
editMode
),
...
...
@@ -1025,6 +1021,7 @@ class MainWindow(QMainWindow):
self
.
_noSelectionSlot
=
False
n_selected
=
len
(
selected_shapes
)
self
.
actions
.
singleRere
.
setEnabled
(
n_selected
)
self
.
actions
.
cellreRec
.
setEnabled
(
n_selected
)
self
.
actions
.
delete
.
setEnabled
(
n_selected
)
self
.
actions
.
copy
.
setEnabled
(
n_selected
)
self
.
actions
.
edit
.
setEnabled
(
n_selected
==
1
)
...
...
@@ -1690,12 +1687,10 @@ class MainWindow(QMainWindow):
self
.
haveAutoReced
=
False
self
.
AutoRecognition
.
setEnabled
(
True
)
self
.
reRecogButton
.
setEnabled
(
True
)
self
.
cellreRecButton
.
setEnabled
(
True
)
self
.
tableRecButton
.
setEnabled
(
True
)
self
.
actions
.
AutoRec
.
setEnabled
(
True
)
self
.
actions
.
reRec
.
setEnabled
(
True
)
self
.
actions
.
tableRec
.
setEnabled
(
True
)
self
.
actions
.
cellreRec
.
setEnabled
(
True
)
self
.
actions
.
open_dataset_dir
.
setEnabled
(
True
)
self
.
actions
.
rotateLeft
.
setEnabled
(
True
)
self
.
actions
.
rotateRight
.
setEnabled
(
True
)
...
...
@@ -2229,87 +2224,59 @@ class MainWindow(QMainWindow):
re-recognise text in a cell
'''
img
=
cv2
.
imread
(
self
.
filePath
)
for
shape
in
self
.
canvas
.
selectedShapes
:
box
=
[[
int
(
p
.
x
()),
int
(
p
.
y
())]
for
p
in
shape
.
points
]
if
self
.
canvas
.
shapes
:
self
.
result_dic
=
[]
self
.
result_dic_locked
=
[]
# result_dic_locked stores the ocr result of self.canvas.lockedShapes
rec_flag
=
0
for
shape
in
self
.
canvas
.
shapes
:
box
=
[[
int
(
p
.
x
()),
int
(
p
.
y
())]
for
p
in
shape
.
points
]
if
len
(
box
)
>
4
:
box
=
self
.
gen_quad_from_poly
(
np
.
array
(
box
))
assert
len
(
box
)
==
4
if
len
(
box
)
>
4
:
box
=
self
.
gen_quad_from_poly
(
np
.
array
(
box
))
assert
len
(
box
)
==
4
# pad around bbox for better text recognition accuracy
_box
=
boxPad
(
box
,
img
.
shape
,
6
)
img_crop
=
get_rotate_crop_image
(
img
,
np
.
array
(
_box
,
np
.
float32
))
if
img_crop
is
None
:
msg
=
'Can not recognise the detection box in '
+
self
.
filePath
+
'. Please change manually'
QMessageBox
.
information
(
self
,
"Information"
,
msg
)
return
# pad around bbox for better text recognition accuracy
print
(
box
)
_box
=
boxPad
(
box
,
img
.
shape
,
6
)
print
(
_box
)
img_crop
=
get_rotate_crop_image
(
img
,
np
.
array
(
_box
,
np
.
float32
))
if
img_crop
is
None
:
msg
=
'Can not recognise the detection box in '
+
self
.
filePath
+
'. Please change manually'
QMessageBox
.
information
(
self
,
"Information"
,
msg
)
return
# merge the text result in the cell
texts
=
''
probs
=
0.
# the probability of the cell is avgerage prob of every text box in the cell
bboxes
=
self
.
ocr
.
ocr
(
img_crop
,
det
=
True
,
rec
=
False
,
cls
=
False
)
if
len
(
bboxes
)
>
0
:
bboxes
.
reverse
()
# top row text at first
for
_bbox
in
bboxes
:
patch
=
get_rotate_crop_image
(
img_crop
,
np
.
array
(
_bbox
,
np
.
float32
))
rec_res
=
self
.
ocr
.
ocr
(
patch
,
det
=
False
,
rec
=
True
,
cls
=
False
)
text
=
rec_res
[
0
][
0
]
if
text
!=
''
:
texts
+=
text
+
(
' '
if
text
[
0
].
isalpha
()
else
''
)
# add space between english word
probs
+=
rec_res
[
0
][
1
]
probs
=
probs
/
len
(
bboxes
)
result
=
[(
texts
.
strip
(),
probs
)]
# merge the text result in the cell
texts
=
''
probs
=
0.
# the probability of the cell is avgerage prob of every text box in the cell
bboxes
=
self
.
ocr
.
ocr
(
img_crop
,
det
=
True
,
rec
=
False
,
cls
=
False
)
if
len
(
bboxes
)
>
0
:
bboxes
.
reverse
()
# top row text at first
for
_bbox
in
bboxes
:
patch
=
get_rotate_crop_image
(
img_crop
,
np
.
array
(
_bbox
,
np
.
float32
))
rec_res
=
self
.
ocr
.
ocr
(
patch
,
det
=
False
,
rec
=
True
,
cls
=
False
)
text
=
rec_res
[
0
][
0
]
if
text
!=
''
:
texts
+=
text
+
(
' '
if
text
[
0
].
isalpha
()
else
''
)
# add space between english word
probs
+=
rec_res
[
0
][
1
]
probs
=
probs
/
len
(
bboxes
)
result
=
[(
texts
.
strip
(),
probs
)]
if
result
[
0
][
0
]
!=
''
:
if
shape
.
line_color
==
DEFAULT_LOCK_COLOR
:
shape
.
label
=
result
[
0
][
0
]
result
.
insert
(
0
,
box
)
self
.
result_dic_locked
.
append
(
result
)
else
:
result
.
insert
(
0
,
box
)
self
.
result_dic
.
append
(
result
)
else
:
print
(
'Can not recognise the box'
)
if
shape
.
line_color
==
DEFAULT_LOCK_COLOR
:
shape
.
label
=
result
[
0
][
0
]
self
.
result_dic_locked
.
append
([
box
,
(
self
.
noLabelText
,
0
)])
else
:
self
.
result_dic
.
append
([
box
,
(
self
.
noLabelText
,
0
)])
try
:
if
self
.
noLabelText
==
shape
.
label
or
result
[
1
][
0
]
==
shape
.
label
:
print
(
'label no change'
)
else
:
rec_flag
+=
1
except
IndexError
as
e
:
print
(
'Can not recognise the box'
)
if
(
len
(
self
.
result_dic
)
>
0
and
rec_flag
>
0
)
or
self
.
canvas
.
lockedShapes
:
self
.
canvas
.
isInTheSameImage
=
True
self
.
saveFile
(
mode
=
'Auto'
)
self
.
loadFile
(
self
.
filePath
)
self
.
canvas
.
isInTheSameImage
=
False
self
.
setDirty
()
elif
len
(
self
.
result_dic
)
==
len
(
self
.
canvas
.
shapes
)
and
rec_flag
==
0
:
if
self
.
lang
==
'ch'
:
QMessageBox
.
information
(
self
,
"Information"
,
"识别结果保持一致!"
)
if
result
[
0
][
0
]
!=
''
:
result
.
insert
(
0
,
box
)
print
(
'result in reRec is '
,
result
)
if
result
[
1
][
0
]
==
shape
.
label
:
print
(
'label no change'
)
else
:
QMessageBox
.
information
(
self
,
"Information"
,
"The recognition result remains unchanged!"
)
shape
.
label
=
result
[
1
][
0
]
else
:
print
(
'Can not recgonise in '
,
self
.
filePath
)
else
:
QMessageBox
.
information
(
self
,
"Information"
,
"Draw a box!"
)
print
(
'Can not recognise the box'
)
if
self
.
noLabelText
==
shape
.
label
:
print
(
'label no change'
)
else
:
shape
.
label
=
self
.
noLabelText
self
.
singleLabel
(
shape
)
self
.
setDirty
()
def
exportJSON
(
self
):
'''
export PPLabel and CSV to JSON (PubTabNet)
'''
import
pandas
as
pd
from
PyQt5.QtWidgets
import
QInput
Dialog
from
libs.dataPartitionDialog
import
DataPartition
Dialog
if
self
.
lang
==
'ch'
:
QMessageBox
.
information
(
self
,
"Information"
,
"导出JSON前请保存所有图像的标注且关闭EXCEL!!!!!!!!!!!!"
)
...
...
@@ -2346,18 +2313,18 @@ class MainWindow(QMainWindow):
# return
# data partition user input
train_split
,
ok
=
QInputDialog
.
getInt
(
self
,
"DataPatition"
,
"How many data for Training (%):"
,
70
,
0
,
100
,
1
)
if
not
ok
:
return
val_split
,
ok
=
QInputDialog
.
getInt
(
self
,
"DataPatition"
,
"How many data for Validatiion (%):"
,
15
,
0
,
100
,
1
)
if
not
ok
:
partitionDialog
=
DataPartitionDialog
()
partitionDialog
.
exec
()
if
partitionDialog
.
getStatus
()
==
False
:
return
test_split
,
ok
=
QInputDialog
.
getInt
(
self
,
"DataPatition"
,
"How many data for Testing (%):"
,
15
,
0
,
100
,
1
)
train_split
,
val_split
,
test_split
=
partitionDialog
.
getDataPartition
()
# check validate
if
train_split
+
val_split
+
test_split
>
100
:
QMessageBox
.
information
(
self
,
"Information"
,
"The sum of training, validation and testing data should be less than 100%"
)
msg
=
"The sum of training, validation and testing data should be less than 100%"
QMessageBox
.
information
(
self
,
"Information"
,
msg
)
return
print
(
train_split
,
val_split
,
test_split
)
train_split
,
val_split
,
test_split
=
float
(
train_split
)
/
100.
,
float
(
val_split
)
/
100.
,
float
(
test_split
)
/
100.
train_id
=
int
(
len
(
labeldict
)
*
train_split
)
val_id
=
int
(
len
(
labeldict
)
*
(
train_split
+
val_split
))
...
...
@@ -2407,7 +2374,7 @@ class MainWindow(QMainWindow):
with
open
(
"{}/annotation.json"
.
format
(
self
.
lastOpenDir
),
"w"
)
as
fid
:
fid
.
write
(
json
.
dumps
(
json_results
))
msg
=
'JSON sucessfully saved in
'
,
"{}/annotation.json"
.
format
(
self
.
lastOpenDir
)
msg
=
'JSON sucessfully saved in
{}/annotation.json'
.
format
(
self
.
lastOpenDir
)
QMessageBox
.
information
(
self
,
"Information"
,
msg
)
def
autolcm
(
self
):
...
...
PPOCRLabel/libs/dataPartitionDialog.py
0 → 100644
浏览文件 @
e15cf0d1
try
:
from
PyQt5.QtGui
import
*
from
PyQt5.QtCore
import
*
from
PyQt5.QtWidgets
import
*
except
ImportError
:
from
PyQt4.QtGui
import
*
from
PyQt4.QtCore
import
*
from
libs.utils
import
newIcon
import
time
import
datetime
import
json
import
cv2
import
numpy
as
np
BB
=
QDialogButtonBox
class DataPartitionDialog(QDialog):
    """Modal dialog that asks for train / validation / test split percentages.

    Typical use::

        dialog = DataPartitionDialog()
        dialog.exec()
        if dialog.getStatus():
            train, val, test = dialog.getDataPartition()

    The dialog only restricts each field to an integer in the 0..100 range;
    it does not check that the three values add up to 100 -- the caller is
    expected to validate the sum.
    """

    def __init__(self):
        super().__init__()
        self.title = 'DATA PARTITION'
        # Default percentages shown when the dialog opens.
        self.train_ratio = 70
        self.val_ratio = 15
        self.test_ratio = 15
        self.initUI()

    def initUI(self):
        """Build the widgets, wire the OK / Cancel buttons and show the dialog."""
        self.setWindowTitle(self.title)
        self.setWindowModality(Qt.ApplicationModal)
        # Start as "not accepted": only an explicit OK (see validate) may flip
        # this to True.  Defaulting to True would report success when the
        # dialog is dismissed through a path that bypasses cancel() and
        # closeEvent(), e.g. the Esc key, which calls reject() directly.
        self.flag_accept = False

        font = QFont('Arial', 15)

        train_lbl = QLabel('Train split: ', self)
        train_lbl.setFont(font)
        val_lbl = QLabel('Valid split: ', self)
        val_lbl.setFont(font)
        test_lbl = QLabel('Test split: ', self)
        test_lbl.setFont(font)

        self.train_input = QLineEdit(self)
        self.train_input.setFont(font)
        self.val_input = QLineEdit(self)
        self.val_input.setFont(font)
        self.test_input = QLineEdit(self)
        self.test_input.setFont(font)

        self.train_input.setText(str(self.train_ratio))
        self.val_input.setText(str(self.val_ratio))
        self.test_input.setText(str(self.test_ratio))

        # One validator shared by the three fields; parenting it to the dialog
        # ties its lifetime to the dialog instead of to this local variable.
        validator = QIntValidator(0, 100, self)
        self.train_input.setValidator(validator)
        self.val_input.setValidator(validator)
        self.test_input.setValidator(validator)

        # Labels in column 0, inputs in column 1, button box spanning both.
        gridlayout = QGridLayout()
        gridlayout.addWidget(train_lbl, 0, 0)
        gridlayout.addWidget(val_lbl, 1, 0)
        gridlayout.addWidget(test_lbl, 2, 0)
        gridlayout.addWidget(self.train_input, 0, 1)
        gridlayout.addWidget(self.val_input, 1, 1)
        gridlayout.addWidget(self.test_input, 2, 1)

        bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
        bb.button(BB.Ok).setIcon(newIcon('done'))
        bb.button(BB.Cancel).setIcon(newIcon('undo'))
        bb.accepted.connect(self.validate)
        bb.rejected.connect(self.cancel)
        gridlayout.addWidget(bb, 3, 0, 1, 2)

        self.setLayout(gridlayout)

        # Kept for backward compatibility: the dialog has always been shown on
        # construction, before the caller runs exec().
        self.show()

    def validate(self):
        """Slot for the OK button: record acceptance and close the dialog."""
        self.flag_accept = True
        self.accept()

    def cancel(self):
        """Slot for the Cancel button: record rejection and close the dialog."""
        self.flag_accept = False
        self.reject()

    def reject(self):
        """Reject the dialog, always clearing the accepted flag first.

        Overridden so that every rejection path (Cancel, window close, Esc)
        leaves getStatus() returning False, even if the dialog was accepted
        on an earlier run.
        """
        self.flag_accept = False
        super().reject()

    def getStatus(self):
        """Return True if the dialog was confirmed with OK, False otherwise."""
        return self.flag_accept

    @staticmethod
    def _parse_ratio(text):
        """Convert the text of a ratio field to an int; blank or invalid -> 0.

        The integer validator lets the user leave a field empty, and int('')
        would raise ValueError, so such input is treated as 0 percent.
        """
        try:
            return int(text)
        except ValueError:
            return 0

    def getDataPartition(self):
        """Read the three fields and return (train, val, test) as ints.

        The parsed values are also stored on train_ratio, val_ratio and
        test_ratio.
        """
        self.train_ratio = self._parse_ratio(self.train_input.text())
        self.val_ratio = self._parse_ratio(self.val_input.text())
        self.test_ratio = self._parse_ratio(self.test_input.text())
        return self.train_ratio, self.val_ratio, self.test_ratio

    def closeEvent(self, event):
        """Treat closing the window as a cancellation."""
        self.flag_accept = False
        self.reject()
PPOCRLabel/resources/strings/strings-en.properties
浏览文件 @
e15cf0d1
...
...
@@ -84,7 +84,7 @@ mhelp=Help
iconList
=
Icon List
detectionBoxposition
=
Detection box position
recognitionResult
=
Recognition result
creatPolygon
=
Create
Quadrilateral
creatPolygon
=
Create
PolygonBox
rotateLeft
=
Left turn 90 degrees
rotateRight
=
Right turn 90 degrees
drawSquares
=
Draw Squares
...
...
PPOCRLabel/resources/strings/strings-zh-CN.properties
浏览文件 @
e15cf0d1
...
...
@@ -84,7 +84,7 @@ mhelp=帮助
iconList
=
缩略图
detectionBoxposition
=
检测框位置
recognitionResult
=
识别结果
creatPolygon
=
四点
标注
creatPolygon
=
多边形
标注
drawSquares
=
正方形标注
rotateLeft
=
图片左旋转90度
rotateRight
=
图片右旋转90度
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录