Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
ce321153
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ce321153
编写于
8月 14, 2022
作者:
文幕地方
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rm unused code
上级
73c77ff7
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
14 addition
and
82 deletion
+14
-82
deploy/cpp_infer/src/postprocess_op.cpp
deploy/cpp_infer/src/postprocess_op.cpp
+2
-2
ppocr/utils/visual.py
ppocr/utils/visual.py
+1
-0
ppstructure/docs/quickstart.md
ppstructure/docs/quickstart.md
+2
-2
ppstructure/table/eval_table.py
ppstructure/table/eval_table.py
+2
-1
ppstructure/table/predict_table.py
ppstructure/table/predict_table.py
+6
-3
ppstructure/table/table_master_match.py
ppstructure/table/table_master_match.py
+0
-74
tools/train.py
tools/train.py
+1
-0
未找到文件。
deploy/cpp_infer/src/postprocess_op.cpp
浏览文件 @
ce321153
...
...
@@ -400,7 +400,7 @@ void TablePostProcessor::Run(
score
+=
char_score
;
rec_html_tags
.
push_back
(
html_tag
);
// box
if
(
html_tag
==
"<td>"
||
html_tag
==
"<td"
)
{
if
(
html_tag
==
"<td>"
||
html_tag
==
"<td"
||
html_tag
==
"<td></td>"
)
{
for
(
int
point_idx
=
0
;
point_idx
<
loc_preds_shape
[
2
];
point_idx
+=
2
)
{
std
::
vector
<
int
>
point
(
2
,
0
);
...
...
@@ -416,7 +416,7 @@ void TablePostProcessor::Run(
}
}
score
/=
count
;
if
(
isnan
(
score
)
||
rec_boxes
.
size
()
==
0
||
rec_html_tags
.
size
()
==
0
)
{
if
(
isnan
(
score
)
||
rec_boxes
.
size
()
==
0
)
{
score
=
-
1
;
}
rec_scores
.
push_back
(
score
);
...
...
ppocr/utils/visual.py
浏览文件 @
ce321153
...
...
@@ -114,6 +114,7 @@ def draw_re_results(image,
def
draw_rectangle
(
img_path
,
boxes
):
boxes
=
np
.
array
(
boxes
)
img
=
cv2
.
imread
(
img_path
)
img_show
=
img
.
copy
()
for
box
in
boxes
.
astype
(
int
):
...
...
ppstructure/docs/quickstart.md
浏览文件 @
ce321153
...
...
@@ -4,7 +4,7 @@
-
[
2. 便捷使用
](
#2-便捷使用
)
-
[
2.1 命令行使用
](
#21-命令行使用
)
-
[
2.1.1 图像方向分类+版面分析+表格识别
](
#211-图像方向分类版面分析表格识别
)
-
[
2.1.
1 版面分析+表格识别
](
#211
-版面分析表格识别
)
-
[
2.1.
2 版面分析+表格识别
](
#212
-版面分析表格识别
)
-
[
2.1.3 版面分析
](
#213-版面分析
)
-
[
2.1.4 表格识别
](
#214-表格识别
)
-
[
2.1.5 DocVQA
](
#215-docvqa
)
...
...
@@ -44,7 +44,7 @@ paddleocr --image_dir=PaddleOCR/ppstructure/docs/table/1.png --type=structure --
```
<a
name=
"212"
></a>
#### 2.1.
1
版面分析+表格识别
#### 2.1.
2
版面分析+表格识别
```
bash
paddleocr
--image_dir
=
PaddleOCR/ppstructure/docs/table/1.png
--type
=
structure
```
...
...
ppstructure/table/eval_table.py
浏览文件 @
ce321153
# Copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
...
...
ppstructure/table/predict_table.py
浏览文件 @
ce321153
...
...
@@ -117,7 +117,6 @@ class TableSystem(object):
pred_html
=
self
.
match
(
structure_res
,
dt_boxes
,
rec_res
)
toc
=
time
.
time
()
time_dict
[
'match'
]
=
toc
-
tic
# pred_html = self.match(1, 1, 1,img_name)
result
[
'html'
]
=
pred_html
if
self
.
benchmark
:
self
.
autolog
.
times
.
end
(
stamp
=
True
)
...
...
@@ -212,8 +211,12 @@ def main(args):
elapse
=
time
.
time
()
-
starttime
logger
.
info
(
"Predict time : {:.3f}s"
.
format
(
elapse
))
# img = predict_strture.draw_rectangle(image_file, pred_res['cell_bbox'], use_xywh)
img
=
utility
.
draw_boxes
(
cv2
.
imread
(
image_file
),
pred_res
[
'cell_bbox'
])
if
len
(
pred_res
[
'cell_bbox'
])
>
0
and
len
(
pred_res
[
'cell_bbox'
][
0
])
==
4
:
img
=
predict_strture
.
draw_rectangle
(
image_file
,
pred_res
[
'cell_bbox'
])
else
:
img
=
utility
.
draw_boxes
(
img
,
pred_res
[
'cell_bbox'
])
img_save_path
=
os
.
path
.
join
(
args
.
output
,
os
.
path
.
basename
(
image_file
))
cv2
.
imwrite
(
img_save_path
,
img
)
...
...
ppstructure/table/table_master_match.py
浏览文件 @
ce321153
...
...
@@ -273,10 +273,6 @@ def sort_bbox(end2end_xywh_bboxes, no_match_end2end_indexes):
end2end_sorted_idx_list
,
end2end_sorted_bbox_list
\
=
flatten
(
sorted_groups
,
sorted_bbox_groups
)
# check sorted
#img = cv2.imread('/data_0/yejiaquan/data/TableRecognization/singleVal/PMC3286376_004_00.png')
#img = drawBboxAfterSorted(img, sorted_groups, sorted_bbox_groups)
return
end2end_sorted_idx_list
,
end2end_sorted_bbox_list
,
sorted_groups
,
sorted_bbox_groups
...
...
@@ -302,9 +298,6 @@ def get_bboxes_list(end2end_result, structure_master_result):
# structure master
src_bboxes
=
structure_master_result
[
'bbox'
]
src_bboxes
=
remove_empty_bboxes
(
src_bboxes
)
# structure_master_xywh_bboxes = src_bboxes
# xyxy_bboxes = xywh2xyxy(src_bboxes)
# structure_master_xyxy_bboxes = xyxy_bboxes
structure_master_xyxy_bboxes
=
src_bboxes
xywh_bbox
=
xyxy2xywh
(
src_bboxes
)
structure_master_xywh_bboxes
=
xywh_bbox
...
...
@@ -410,64 +403,6 @@ def extra_match(no_match_end2end_indexes, master_bbox_nums):
return
extra_match_list
def
match_visual
(
file_name
,
match_list
,
end2end_xyxy
,
master_xyxy
,
prex
=
'ordinary_match'
):
"""
Show the match result by xyxy coord style.
:param file_name:
:param match_list:
:param end2end_xyxy:
:param master_xyxy:
:param prex:
:return:
"""
folder
=
''
save_folder
=
'/data_0/cache'
file_path
=
os
.
path
.
join
(
folder
,
file_name
)
img_end2end
=
cv2
.
imread
(
file_path
)
img_master
=
copy
.
deepcopy
(
img_end2end
)
text_color
=
(
0
,
0
,
255
)
bbox_color
=
(
255
,
0
,
0
)
master_nums
=
len
(
master_xyxy
)
for
idx
,
match_group
in
enumerate
(
match_list
):
end2end_idx
,
master_index
=
match_group
[
0
],
match_group
[
1
]
# master_index larger than master_nums, did not draw master bbox.
if
master_index
<
master_nums
:
# draw master
master_bbox
=
master_xyxy
[
master_index
]
img_master
=
cv2
.
rectangle
(
img_master
,
(
int
(
master_bbox
[
0
]),
int
(
master_bbox
[
1
])),
(
int
(
master_bbox
[
2
]),
int
(
master_bbox
[
3
])),
bbox_color
,
thickness
=
1
)
master_text_coord
=
(
int
(
master_bbox
[
0
])
-
4
,
int
(
master_bbox
[
1
]))
img_master
=
cv2
.
putText
(
img_master
,
str
(
master_index
),
master_text_coord
,
1
,
1
,
text_color
,
2
)
# draw end2end
end2end_bbox
=
end2end_xyxy
[
end2end_idx
]
img_end2end
=
cv2
.
rectangle
(
img_end2end
,
(
int
(
end2end_bbox
[
0
]),
int
(
end2end_bbox
[
1
])),
(
int
(
end2end_bbox
[
2
]),
int
(
end2end_bbox
[
3
])),
bbox_color
,
thickness
=
1
)
end2end_text_coord
=
(
int
(
end2end_bbox
[
0
])
-
4
,
int
(
end2end_bbox
[
1
]))
# write end2end bbox matching master bbox's index
img_end2end
=
cv2
.
putText
(
img_end2end
,
str
(
master_index
),
end2end_text_coord
,
1
,
1
,
text_color
,
2
)
img
=
np
.
hstack
([
img_end2end
,
img_master
])
save_path
=
os
.
path
.
join
(
save_folder
,
'{}_matchShow.png'
.
format
(
prex
))
cv2
.
imwrite
(
save_path
,
img
)
def
get_match_dict
(
match_list
):
"""
Convert match_list to a dict, where key is master bbox's index, value is end2end bbox index.
...
...
@@ -555,8 +490,6 @@ def merge_span_token(master_token_list):
pattern <td colspan="3">
'<td' + 'colspan=" "' + '>' + '</td>'
"""
# tmp = master_token_list[pointer] + master_token_list[pointer+1] + master_token_list[pointer+2] + \
# master_token_list[pointer+3]
tmp
=
''
.
join
(
master_token_list
[
pointer
:
pointer
+
3
+
1
])
pointer
+=
4
new_master_token_list
.
append
(
tmp
)
...
...
@@ -569,8 +502,6 @@ def merge_span_token(master_token_list):
pattern <td rowspan="2" colspan="3">
'<td' + 'rowspan=" "' + 'colspan=" "' + '>' + '</td>'
"""
# tmp = master_token_list[pointer] + master_token_list[pointer+1] + \
# master_token_list[pointer+2] + master_token_list[pointer+3] + master_token_list[pointer+4]
tmp
=
''
.
join
(
master_token_list
[
pointer
:
pointer
+
4
+
1
])
pointer
+=
5
new_master_token_list
.
append
(
tmp
)
...
...
@@ -909,11 +840,6 @@ class Matcher:
'sorted_bboxes_groups'
:
sorted_bboxes_groups
}
# ordinary match show
# match_visual(file_name, match_list, end2end_xyxy_bboxes, structure_master_xyxy_bboxes, prex='ordinary_match')
# extra match show
# match_visual(file_name, match_list_add_extra_match, end2end_xyxy_bboxes, structure_master_xyxy_bboxes, prex='extra_match')
# format output
match_result_dict
=
self
.
_format
(
match_result_dict
,
file_name
)
...
...
tools/train.py
浏览文件 @
ce321153
...
...
@@ -125,6 +125,7 @@ def main(config, device, logger, vdl_writer):
logger
.
info
(
'convert_sync_batchnorm'
)
model
=
apply_to_static
(
model
,
config
,
logger
)
logger
.
info
(
model
)
# build loss
loss_class
=
build_loss
(
config
[
'Loss'
])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录