Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_41840029
PaddleOCR
提交
20466055
P
PaddleOCR
项目概览
weixin_41840029
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
20466055
编写于
6月 03, 2021
作者:
W
WenmuZhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add save_dir to args
上级
0bf30fea
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
16 addition
and
8 deletion
+16
-8
ppstructure/predict_system.py
ppstructure/predict_system.py
+16
-8
未找到文件。
ppstructure/predict_system.py
浏览文件 @
20466055
...
...
@@ -38,6 +38,8 @@ logger = get_logger()
def
parse_args
():
parser
=
utility
.
init_args
()
# params for output
parser
.
add_argument
(
"--table_output"
,
type
=
str
,
default
=
'output/table'
)
# params for table structure
parser
.
add_argument
(
"--table_max_len"
,
type
=
int
,
default
=
488
)
parser
.
add_argument
(
"--table_max_text_length"
,
type
=
int
,
default
=
100
)
...
...
@@ -65,9 +67,9 @@ class OCRSystem():
layout_res
=
self
.
table_layout
(
copy
.
deepcopy
(
img
))
for
region
in
layout_res
:
x1
,
y1
,
x2
,
y2
=
region
[
'bbox'
]
roi_img
=
ori_im
[
y1
:
y2
,
x1
:
x2
,:]
roi_img
=
ori_im
[
y1
:
y2
,
x1
:
x2
,
:]
if
region
[
'label'
]
==
'table'
:
res
=
self
.
t
able
_system
(
roi_img
)
res
=
self
.
t
ext
_system
(
roi_img
)
else
:
res
=
self
.
text_system
(
roi_img
)
region
[
'res'
]
=
res
...
...
@@ -77,15 +79,15 @@ class OCRSystem():
def
main
(
args
):
image_file_list
=
get_image_file_list
(
args
.
image_dir
)
image_file_list
=
image_file_list
[
args
.
process_id
::
args
.
total_process_num
]
excel_save_folder
=
'output/table'
os
.
makedirs
(
excel_
save_folder
,
exist_ok
=
True
)
save_folder
=
args
.
table_output
os
.
makedirs
(
save_folder
,
exist_ok
=
True
)
text_sys
=
OCRSystem
(
args
)
img_num
=
len
(
image_file_list
)
for
i
,
image_file
in
enumerate
(
image_file_list
):
logger
.
info
(
"[{}/{}] {}"
.
format
(
i
,
img_num
,
image_file
))
img
,
flag
=
check_and_read_gif
(
image_file
)
imgname
=
os
.
path
.
basename
(
image_file
).
split
(
'.'
)[
0
]
img
_
name
=
os
.
path
.
basename
(
image_file
).
split
(
'.'
)[
0
]
# excel_path = os.path.join(excel_save_folder, + '.xlsx')
if
not
flag
:
img
=
cv2
.
imread
(
image_file
)
...
...
@@ -95,11 +97,17 @@ def main(args):
starttime
=
time
.
time
()
res
=
text_sys
(
img
)
excel_save_folder
=
os
.
path
.
join
(
save_folder
,
img_name
)
os
.
makedirs
(
excel_save_folder
,
exist_ok
=
True
)
# save res
for
region
in
res
:
if
region
[
'label'
]
==
'table'
:
# x1, y1, x2, y2 = region['bbox']
excel_path
=
os
.
path
.
join
(
excel_save_folder
,
'{}_{}.xlsx'
.
format
(
imgname
,
region
[
'bbox'
]))
to_excel
(
region
[
'res'
],
excel_path
)
excel_path
=
os
.
path
.
join
(
excel_save_folder
,
'{}.xlsx'
.
format
(
region
[
'bbox'
]))
to_excel
(
region
[
'res'
],
excel_path
)
else
:
with
open
(
os
.
path
.
join
(
excel_save_folder
,
'res.txt'
),
'a'
,
encoding
=
'utf8'
)
as
f
:
for
box
,
rec_res
in
zip
(
*
region
[
'res'
]):
f
.
write
(
'{}
\t
{}
\n
'
.
format
(
np
.
array
(
box
).
reshape
(
-
1
).
tolist
(),
rec_res
))
logger
.
info
(
res
)
elapse
=
time
.
time
()
-
starttime
logger
.
info
(
"Predict time : {:.3f}s"
.
format
(
elapse
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录