Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
731688c2
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
731688c2
编写于
8月 10, 2022
作者:
文幕地方
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add PP-Structurev2 to whl
上级
73ca6c2e
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
40 additions
and
10 deletions
+40
-10
paddleocr.py
paddleocr.py
+40
-10
未找到文件。
paddleocr.py
浏览文件 @
731688c2
...
@@ -47,14 +47,14 @@ __all__ = [
...
@@ -47,14 +47,14 @@ __all__ = [
]
]
SUPPORT_DET_MODEL
=
[
'DB'
]
SUPPORT_DET_MODEL
=
[
'DB'
]
VERSION
=
'2.
5.0.3
'
VERSION
=
'2.
6
'
SUPPORT_REC_MODEL
=
[
'CRNN'
,
'SVTR_LCNet'
]
SUPPORT_REC_MODEL
=
[
'CRNN'
,
'SVTR_LCNet'
]
BASE_DIR
=
os
.
path
.
expanduser
(
"~/.paddleocr/"
)
BASE_DIR
=
os
.
path
.
expanduser
(
"~/.paddleocr/"
)
DEFAULT_OCR_MODEL_VERSION
=
'PP-OCRv3'
DEFAULT_OCR_MODEL_VERSION
=
'PP-OCRv3'
SUPPORT_OCR_MODEL_VERSION
=
[
'PP-OCR'
,
'PP-OCRv2'
,
'PP-OCRv3'
]
SUPPORT_OCR_MODEL_VERSION
=
[
'PP-OCR'
,
'PP-OCRv2'
,
'PP-OCRv3'
]
DEFAULT_STRUCTURE_MODEL_VERSION
=
'PP-S
TRUCTURE
'
DEFAULT_STRUCTURE_MODEL_VERSION
=
'PP-S
tructurev2
'
SUPPORT_STRUCTURE_MODEL_VERSION
=
[
'PP-S
TRUCTURE
'
]
SUPPORT_STRUCTURE_MODEL_VERSION
=
[
'PP-S
tructure'
,
'PP-Structurev2
'
]
MODEL_URLS
=
{
MODEL_URLS
=
{
'OCR'
:
{
'OCR'
:
{
'PP-OCRv3'
:
{
'PP-OCRv3'
:
{
...
@@ -263,7 +263,7 @@ MODEL_URLS = {
...
@@ -263,7 +263,7 @@ MODEL_URLS = {
}
}
},
},
'STRUCTURE'
:
{
'STRUCTURE'
:
{
'PP-S
TRUCTURE
'
:
{
'PP-S
tructure
'
:
{
'table'
:
{
'table'
:
{
'en'
:
{
'en'
:
{
'url'
:
'url'
:
...
@@ -271,6 +271,24 @@ MODEL_URLS = {
...
@@ -271,6 +271,24 @@ MODEL_URLS = {
'dict_path'
:
'ppocr/utils/dict/table_structure_dict.txt'
'dict_path'
:
'ppocr/utils/dict/table_structure_dict.txt'
}
}
}
}
},
'PP-Structurev2'
:
{
'table'
:
{
'en'
:
{
'url'
:
''
,
'dict_path'
:
'ppocr/utils/dict/table_structure_dict.txt'
},
'ch'
:
{
'url'
:
''
,
'dict_path'
:
'ppocr/utils/dict/table_structure_dict.txt'
}
},
'layout'
:
{
'ch'
:
{
'url'
:
''
,
'dict_path'
:
'ppocr/utils/dict/layout_publaynet_dict.txt'
}
}
}
}
}
}
}
}
...
@@ -298,12 +316,15 @@ def parse_args(mMain=True):
...
@@ -298,12 +316,15 @@ def parse_args(mMain=True):
"--structure_version"
,
"--structure_version"
,
type
=
str
,
type
=
str
,
choices
=
SUPPORT_STRUCTURE_MODEL_VERSION
,
choices
=
SUPPORT_STRUCTURE_MODEL_VERSION
,
default
=
'PP-S
TRUCTURE
'
,
default
=
'PP-S
tructure
'
,
help
=
'Model version, the current model support list is as follows:'
help
=
'Model version, the current model support list is as follows:'
' 1. STRUCTURE Support en table structure model.'
)
' 1. PP-Structure Support en table structure model.'
' 2. PP-Structure Support ch and en table structure model.'
)
for
action
in
parser
.
_actions
:
for
action
in
parser
.
_actions
:
if
action
.
dest
in
[
'rec_char_dict_path'
,
'table_char_dict_path'
]:
if
action
.
dest
in
[
'rec_char_dict_path'
,
'table_char_dict_path'
,
'layout_dict_path'
]:
action
.
default
=
None
action
.
default
=
None
if
mMain
:
if
mMain
:
return
parser
.
parse_args
()
return
parser
.
parse_args
()
...
@@ -477,7 +498,7 @@ class PaddleOCR(predict_system.TextSystem):
...
@@ -477,7 +498,7 @@ class PaddleOCR(predict_system.TextSystem):
if
isinstance
(
img
,
np
.
ndarray
)
and
len
(
img
.
shape
)
==
2
:
if
isinstance
(
img
,
np
.
ndarray
)
and
len
(
img
.
shape
)
==
2
:
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
if
det
and
rec
:
if
det
and
rec
:
dt_boxes
,
rec_res
=
self
.
__call__
(
img
,
cls
)
dt_boxes
,
rec_res
,
_
=
self
.
__call__
(
img
,
cls
)
return
[[
box
.
tolist
(),
res
]
for
box
,
res
in
zip
(
dt_boxes
,
rec_res
)]
return
[[
box
.
tolist
(),
res
]
for
box
,
res
in
zip
(
dt_boxes
,
rec_res
)]
elif
det
and
not
rec
:
elif
det
and
not
rec
:
dt_boxes
,
elapse
=
self
.
text_detector
(
img
)
dt_boxes
,
elapse
=
self
.
text_detector
(
img
)
...
@@ -520,14 +541,20 @@ class PPStructure(StructureSystem):
...
@@ -520,14 +541,20 @@ class PPStructure(StructureSystem):
params
.
rec_model_dir
,
params
.
rec_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
'whl'
,
'rec'
,
lang
),
rec_model_config
[
'url'
])
os
.
path
.
join
(
BASE_DIR
,
'whl'
,
'rec'
,
lang
),
rec_model_config
[
'url'
])
table_model_config
=
get_model_config
(
table_model_config
=
get_model_config
(
'STRUCTURE'
,
params
.
structure_version
,
'table'
,
'
en
'
)
'STRUCTURE'
,
params
.
structure_version
,
'table'
,
'
ch
'
)
params
.
table_model_dir
,
table_url
=
confirm_model_dir_url
(
params
.
table_model_dir
,
table_url
=
confirm_model_dir_url
(
params
.
table_model_dir
,
params
.
table_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
'whl'
,
'table'
),
table_model_config
[
'url'
])
os
.
path
.
join
(
BASE_DIR
,
'whl'
,
'table'
),
table_model_config
[
'url'
])
layout_model_config
=
get_model_config
(
'STRUCTURE'
,
params
.
structure_version
,
'layout'
,
'ch'
)
params
.
layout_model_dir
,
layout_url
=
confirm_model_dir_url
(
params
.
layout_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
'whl'
,
'layout'
),
layout_model_config
[
'url'
])
# download model
# download model
maybe_download
(
params
.
det_model_dir
,
det_url
)
maybe_download
(
params
.
det_model_dir
,
det_url
)
maybe_download
(
params
.
rec_model_dir
,
rec_url
)
maybe_download
(
params
.
rec_model_dir
,
rec_url
)
maybe_download
(
params
.
table_model_dir
,
table_url
)
maybe_download
(
params
.
table_model_dir
,
table_url
)
maybe_download
(
params
.
layout_model_dir
,
layout_url
)
if
params
.
rec_char_dict_path
is
None
:
if
params
.
rec_char_dict_path
is
None
:
params
.
rec_char_dict_path
=
str
(
params
.
rec_char_dict_path
=
str
(
...
@@ -535,6 +562,9 @@ class PPStructure(StructureSystem):
...
@@ -535,6 +562,9 @@ class PPStructure(StructureSystem):
if
params
.
table_char_dict_path
is
None
:
if
params
.
table_char_dict_path
is
None
:
params
.
table_char_dict_path
=
str
(
params
.
table_char_dict_path
=
str
(
Path
(
__file__
).
parent
/
table_model_config
[
'dict_path'
])
Path
(
__file__
).
parent
/
table_model_config
[
'dict_path'
])
if
params
.
layout_dict_path
is
None
:
params
.
layout_dict_path
=
str
(
Path
(
__file__
).
parent
/
layout_model_config
[
'dict_path'
])
logger
.
debug
(
params
)
logger
.
debug
(
params
)
super
().
__init__
(
params
)
super
().
__init__
(
params
)
...
@@ -557,7 +587,7 @@ class PPStructure(StructureSystem):
...
@@ -557,7 +587,7 @@ class PPStructure(StructureSystem):
if
isinstance
(
img
,
np
.
ndarray
)
and
len
(
img
.
shape
)
==
2
:
if
isinstance
(
img
,
np
.
ndarray
)
and
len
(
img
.
shape
)
==
2
:
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
res
=
super
().
__call__
(
img
,
return_ocr_result_in_table
)
res
,
_
=
super
().
__call__
(
img
,
return_ocr_result_in_table
)
return
res
return
res
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录