Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
3c912550
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3c912550
编写于
9月 06, 2021
作者:
文幕地方
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add 2.1 models to paddleocr whl
上级
c1ed243f
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
191 addition
and
110 deletion
+191
-110
paddleocr.py
paddleocr.py
+191
-110
未找到文件。
paddleocr.py
浏览文件 @
3c912550
...
...
@@ -33,15 +33,47 @@ from tools.infer.utility import draw_ocr, str2bool
from
ppstructure.utility
import
init_args
,
draw_structure_result
from
ppstructure.predict_system
import
OCRSystem
,
save_structure_res
__all__
=
[
'PaddleOCR'
,
'PPStructure'
,
'draw_ocr'
,
'draw_structure_result'
,
'save_structure_res'
,
'download_with_progressbar'
]
__all__
=
[
'PaddleOCR'
,
'PPStructure'
,
'draw_ocr'
,
'draw_structure_result'
,
'save_structure_res'
,
'download_with_progressbar'
]
model_urls
=
{
SUPPORT_DET_MODEL
=
[
'DB'
]
VERSION
=
'2.2.1'
SUPPORT_REC_MODEL
=
[
'CRNN'
]
BASE_DIR
=
os
.
path
.
expanduser
(
"~/.paddleocr/"
)
DEFAULT_MODEL_VERSION
=
'2.0'
MODEL_URLS
=
{
'2.1'
:
{
'det'
:
{
'ch'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.1/chinese/ch_ppocr_mobile_v2.1_det_infer.tar'
,
},
},
'rec'
:
{
'ch'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.1/chinese/ch_ppocr_mobile_v2.1_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/ppocr_keys_v1.txt'
}
}
},
'2.0'
:
{
'det'
:
{
'ch'
:
'ch'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar'
,
'en'
:
},
'en'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar'
,
'structure'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'
},
'structure'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'
}
},
'rec'
:
{
'ch'
:
{
...
...
@@ -115,22 +147,27 @@ model_urls = {
'dict_path'
:
'./ppocr/utils/dict/devanagari_dict.txt'
},
'structure'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar'
,
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar'
,
'dict_path'
:
'ppocr/utils/dict/table_dict.txt'
}
},
'cls'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
,
'cls'
:
{
'ch'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
,
}
},
'table'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar'
,
'en'
:
{
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar'
,
'dict_path'
:
'ppocr/utils/dict/table_structure_dict.txt'
}
}
}
}
SUPPORT_DET_MODEL
=
[
'DB'
]
VERSION
=
'2.2.0.1'
SUPPORT_REC_MODEL
=
[
'CRNN'
]
BASE_DIR
=
os
.
path
.
expanduser
(
"~/.paddleocr/"
)
def
parse_args
(
mMain
=
True
):
import
argparse
...
...
@@ -140,6 +177,7 @@ def parse_args(mMain=True):
parser
.
add_argument
(
"--det"
,
type
=
str2bool
,
default
=
True
)
parser
.
add_argument
(
"--rec"
,
type
=
str2bool
,
default
=
True
)
parser
.
add_argument
(
"--type"
,
type
=
str
,
default
=
'ocr'
)
parser
.
add_argument
(
"--version"
,
type
=
str
,
default
=
'2.1'
)
for
action
in
parser
.
_actions
:
if
action
.
dest
in
[
'rec_char_dict_path'
,
'table_char_dict_path'
]:
...
...
@@ -155,19 +193,19 @@ def parse_args(mMain=True):
def
parse_lang
(
lang
):
latin_lang
=
[
'af'
,
'az'
,
'bs'
,
'cs'
,
'cy'
,
'da'
,
'de'
,
'es'
,
'et'
,
'fr'
,
'ga'
,
'h
r'
,
'hu'
,
'id'
,
'is'
,
'it'
,
'ku'
,
'la'
,
'lt'
,
'lv'
,
'mi'
,
'ms
'
,
'
mt'
,
'nl'
,
'no'
,
'oc'
,
'pi'
,
'pl'
,
'pt'
,
'ro'
,
'rs_latin'
,
'sk
'
,
's
l'
,
'sq'
,
'sv'
,
's
w'
,
'tl'
,
'tr'
,
'uz'
,
'vi'
'af'
,
'az'
,
'bs'
,
'cs'
,
'cy'
,
'da'
,
'de'
,
'es'
,
'et'
,
'fr'
,
'ga'
,
'hr'
,
'h
u'
,
'id'
,
'is'
,
'it'
,
'ku'
,
'la'
,
'lt'
,
'lv'
,
'mi'
,
'ms'
,
'mt'
,
'nl
'
,
'
no'
,
'oc'
,
'pi'
,
'pl'
,
'pt'
,
'ro'
,
'rs_latin'
,
'sk'
,
'sl'
,
'sq'
,
'sv
'
,
'sw'
,
'tl'
,
'tr'
,
'uz'
,
'vi'
]
arabic_lang
=
[
'ar'
,
'fa'
,
'ug'
,
'ur'
]
cyrillic_lang
=
[
'ru'
,
'rs_cyrillic'
,
'be'
,
'bg'
,
'uk'
,
'mn'
,
'abq'
,
'ady'
,
'kbd'
,
'
ava'
,
'
dar'
,
'inh'
,
'che'
,
'lbe'
,
'lez'
,
'tab'
'ru'
,
'rs_cyrillic'
,
'be'
,
'bg'
,
'uk'
,
'mn'
,
'abq'
,
'ady'
,
'kbd'
,
'ava'
,
'dar'
,
'inh'
,
'che'
,
'lbe'
,
'lez'
,
'tab'
]
devanagari_lang
=
[
'hi'
,
'mr'
,
'ne'
,
'bh'
,
'mai'
,
'ang'
,
'bho'
,
'mah'
,
'sck'
,
'new'
,
'
gom'
,
'
sa'
,
'bgc'
'hi'
,
'mr'
,
'ne'
,
'bh'
,
'mai'
,
'ang'
,
'bho'
,
'mah'
,
'sck'
,
'new'
,
'gom'
,
'sa'
,
'bgc'
]
if
lang
in
latin_lang
:
lang
=
"latin"
...
...
@@ -177,9 +215,9 @@ def parse_lang(lang):
lang
=
"cyrillic"
elif
lang
in
devanagari_lang
:
lang
=
"devanagari"
assert
lang
in
model_urls
[
assert
lang
in
MODEL_URLS
[
DEFAULT_MODEL_VERSION
]
[
'rec'
],
'param lang must in {}, but got {}'
.
format
(
model_urls
[
'rec'
].
keys
(),
lang
)
MODEL_URLS
[
DEFAULT_MODEL_VERSION
]
[
'rec'
].
keys
(),
lang
)
if
lang
==
"ch"
:
det_lang
=
"ch"
elif
lang
==
'structure'
:
...
...
@@ -189,6 +227,35 @@ def parse_lang(lang):
return
lang
,
det_lang
def
get_model_config
(
version
,
model_type
,
lang
):
if
version
not
in
MODEL_URLS
:
logger
.
warning
(
'version {} not in {}, use version {} instead'
.
format
(
version
,
MODEL_URLS
.
keys
(),
DEFAULT_MODEL_VERSION
))
version
=
DEFAULT_MODEL_VERSION
if
model_type
not
in
MODEL_URLS
[
version
]:
if
model_type
in
MODEL_URLS
[
DEFAULT_MODEL_VERSION
]:
logger
.
warning
(
'version {} not support {} models, use version {} instead'
.
format
(
version
,
model_type
,
DEFAULT_MODEL_VERSION
))
version
=
DEFAULT_MODEL_VERSION
else
:
logger
.
error
(
'{} models is not support, we only support {}'
.
format
(
model_type
,
MODEL_URLS
[
DEFAULT_MODEL_VERSION
].
keys
()))
sys
.
exit
(
-
1
)
if
lang
not
in
MODEL_URLS
[
version
][
model_type
]:
if
lang
in
MODEL_URLS
[
DEFAULT_MODEL_VERSION
][
model_type
]:
logger
.
warning
(
'lang {} is not support in {}, use {} instead'
.
format
(
lang
,
version
,
DEFAULT_MODEL_VERSION
))
version
=
DEFAULT_MODEL_VERSION
else
:
logger
.
error
(
'lang {} is not support, we only support {} for {} models'
.
format
(
lang
,
MODEL_URLS
[
DEFAULT_MODEL_VERSION
][
model_type
].
keys
(
),
model_type
))
sys
.
exit
(
-
1
)
return
MODEL_URLS
[
version
][
model_type
][
lang
]
class
PaddleOCR
(
predict_system
.
TextSystem
):
def
__init__
(
self
,
**
kwargs
):
"""
...
...
@@ -204,15 +271,21 @@ class PaddleOCR(predict_system.TextSystem):
lang
,
det_lang
=
parse_lang
(
params
.
lang
)
# init model dir
params
.
det_model_dir
,
det_url
=
confirm_model_dir_url
(
params
.
det_model_dir
,
det_model_config
=
get_model_config
(
params
.
version
,
'det'
,
det_lang
)
params
.
det_model_dir
,
det_url
=
confirm_model_dir_url
(
params
.
det_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
'ocr'
,
'det'
,
det_lang
),
model_urls
[
'det'
][
det_lang
])
params
.
rec_model_dir
,
rec_url
=
confirm_model_dir_url
(
params
.
rec_model_dir
,
det_model_config
[
'url'
])
rec_model_config
=
get_model_config
(
params
.
version
,
'rec'
,
lang
)
params
.
rec_model_dir
,
rec_url
=
confirm_model_dir_url
(
params
.
rec_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
'ocr'
,
'rec'
,
lang
),
model_urls
[
'rec'
][
lang
][
'url'
])
params
.
cls_model_dir
,
cls_url
=
confirm_model_dir_url
(
params
.
cls_model_dir
,
rec_model_config
[
'url'
])
cls_model_config
=
get_model_config
(
params
.
version
,
'cls'
,
'ch'
)
params
.
cls_model_dir
,
cls_url
=
confirm_model_dir_url
(
params
.
cls_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
'ocr'
,
'cls'
),
model_urls
[
'cls
'
])
cls_model_config
[
'url
'
])
# download model
maybe_download
(
params
.
det_model_dir
,
det_url
)
maybe_download
(
params
.
rec_model_dir
,
rec_url
)
...
...
@@ -226,7 +299,8 @@ class PaddleOCR(predict_system.TextSystem):
sys
.
exit
(
0
)
if
params
.
rec_char_dict_path
is
None
:
params
.
rec_char_dict_path
=
str
(
Path
(
__file__
).
parent
/
model_urls
[
'rec'
][
lang
][
'dict_path'
])
params
.
rec_char_dict_path
=
str
(
Path
(
__file__
).
parent
/
rec_model_config
[
'dict_path'
])
print
(
params
)
# init det_model and rec_model
...
...
@@ -293,24 +367,32 @@ class PPStructure(OCRSystem):
lang
,
det_lang
=
parse_lang
(
params
.
lang
)
# init model dir
params
.
det_model_dir
,
det_url
=
confirm_model_dir_url
(
params
.
det_model_dir
,
det_model_config
=
get_model_config
(
params
.
version
,
'det'
,
det_lang
)
params
.
det_model_dir
,
det_url
=
confirm_model_dir_url
(
params
.
det_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
'ocr'
,
'det'
,
det_lang
),
model_urls
[
'det'
][
det_lang
])
params
.
rec_model_dir
,
rec_url
=
confirm_model_dir_url
(
params
.
rec_model_dir
,
det_model_config
[
'url'
])
rec_model_config
=
get_model_config
(
params
.
version
,
'rec'
,
lang
)
params
.
rec_model_dir
,
rec_url
=
confirm_model_dir_url
(
params
.
rec_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
'ocr'
,
'rec'
,
lang
),
model_urls
[
'rec'
][
lang
][
'url'
])
params
.
table_model_dir
,
table_url
=
confirm_model_dir_url
(
params
.
table_model_dir
,
rec_model_config
[
'url'
])
table_model_config
=
get_model_config
(
params
.
version
,
'table'
,
'en'
)
params
.
table_model_dir
,
table_url
=
confirm_model_dir_url
(
params
.
table_model_dir
,
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
'ocr'
,
'table'
),
model_urls
[
'table'
]
[
'url'
])
table_model_config
[
'url'
])
# download model
maybe_download
(
params
.
det_model_dir
,
det_url
)
maybe_download
(
params
.
rec_model_dir
,
rec_url
)
maybe_download
(
params
.
table_model_dir
,
table_url
)
if
params
.
rec_char_dict_path
is
None
:
params
.
rec_char_dict_path
=
str
(
Path
(
__file__
).
parent
/
model_urls
[
'rec'
][
lang
][
'dict_path'
])
params
.
rec_char_dict_path
=
str
(
Path
(
__file__
).
parent
/
rec_model_config
[
'dict_path'
])
if
params
.
table_char_dict_path
is
None
:
params
.
table_char_dict_path
=
str
(
Path
(
__file__
).
parent
/
model_urls
[
'table'
][
'dict_path'
])
params
.
table_char_dict_path
=
str
(
Path
(
__file__
).
parent
/
table_model_config
[
'dict_path'
])
print
(
params
)
super
().
__init__
(
params
)
...
...
@@ -374,4 +456,3 @@ def main():
for
item
in
result
:
item
.
pop
(
'img'
)
logger
.
info
(
item
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录