Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_41840029
PaddleOCR
提交
a5f75115
P
PaddleOCR
项目概览
weixin_41840029
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a5f75115
编写于
6月 05, 2021
作者:
W
WenmuZhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
mv download func to ppocr/utils/network.py
上级
20466055
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
106 addition
and
86 deletion
+106
-86
paddleocr.py
paddleocr.py
+40
-86
ppocr/utils/network.py
ppocr/utils/network.py
+66
-0
未找到文件。
paddleocr.py
浏览文件 @
a5f75115
...
@@ -21,15 +21,13 @@ sys.path.append(os.path.join(__dir__, ''))
...
@@ -21,15 +21,13 @@ sys.path.append(os.path.join(__dir__, ''))
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
from
pathlib
import
Path
from
pathlib
import
Path
import
tarfile
import
requests
from
tqdm
import
tqdm
from
tools.infer
import
predict_system
from
tools.infer
import
predict_system
from
ppocr.utils.logging
import
get_logger
from
ppocr.utils.logging
import
get_logger
logger
=
get_logger
()
logger
=
get_logger
()
from
ppocr.utils.utility
import
check_and_read_gif
,
get_image_file_list
from
ppocr.utils.utility
import
check_and_read_gif
,
get_image_file_list
from
ppocr.utils.network
import
maybe_download
,
download_with_progressbar
from
tools.infer.utility
import
draw_ocr
,
init_args
,
str2bool
from
tools.infer.utility
import
draw_ocr
,
init_args
,
str2bool
__all__
=
[
'PaddleOCR'
]
__all__
=
[
'PaddleOCR'
]
...
@@ -37,84 +35,84 @@ __all__ = ['PaddleOCR']
...
@@ -37,84 +35,84 @@ __all__ = ['PaddleOCR']
model_urls
=
{
model_urls
=
{
'det'
:
{
'det'
:
{
'ch'
:
'ch'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar'
,
'en'
:
'en'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar'
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar'
},
},
'rec'
:
{
'rec'
:
{
'ch'
:
{
'ch'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/ppocr_keys_v1.txt'
'dict_path'
:
'./ppocr/utils/ppocr_keys_v1.txt'
},
},
'en'
:
{
'en'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/en_dict.txt'
'dict_path'
:
'./ppocr/utils/en_dict.txt'
},
},
'french'
:
{
'french'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/french_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/french_dict.txt'
},
},
'german'
:
{
'german'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/german_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/german_dict.txt'
},
},
'korean'
:
{
'korean'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/korean_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/korean_dict.txt'
},
},
'japan'
:
{
'japan'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/japan_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/japan_dict.txt'
},
},
'chinese_cht'
:
{
'chinese_cht'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/chinese_cht_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/chinese_cht_dict.txt'
},
},
'ta'
:
{
'ta'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/ta_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/ta_dict.txt'
},
},
'te'
:
{
'te'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/te_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/te_dict.txt'
},
},
'ka'
:
{
'ka'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/ka_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/ka_dict.txt'
},
},
'latin'
:
{
'latin'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/latin_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/latin_dict.txt'
},
},
'arabic'
:
{
'arabic'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/arabic_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/arabic_dict.txt'
},
},
'cyrillic'
:
{
'cyrillic'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/cyrillic_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/cyrillic_dict.txt'
},
},
'devanagari'
:
{
'devanagari'
:
{
'url'
:
'url'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar'
,
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar'
,
'dict_path'
:
'./ppocr/utils/dict/devanagari_dict.txt'
'dict_path'
:
'./ppocr/utils/dict/devanagari_dict.txt'
}
}
},
},
'cls'
:
'cls'
:
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
}
}
SUPPORT_DET_MODEL
=
[
'DB'
]
SUPPORT_DET_MODEL
=
[
'DB'
]
...
@@ -123,50 +121,6 @@ SUPPORT_REC_MODEL = ['CRNN']
...
@@ -123,50 +121,6 @@ SUPPORT_REC_MODEL = ['CRNN']
BASE_DIR
=
os
.
path
.
expanduser
(
"~/.paddleocr/"
)
BASE_DIR
=
os
.
path
.
expanduser
(
"~/.paddleocr/"
)
def download_with_progressbar(url, save_path):
    """Stream ``url`` into ``save_path`` while showing a tqdm progress bar.

    Args:
        url: Address of the file, fetched with ``requests.get(stream=True)``.
        save_path: Local path the response body is written to in binary mode.

    Raises:
        SystemExit: With status 1 when the server reports no
            ``content-length`` or the number of bytes received differs
            from the reported length.
    """
    block_size = 1024  # 1 Kibibyte
    response = requests.get(url, stream=True)
    try:
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        # The context manager closes the bar even if a write fails part-way.
        with tqdm(total=total_size_in_bytes, unit='iB',
                  unit_scale=True) as progress_bar:
            with open(save_path, 'wb') as file:
                for data in response.iter_content(block_size):
                    progress_bar.update(len(data))
                    file.write(data)
            received_bytes = progress_bar.n
    finally:
        # Always hand the streamed connection back to the pool.
        response.close()

    if total_size_in_bytes == 0 or received_bytes != total_size_in_bytes:
        # Relies on the module-level ``logger`` created with get_logger().
        logger.error("Something went wrong while downloading models")
        # Non-zero status: a failed download must not look like success.
        sys.exit(1)
def maybe_download(model_storage_directory, url):
    """Download and unpack an inference model unless it is already present.

    The model counts as present when both ``inference.pdiparams`` and
    ``inference.pdmodel`` exist in ``model_storage_directory``; nothing is
    downloaded in that case (for example when a custom model is supplied).
    Otherwise the archive at ``url`` is saved into the directory, the known
    model files are copied out of it under their bare file names, and the
    archive is removed again.

    Args:
        model_storage_directory: Directory that holds, or will hold, the
            model files. It is created when missing.
        url: Location of the model tar archive. Its last path component is
            used as the temporary archive file name.
    """
    # using custom model
    # Order matters: a longer name that contains a shorter one must come
    # later, because the last substring match below wins.
    tar_file_name_list = [
        'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel'
    ]
    params_path = os.path.join(model_storage_directory, 'inference.pdiparams')
    model_path = os.path.join(model_storage_directory, 'inference.pdmodel')
    if os.path.exists(params_path) and os.path.exists(model_path):
        return

    import shutil  # local import: only needed on the download path

    tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
    print('download {} to {}'.format(url, tmp_path))
    os.makedirs(model_storage_directory, exist_ok=True)
    download_with_progressbar(url, tmp_path)
    try:
        with tarfile.open(tmp_path, 'r') as tarObj:
            for member in tarObj.getmembers():
                filename = None
                for tar_file_name in tar_file_name_list:
                    if tar_file_name in member.name:
                        filename = tar_file_name
                if filename is None:
                    continue
                file = tarObj.extractfile(member)
                if file is None:
                    # Directories and other non-file members have no data.
                    continue
                target = os.path.join(model_storage_directory, filename)
                # Stream the member to disk instead of reading it whole.
                with file, open(target, 'wb') as f:
                    shutil.copyfileobj(file, f)
    finally:
        # Remove the archive even when extraction fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def
parse_args
(
mMain
=
True
):
def
parse_args
(
mMain
=
True
):
import
argparse
import
argparse
parser
=
init_args
()
parser
=
init_args
()
...
@@ -194,10 +148,10 @@ class PaddleOCR(predict_system.TextSystem):
...
@@ -194,10 +148,10 @@ class PaddleOCR(predict_system.TextSystem):
args:
args:
**kwargs: other params show in paddleocr --help
**kwargs: other params show in paddleocr --help
"""
"""
p
ostprocess_p
arams
=
parse_args
(
mMain
=
False
)
params
=
parse_args
(
mMain
=
False
)
p
ostprocess_p
arams
.
__dict__
.
update
(
**
kwargs
)
params
.
__dict__
.
update
(
**
kwargs
)
self
.
use_angle_cls
=
p
ostprocess_p
arams
.
use_angle_cls
self
.
use_angle_cls
=
params
.
use_angle_cls
lang
=
p
ostprocess_p
arams
.
lang
lang
=
params
.
lang
latin_lang
=
[
latin_lang
=
[
'af'
,
'az'
,
'bs'
,
'cs'
,
'cy'
,
'da'
,
'de'
,
'es'
,
'et'
,
'fr'
,
'ga'
,
'af'
,
'az'
,
'bs'
,
'cs'
,
'cy'
,
'da'
,
'de'
,
'es'
,
'et'
,
'fr'
,
'ga'
,
'hr'
,
'hu'
,
'id'
,
'is'
,
'it'
,
'ku'
,
'la'
,
'lt'
,
'lv'
,
'mi'
,
'ms'
,
'hr'
,
'hu'
,
'id'
,
'is'
,
'it'
,
'ku'
,
'la'
,
'lt'
,
'lv'
,
'mi'
,
'ms'
,
...
@@ -223,46 +177,46 @@ class PaddleOCR(predict_system.TextSystem):
...
@@ -223,46 +177,46 @@ class PaddleOCR(predict_system.TextSystem):
lang
=
"devanagari"
lang
=
"devanagari"
assert
lang
in
model_urls
[
assert
lang
in
model_urls
[
'rec'
],
'param lang must in {}, but got {}'
.
format
(
'rec'
],
'param lang must in {}, but got {}'
.
format
(
model_urls
[
'rec'
].
keys
(),
lang
)
model_urls
[
'rec'
].
keys
(),
lang
)
if
lang
==
"ch"
:
if
lang
==
"ch"
:
det_lang
=
"ch"
det_lang
=
"ch"
else
:
else
:
det_lang
=
"en"
det_lang
=
"en"
use_inner_dict
=
False
use_inner_dict
=
False
if
p
ostprocess_p
arams
.
rec_char_dict_path
is
None
:
if
params
.
rec_char_dict_path
is
None
:
use_inner_dict
=
True
use_inner_dict
=
True
p
ostprocess_p
arams
.
rec_char_dict_path
=
model_urls
[
'rec'
][
lang
][
params
.
rec_char_dict_path
=
model_urls
[
'rec'
][
lang
][
'dict_path'
]
'dict_path'
]
# init model dir
# init model dir
if
p
ostprocess_p
arams
.
det_model_dir
is
None
:
if
params
.
det_model_dir
is
None
:
p
ostprocess_p
arams
.
det_model_dir
=
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
params
.
det_model_dir
=
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
'det'
,
det_lang
)
'det'
,
det_lang
)
if
p
ostprocess_p
arams
.
rec_model_dir
is
None
:
if
params
.
rec_model_dir
is
None
:
p
ostprocess_p
arams
.
rec_model_dir
=
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
params
.
rec_model_dir
=
os
.
path
.
join
(
BASE_DIR
,
VERSION
,
'rec'
,
lang
)
'rec'
,
lang
)
if
postprocess_params
.
cls_model_dir
is
None
:
if
params
.
cls_model_dir
is
None
:
postprocess_params
.
cls_model_dir
=
os
.
path
.
join
(
BASE_DIR
,
'cls'
)
params
.
cls_model_dir
=
os
.
path
.
join
(
BASE_DIR
,
'cls'
)
print
(
postprocess_params
)
# download model
# download model
maybe_download
(
p
ostprocess_p
arams
.
det_model_dir
,
maybe_download
(
params
.
det_model_dir
,
model_urls
[
'det'
][
det_lang
])
model_urls
[
'det'
][
det_lang
])
maybe_download
(
p
ostprocess_p
arams
.
rec_model_dir
,
maybe_download
(
params
.
rec_model_dir
,
model_urls
[
'rec'
][
lang
][
'url'
])
model_urls
[
'rec'
][
lang
][
'url'
])
maybe_download
(
p
ostprocess_p
arams
.
cls_model_dir
,
model_urls
[
'cls'
])
maybe_download
(
params
.
cls_model_dir
,
model_urls
[
'cls'
])
if
p
ostprocess_p
arams
.
det_algorithm
not
in
SUPPORT_DET_MODEL
:
if
params
.
det_algorithm
not
in
SUPPORT_DET_MODEL
:
logger
.
error
(
'det_algorithm must in {}'
.
format
(
SUPPORT_DET_MODEL
))
logger
.
error
(
'det_algorithm must in {}'
.
format
(
SUPPORT_DET_MODEL
))
sys
.
exit
(
0
)
sys
.
exit
(
0
)
if
p
ostprocess_p
arams
.
rec_algorithm
not
in
SUPPORT_REC_MODEL
:
if
params
.
rec_algorithm
not
in
SUPPORT_REC_MODEL
:
logger
.
error
(
'rec_algorithm must in {}'
.
format
(
SUPPORT_REC_MODEL
))
logger
.
error
(
'rec_algorithm must in {}'
.
format
(
SUPPORT_REC_MODEL
))
sys
.
exit
(
0
)
sys
.
exit
(
0
)
if
use_inner_dict
:
if
use_inner_dict
:
p
ostprocess_p
arams
.
rec_char_dict_path
=
str
(
params
.
rec_char_dict_path
=
str
(
Path
(
__file__
).
parent
/
p
ostprocess_p
arams
.
rec_char_dict_path
)
Path
(
__file__
).
parent
/
params
.
rec_char_dict_path
)
print
(
params
)
# init det_model and rec_model
# init det_model and rec_model
super
().
__init__
(
p
ostprocess_p
arams
)
super
().
__init__
(
params
)
def
ocr
(
self
,
img
,
det
=
True
,
rec
=
True
,
cls
=
True
):
def
ocr
(
self
,
img
,
det
=
True
,
rec
=
True
,
cls
=
True
):
"""
"""
...
...
ppocr/utils/network.py
0 → 100644
浏览文件 @
a5f75115
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
import
tarfile
import
requests
from
tqdm
import
tqdm
from
ppocr.utils.logging
import
get_logger
def download_with_progressbar(url, save_path):
    """Stream ``url`` into ``save_path`` while showing a tqdm progress bar.

    Args:
        url: Address of the file, fetched with ``requests.get(stream=True)``.
        save_path: Local path the response body is written to in binary mode.

    Raises:
        SystemExit: With status 1 when the server reports no
            ``content-length`` or the number of bytes received differs
            from the reported length.
    """
    logger = get_logger()
    block_size = 1024  # 1 Kibibyte
    response = requests.get(url, stream=True)
    try:
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        # The context manager closes the bar even if a write fails part-way.
        with tqdm(total=total_size_in_bytes, unit='iB',
                  unit_scale=True) as progress_bar:
            with open(save_path, 'wb') as file:
                for data in response.iter_content(block_size):
                    progress_bar.update(len(data))
                    file.write(data)
            received_bytes = progress_bar.n
    finally:
        # Always hand the streamed connection back to the pool.
        response.close()

    if total_size_in_bytes == 0 or received_bytes != total_size_in_bytes:
        logger.error("Something went wrong while downloading models")
        # Non-zero status: a failed download must not look like success.
        sys.exit(1)
def maybe_download(model_storage_directory, url):
    """Download and unpack an inference model unless it is already present.

    The model counts as present when both ``inference.pdiparams`` and
    ``inference.pdmodel`` exist in ``model_storage_directory``; nothing is
    downloaded in that case (for example when a custom model is supplied).
    Otherwise the archive at ``url`` is saved into the directory, the known
    model files are copied out of it under their bare file names, and the
    archive is removed again.

    Args:
        model_storage_directory: Directory that holds, or will hold, the
            model files. It is created when missing.
        url: Location of the model tar archive. Its last path component is
            used as the temporary archive file name.
    """
    # using custom model
    # Order matters: a longer name that contains a shorter one must come
    # later, because the last substring match below wins.
    tar_file_name_list = [
        'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel'
    ]
    params_path = os.path.join(model_storage_directory, 'inference.pdiparams')
    model_path = os.path.join(model_storage_directory, 'inference.pdmodel')
    if os.path.exists(params_path) and os.path.exists(model_path):
        return

    import shutil  # local import: only needed on the download path

    tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
    print('download {} to {}'.format(url, tmp_path))
    os.makedirs(model_storage_directory, exist_ok=True)
    download_with_progressbar(url, tmp_path)
    try:
        with tarfile.open(tmp_path, 'r') as tarObj:
            for member in tarObj.getmembers():
                filename = None
                for tar_file_name in tar_file_name_list:
                    if tar_file_name in member.name:
                        filename = tar_file_name
                if filename is None:
                    continue
                file = tarObj.extractfile(member)
                if file is None:
                    # Directories and other non-file members have no data.
                    continue
                target = os.path.join(model_storage_directory, filename)
                # Stream the member to disk instead of reading it whole.
                with file, open(target, 'wb') as f:
                    shutil.copyfileobj(file, f)
    finally:
        # Remove the archive even when extraction fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录