Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
46ac85ad
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
46ac85ad
编写于
1月 19, 2021
作者:
只会git clone的程序员
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some problems
上级
141d50d6
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
80 addition
and
58 deletion
+80
-58
configs/rec/multi_language/generate_multi_language_configs.py
...igs/rec/multi_language/generate_multi_language_configs.py
+36
-30
ppocr/utils/dict/german_dict.txt
ppocr/utils/dict/german_dict.txt
+44
-28
未找到文件。
configs/rec/multi_language/generate_multi_language_configs.py
浏览文件 @
46ac85ad
import
yaml
from
argparse
import
ArgumentParser
,
RawDescriptionHelpFormatter
import
os.path
support_list
=
{
'it'
:
'italian'
,
'xi'
:
'spanish'
,
'pu'
:
'portuguese'
,
'ru'
:
'russian'
,
'ar'
:
'arabic'
,
...
...
@@ -8,6 +9,12 @@ support_list = {
'te'
:
'telugu'
,
'ka'
:
'kannada'
,
'chinese_cht'
:
'chinese tradition'
,
'hi'
:
'hindi'
,
'mr'
:
'marathi'
,
'ne'
:
'nepali'
,
}
assert
(
os
.
path
.
isfile
(
"./rec_multi_language_lite_train.yml"
)
),
"Loss basic configuration file rec_multi_language_lite_train.yml.
\
You can download it from
\
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
global_config
=
yaml
.
load
(
open
(
"./rec_multi_language_lite_train.yml"
,
'rb'
),
Loader
=
yaml
.
Loader
)
class
ArgsParser
(
ArgumentParser
):
...
...
@@ -17,18 +24,20 @@ class ArgsParser(ArgumentParser):
self
.
add_argument
(
"-o"
,
"--opt"
,
nargs
=
'+'
,
help
=
"set configuration options"
)
self
.
add_argument
(
"-
t"
,
"--typ
e"
,
nargs
=
'+'
,
help
=
"set language type, support {}"
.
format
(
support_list
))
"-
l"
,
"--languag
e"
,
nargs
=
'+'
,
help
=
"set language type, support {}"
.
format
(
support_list
))
self
.
add_argument
(
"--train"
,
type
=
str
,
help
=
"you can use this command to change the default path"
)
"--train"
,
type
=
str
,
help
=
"you can use this command to change the
train dataset
default path"
)
self
.
add_argument
(
"--val"
,
type
=
str
,
help
=
"you can use this command to change the default path"
)
"--val"
,
type
=
str
,
help
=
"you can use this command to change the
eval dataset
default path"
)
self
.
add_argument
(
"--dict"
,
type
=
str
,
help
=
"you can use this command to change the default path"
)
"--dict"
,
type
=
str
,
help
=
"you can use this command to change the dictionary default path"
)
self
.
add_argument
(
"--dataset_root_path"
,
type
=
str
,
help
=
"you can use this command to change the dataset default root path"
)
def
parse_args
(
self
,
argv
=
None
):
args
=
super
(
ArgsParser
,
self
).
parse_args
(
argv
)
args
.
opt
=
self
.
_parse_opt
(
args
.
opt
)
args
.
type
,
args
.
config
=
self
.
_set_language
(
args
.
typ
e
)
args
.
language
=
self
.
_set_language
(
args
.
languag
e
)
return
args
def
_parse_opt
(
self
,
opts
):
...
...
@@ -42,33 +51,26 @@ class ArgsParser(ArgumentParser):
return
config
def
_set_language
(
self
,
type
):
config
=
{
'Global'
:{
'character_dict_path'
:
None
,
'save_model_dir'
:
None
},
'Train'
:{
'dataset'
:{
'label_file_list'
:
None
}},
'Eval'
:
{
'dataset'
:
{
'label_file_list'
:
None
}},
}
assert
(
type
),
"please use -t or --type to choose language type"
assert
(
type
[
0
]
in
support_list
.
keys
()
),
"the sub_keys(-t or --type) can only be one of support list:
\n
{},
\n
but get: {}, "
\
"please check your running command"
.
format
(
support_list
,
type
)
config
[
'Global'
][
'character_dict_path'
]
=
'ppocr/utils/dict/{}_dict.txt'
.
format
(
type
[
0
])
config
[
'Global'
][
'save_model_dir'
]
=
'./output/rec_{}_lite'
.
format
(
type
[
0
])
config
[
'Train'
][
'dataset'
][
'label_file_list'
]
=
[
"./
train_data/{}_train.txt"
.
format
(
type
[
0
])]
config
[
'Eval'
][
'dataset'
][
'label_file_list'
]
=
[
"./
train_data/{}_val.txt"
.
format
(
type
[
0
])]
return
type
[
0
]
,
config
global_
config
[
'Global'
][
'character_dict_path'
]
=
'ppocr/utils/dict/{}_dict.txt'
.
format
(
type
[
0
])
global_
config
[
'Global'
][
'save_model_dir'
]
=
'./output/rec_{}_lite'
.
format
(
type
[
0
])
global_config
[
'Train'
][
'dataset'
][
'label_file_list'
]
=
[
"
train_data/{}_train.txt"
.
format
(
type
[
0
])]
global_config
[
'Eval'
][
'dataset'
][
'label_file_list'
]
=
[
"
train_data/{}_val.txt"
.
format
(
type
[
0
])]
return
type
[
0
]
def
merge_config
(
global_config
,
new_
config
):
def
merge_config
(
config
):
"""
Merge config into global config.
Args:
global_config: Source config
new_config (dict): Config to be merged.
config (dict): Config to be merged.
Returns: global config
"""
for
key
,
value
in
new_
config
.
items
():
for
key
,
value
in
config
.
items
():
if
"."
not
in
key
:
if
isinstance
(
value
,
dict
)
and
key
in
global_config
:
global_config
[
key
].
update
(
value
)
...
...
@@ -78,8 +80,7 @@ def merge_config(global_config,new_config):
sub_keys
=
key
.
split
(
'.'
)
assert
(
sub_keys
[
0
]
in
global_config
),
"the sub_keys can only be one of global_config: {}, but get: {}, "
\
"please check your running command"
.
format
(
),
"the sub_keys can only be one of global_config: {}, but get: {}, please check your running command"
.
format
(
global_config
.
keys
(),
sub_keys
[
0
])
cur
=
global_config
[
sub_keys
[
0
]]
for
idx
,
sub_key
in
enumerate
(
sub_keys
[
1
:]):
...
...
@@ -87,22 +88,27 @@ def merge_config(global_config,new_config):
cur
[
sub_key
]
=
value
else
:
cur
=
cur
[
sub_key
]
return
global_config
if
__name__
==
'__main__'
:
FLAGS
=
ArgsParser
().
parse_args
()
global_config
=
merge_config
(
global_config
,
FLAGS
.
opt
)
global_config
=
merge_config
(
global_config
,
FLAGS
.
config
)
merge_config
(
FLAGS
.
opt
)
if
FLAGS
.
train
:
global_config
[
'Train'
][
'dataset'
][
'label_file_list'
]
=
[
FLAGS
.
train
]
if
FLAGS
.
val
:
global_config
[
'Eval'
][
'dataset'
][
'label_file_list'
]
=
[
FLAGS
.
val
]
if
FLAGS
.
dict
:
global_config
[
'Global'
][
'character_dict_path'
]
=
FLAGS
.
dict
if
FLAGS
.
dataset_root_path
:
global_config
[
'Eval'
][
'dataset'
][
'data_dir'
]
=
FLAGS
.
dataset_root_path
global_config
[
'Train'
][
'dataset'
][
'data_dir'
]
=
FLAGS
.
dataset_root_path
print
(
"train list path set to:{}"
.
format
(
global_config
[
'Train'
][
'dataset'
][
'label_file_list'
][
0
])
)
print
(
"Eval list path set to :{}"
.
format
(
global_config
[
'Eval'
][
'dataset'
][
'label_file_list'
][
0
]))
print
(
"dict path set to :{}"
.
format
(
global_config
[
'Global'
][
'character_dict_path'
])
)
with
open
(
'rec_{}_lite_train.yml'
.
format
(
FLAGS
.
type
)
,
'w'
)
as
f
:
save_file_path
=
'rec_{}_lite_train.yml'
.
format
(
FLAGS
.
language
)
if
os
.
path
.
isfile
(
save_file_path
):
os
.
remove
(
save_file_path
)
with
open
(
save_file_path
,
'w'
)
as
f
:
yaml
.
dump
(
dict
(
global_config
),
f
,
default_flow_style
=
False
,
sort_keys
=
False
)
print
(
"config file set to :configs/rec/multi_language/rec_{}_lite_train.yml"
.
format
(
FLAGS
.
type
))
print
(
"Train list path set to :{}"
.
format
(
global_config
[
'Train'
][
'dataset'
][
'label_file_list'
][
0
]))
print
(
"Eval list path set to :{}"
.
format
(
global_config
[
'Eval'
][
'dataset'
][
'label_file_list'
][
0
]))
print
(
"Dataset root path set to :{}"
.
format
(
global_config
[
'Eval'
][
'dataset'
][
'data_dir'
]))
print
(
"Dict path set to :{}"
.
format
(
global_config
[
'Global'
][
'character_dict_path'
]))
print
(
"Config file set to :configs/rec/multi_language/{}"
.
format
(
save_file_path
))
ppocr/utils/dict/german_dict.txt
浏览文件 @
46ac85ad
!
"
#
$
%
&
...
...
@@ -72,7 +74,7 @@ l
m
n
o
p
p
q
r
s
...
...
@@ -83,45 +85,59 @@ w
x
y
z
¡
¢
£
¤
¥
¦
§
¨
©
ª
«
¬
®
¯
°
±
²
³
´
µ
¶
·
¸
¹
º
»
¼
½
¿
Â
Ã
Á
Ä
Å
Ê
Î
Ð
É
Ï
Ô
Ö
Ü
ß
à
á
â
ã
ä
å
æ
ç
è
é
ê
ë
í
ï
ñ
ò
ó
ô
ö
ø
ù
ú
û
ü
ō
Š
Ÿ
ʒ
β
δ
з
Ṡ
‘
€
店
旺
潮
酒
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录