weixin_41840029 / PaddleOCR (forked from PaddlePaddle / PaddleOCR)
Commit 68dd6afa
Authored Mar 15, 2021 by tink2123
rename language abbreviations
Parent: c7be8856

Showing 4 changed files with 90 additions and 42 deletions (+90 -42)
configs/rec/multi_language/generate_multi_language_configs.py   +89 -41
ppocr/data/imaug/label_ops.py                                     +1  -1
ppocr/utils/dict/es_dict.txt                                      +0  -0
ppocr/utils/dict/pt_dict.txt                                      +0  -0
configs/rec/multi_language/generate_multi_language_configs.py
@@ -19,21 +19,38 @@ import logging
 logging.basicConfig(level=logging.INFO)
 support_list = {
     'it': 'italian',
-    'xi': 'spanish',
-    'pu': 'portuguese',
+    'es': 'spanish',
+    'pt': 'portuguese',
     'ru': 'russian',
     'ar': 'arabic',
     'ta': 'tamil',
     'ug': 'uyghur',
     'fa': 'persian',
     'ur': 'urdu',
     'rs': 'serbian latin',
     'oc': 'occitan',
     'rsc': 'serbian cyrillic',
     'bg': 'bulgarian',
     'uk': 'ukranian',
     'be': 'belarusian',
     'te': 'telugu',
     'ka': 'kannada',
     'chinese_cht': 'chinese tradition',
     'hi': 'hindi',
     'mr': 'marathi',
     'ne': 'nepali',
 }
 assert (
     os.path.isfile("./rec_multi_language_lite_train.yml")
 ), "Loss basic configuration file rec_multi_language_lite_train.yml. \
 You can download it from \
 https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
 global_config = yaml.load(
     open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
 project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))


 class ArgsParser(ArgumentParser):
     def __init__(self):
         super(ArgsParser, self).__init__(
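The substance of this hunk is the key rename: 'xi' becomes 'es' for Spanish and 'pu' becomes 'pt' for Portuguese, matching the usual two-letter ISO 639-1 codes; the remaining entries are unchanged. A minimal sketch of the lookup this table enables (names mirror the script above, but the snippet is illustrative and not part of the commit):

```python
# Illustrative sketch of validating a "-l/--language" value against support_list.
# The dict is truncated to the entries relevant to this commit.
support_list = {'it': 'italian', 'es': 'spanish', 'pt': 'portuguese'}

def check_language(codes):
    # codes is the list collected by "-l/--language"; only the first entry is used
    assert codes, "please use -l or --language to choose language type"
    assert codes[0] in support_list, (
        "the sub_keys(-l or --language) can only be one of support list:\n"
        "{},\nbut get: {}".format(support_list, codes))
    return codes[0]

print(check_language(['es']))   # 'es' (was 'xi' before this commit)
print(check_language(['pt']))   # 'pt' (was 'pu' before this commit)
```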
@@ -41,15 +58,30 @@ class ArgsParser(ArgumentParser):
         self.add_argument(
             "-o", "--opt", nargs='+', help="set configuration options")
         self.add_argument(
             "-l",
             "--language",
             nargs='+',
             help="set language type, support {}".format(support_list))
         self.add_argument(
             "--train",
             type=str,
             help="you can use this command to change the train dataset default path")
         self.add_argument(
             "--val",
             type=str,
             help="you can use this command to change the eval dataset default path")
         self.add_argument(
             "--dict",
             type=str,
             help="you can use this command to change the dictionary default path")
         self.add_argument(
             "--data_dir",
             type=str,
             help="you can use this command to change the dataset default root path")

     def parse_args(self, argv=None):
         args = super(ArgsParser, self).parse_args(argv)
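With these flags, an invocation after this commit passes the new abbreviations, e.g. python3 generate_multi_language_configs.py -l es --train train_data/es_train.txt (the paths are illustrative, echoing the help text rather than anything in the commit). A hedged sketch of how the same flags parse, using a plain ArgumentParser rather than the script's subclass:

```python
# Illustrative check that "-l es" parses as expected; this mirrors the flags
# defined above but is not the commit's ArgsParser.
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument("-o", "--opt", nargs='+', help="set configuration options")
parser.add_argument("-l", "--language", nargs='+', help="set language type")
parser.add_argument("--train", type=str)
parser.add_argument("--val", type=str)
parser.add_argument("--dict", type=str)
parser.add_argument("--data_dir", type=str)

args = parser.parse_args(
    ["-l", "es", "--train", "train_data/es_train.txt", "--data_dir", "train_data/"])
print(args.language)   # ['es'] -- would have been ['xi'] before this commit
```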
@@ -68,20 +100,28 @@ class ArgsParser(ArgumentParser):
         return config

     def _set_language(self, type):
         assert (type), "please use -l or --language to choose language type"
         assert (
             type[0] in support_list.keys()
         ), "the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \
            "please check your running command".format(support_list, type)
         global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0])
         global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0])
         global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(type[0])]
         global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(type[0])]
         global_config['Global']['character_type'] = type[0]
         assert (os.path.isfile(
             os.path.join(project_path,
                          global_config['Global']['character_dict_path']))
         ), "Loss default dictionary file {}_dict.txt.You can download it from \
 https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(type[0])
         return type[0]
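For a concrete sense of what _set_language rewrites, here is a hedged sketch of the fields it derives when the new code 'pt' is passed; the nested dict stands in for the YAML loaded into global_config and is not the real config:

```python
# Sketch of the substitutions _set_language makes for "-l pt"; illustrative only.
lang = 'pt'  # was 'pu' before this commit
global_config = {'Global': {}, 'Train': {'dataset': {}}, 'Eval': {'dataset': {}}}

global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(lang)
global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(lang)
global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(lang)]
global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(lang)]
global_config['Global']['character_type'] = lang

print(global_config['Global']['character_dict_path'])  # ppocr/utils/dict/pt_dict.txt
```

The derived dictionary path is exactly the file renamed later in this commit (pt_dict.txt), which is why the key rename and the dictionary file renames have to land together.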
@@ -110,43 +150,51 @@ def merge_config(config):
                 cur[sub_key] = value
             else:
                 cur = cur[sub_key]


 def loss_file(path):
     assert (
         os.path.exists(path)
     ), "There is no such file:{},Please do not forget to put in the specified file".format(path)


 if __name__ == '__main__':
     FLAGS = ArgsParser().parse_args()
     merge_config(FLAGS.opt)
     save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language)
     if os.path.isfile(save_file_path):
         os.remove(save_file_path)

     if FLAGS.train:
         global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
         train_label_path = os.path.join(project_path, FLAGS.train)
         loss_file(train_label_path)
     if FLAGS.val:
         global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
         eval_label_path = os.path.join(project_path, FLAGS.val)
         loss_file(eval_label_path)
     if FLAGS.dict:
         global_config['Global']['character_dict_path'] = FLAGS.dict
         dict_path = os.path.join(project_path, FLAGS.dict)
         loss_file(dict_path)
     if FLAGS.data_dir:
         global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
         global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
         data_dir = os.path.join(project_path, FLAGS.data_dir)
         loss_file(data_dir)

     with open(save_file_path, 'w') as f:
         yaml.dump(
             dict(global_config), f, default_flow_style=False, sort_keys=False)
     logging.info("Project path is :{}".format(project_path))
     logging.info("Train list path set to :{}".format(
         global_config['Train']['dataset']['label_file_list'][0]))
     logging.info("Eval list path set to :{}".format(
         global_config['Eval']['dataset']['label_file_list'][0]))
     logging.info("Dataset root path set to :{}".format(
         global_config['Eval']['dataset']['data_dir']))
     logging.info("Dict path set to :{}".format(
         global_config['Global']['character_dict_path']))
     logging.info("Config file set to :configs/rec/multi_language/{}".format(
         save_file_path))
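Putting the pieces together, the __main__ block merges any -o overrides, validates the referenced files with loss_file, and dumps the patched config. A hedged sketch of the final dump step for '-l es' (the config dict below is a stand-in, not the real rec_multi_language_lite_train.yml):

```python
# Illustrative sketch of the yaml.dump step; assumes FLAGS.language has already
# been reduced to the single code 'es' (the value _set_language returns).
import yaml

lang = 'es'  # would have been 'xi' before this commit
global_config = {
    'Global': {
        'character_type': lang,
        'character_dict_path': 'ppocr/utils/dict/{}_dict.txt'.format(lang),
        'save_model_dir': './output/rec_{}_lite'.format(lang),
    },
    'Train': {'dataset': {'label_file_list': ['train_data/{}_train.txt'.format(lang)]}},
    'Eval': {'dataset': {'label_file_list': ['train_data/{}_val.txt'.format(lang)],
                         'data_dir': 'train_data/'}},
}

save_file_path = 'rec_{}_lite_train.yml'.format(lang)
with open(save_file_path, 'w') as f:
    yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False)
# writes rec_es_lite_train.yml next to the script
```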
ppocr/data/imaug/label_ops.py
@@ -94,7 +94,7 @@ class BaseRecLabelEncode(object):
                  use_space_char=False):
         support_character_type = [
             'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
-            'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
+            'EN', 'it', 'es', 'pt', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
             'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
             'mr', 'ne'
         ]
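This one-line change is what actually retires the old abbreviations at training time: a config whose character_type is still 'xi' or 'pu' no longer appears in the encoder's whitelist. An illustrative check (the list is copied from the hunk above; the loop itself is not PaddleOCR code):

```python
# Show which character_type values pass the whitelist after this commit.
support_character_type = [
    'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
    'EN', 'it', 'es', 'pt', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
    'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
    'mr', 'ne'
]

for character_type in ('es', 'pt', 'xi', 'pu'):
    ok = character_type in support_character_type
    print(character_type, 'supported' if ok else 'rejected')
# es supported / pt supported / xi rejected / pu rejected
```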
ppocr/utils/dict/xi_dict.txt → ppocr/utils/dict/es_dict.txt (file moved)
ppocr/utils/dict/pu_dict.txt → ppocr/utils/dict/pt_dict.txt (file moved)
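The two dictionary renames keep the 'ppocr/utils/dict/{}_dict.txt' pattern used by generate_multi_language_configs.py resolving for the new codes. A quick, illustrative sanity check to run from the repository root:

```python
# Confirm the renamed dictionaries line up with the '{}_dict.txt' pattern;
# run from the PaddleOCR repository root.
import os

for lang in ('es', 'pt'):
    path = 'ppocr/utils/dict/{}_dict.txt'.format(lang)
    print(path, 'exists' if os.path.isfile(path) else 'missing')
```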