未验证 提交 21292bb3 编写于 作者: X xiaoting 提交者: GitHub

Merge pull request #1767 from xmy0916/dygraph

add multi language config file imgs and dict
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import yaml
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import os.path
import logging
# Show INFO-level records; this script reports its results through logging.info.
logging.basicConfig(level=logging.INFO)

# Language codes accepted by -l/--language, mapped to a readable language name.
# Insertion order is kept as-is because the whole dict is printed in help and
# error messages.
support_list = {
    'it': 'italian',
    'xi': 'spanish',
    'pu': 'portuguese',
    'ru': 'russian',
    'ar': 'arabic',
    'ta': 'tamil',
    'ug': 'uyghur',
    'fa': 'persian',
    'ur': 'urdu',
    'rs': 'serbian latin',
    'oc': 'occitan',
    'rsc': 'serbian cyrillic',
    'bg': 'bulgarian',
    'uk': 'ukranian',
    'be': 'belarusian',
    'te': 'telugu',
    'ka': 'kannada',
    'chinese_cht': 'chinese tradition',
    'hi': 'hindi',
    'mr': 'marathi',
    'ne': 'nepali',
}
# Every generated config is derived from this base file, which must be present
# in the current working directory.
assert os.path.isfile("./rec_multi_language_lite_train.yml"), (
    "Loss basic configuration file rec_multi_language_lite_train.yml."
    "You can download it from "
    "https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
)

# Base configuration; the rest of the script mutates this dict in place and
# finally dumps it to the language-specific YAML file.
# NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
# tolerable for a local, trusted config file; consider yaml.safe_load.
# The context manager closes the handle (the file used to be left open).
with open("./rec_multi_language_lite_train.yml", 'rb') as base_config_file:
    global_config = yaml.load(base_config_file, Loader=yaml.Loader)

# Directory three levels above the current working directory. Relative
# dataset/dictionary paths are resolved against it (presumably the repository
# root when the script is run from configs/rec/multi_language/ -- confirm).
project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
class ArgsParser(ArgumentParser):
    """Command-line parser for the multi-language config generator.

    Besides parsing, ``parse_args`` post-processes two options:

    * ``-o/--opt`` is turned into a ``{key: value}`` dict (values are parsed
      as YAML scalars/collections).
    * ``-l/--language`` is validated against ``support_list`` and, as a side
      effect, the language-specific defaults are written into the
      module-level ``global_config``.
    """

    def __init__(self):
        super(ArgsParser, self).__init__(
            formatter_class=RawDescriptionHelpFormatter)
        self.add_argument(
            "-o", "--opt", nargs='+', help="set configuration options")
        self.add_argument(
            "-l", "--language", nargs='+',
            help="set language type, support {}".format(support_list))
        self.add_argument(
            "--train", type=str,
            help="you can use this command to change the train dataset default path")
        self.add_argument(
            "--val", type=str,
            help="you can use this command to change the eval dataset default path")
        self.add_argument(
            "--dict", type=str,
            help="you can use this command to change the dictionary default path")
        self.add_argument(
            "--data_dir", type=str,
            help="you can use this command to change the dataset default root path")

    def parse_args(self, argv=None):
        """Parse ``argv`` and normalise the ``opt`` and ``language`` fields.

        Returns:
            The argparse namespace, with ``opt`` replaced by a dict and
            ``language`` replaced by the single selected language code.
        """
        args = super(ArgsParser, self).parse_args(argv)
        args.opt = self._parse_opt(args.opt)
        args.language = self._set_language(args.language)
        return args

    def _parse_opt(self, opts):
        """Convert ``["key=value", ...]`` into ``{key: parsed_value}``.

        Args:
            opts: list of ``key=value`` strings, or a falsy value when the
                option was not given.

        Returns:
            dict mapping each key to its value parsed with ``yaml.load``;
            empty when ``opts`` is falsy.

        Raises:
            ValueError: if an item contains no ``=``.
        """
        config = {}
        if not opts:
            return config
        for s in opts:
            s = s.strip()
            # Split on the first '=' only, so values that themselves contain
            # '=' (URLs, paths, expressions) no longer raise ValueError.
            k, v = s.split('=', 1)
            # NOTE(review): yaml.Loader can build arbitrary Python objects from
            # command-line text; consider yaml.safe_load.
            config[k] = yaml.load(v, Loader=yaml.Loader)
        return config

    def _set_language(self, type):
        """Validate the language choice and apply its defaults to global_config.

        Only the first element of ``type`` is used; extra values are ignored.

        Args:
            type: list of language codes given to ``-l/--language``.

        Returns:
            The selected language code (``type[0]``).

        Raises:
            AssertionError: if no language was given, the code is not in
                ``support_list``, or the default dictionary file is missing
                under ``project_path``.
        """
        assert (type), "please use -l or --language to choose language type"
        assert (
            type[0] in support_list.keys()
        ), "the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \
           "please check your running command".format(support_list, type)

        language = type[0]
        # Per-language defaults; --train/--val/--dict may override them later.
        global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(language)
        global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(language)
        global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(language)]
        global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(language)]
        global_config['Global']['character_type'] = language

        # The default dictionary path is relative to project_path.
        assert (
            os.path.isfile(os.path.join(project_path, global_config['Global']['character_dict_path']))
        ), (
            "Loss default dictionary file {}_dict.txt.You can download it from "
            "https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/"
        ).format(language)
        return language
def merge_config(config):
    """
    Merge config into global config.

    Keys without a dot replace the top-level entry, or update it in place when
    both the new value and the existing entry are dicts. Dotted keys such as
    ``"Train.dataset.data_dir"`` address a nested entry: every component
    except the last must already exist, and the last one is assigned.

    Args:
        config (dict): Config to be merged.
    Returns: global config (the same module-level dict, mutated in place)
    Raises:
        AssertionError: if the first component of a dotted key is not a
            top-level key of the global config.
        KeyError: if an intermediate component of a dotted key is missing.
    """
    for key, value in config.items():
        if "." not in key:
            if isinstance(value, dict) and key in global_config:
                global_config[key].update(value)
            else:
                global_config[key] = value
            continue

        sub_keys = key.split('.')
        assert (
            sub_keys[0] in global_config
        ), "the sub_keys can only be one of global_config: {}, but get: {}, please check your running command".format(
            global_config.keys(), sub_keys[0])

        # Walk down to the parent of the final component, then assign it.
        cur = global_config[sub_keys[0]]
        for sub_key in sub_keys[1:-1]:
            cur = cur[sub_key]
        cur[sub_keys[-1]] = value
    # The docstring always promised this return value; callers that ignore
    # it are unaffected.
    return global_config
def loss_file(path):
    """Assert that ``path`` exists on disk (file or directory).

    Args:
        path: filesystem path to check.

    Raises:
        AssertionError: if nothing exists at ``path``.
    """
    found = os.path.exists(path)
    message = "There is no such file:{},Please do not forget to put in the specified file".format(path)
    assert found, message
if __name__ == '__main__':
    # Parsing also applies the language defaults to global_config.
    FLAGS = ArgsParser().parse_args()
    # Apply "-o key=value" overrides on top of the language defaults.
    merge_config(FLAGS.opt)

    save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language)
    # Drop any config generated by a previous run before writing a new one.
    if os.path.isfile(save_file_path):
        os.remove(save_file_path)

    # Optional path overrides. Each is stored in the config as given and then
    # checked for existence relative to project_path.
    if FLAGS.train:
        global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
        train_label_path = os.path.join(project_path, FLAGS.train)
        loss_file(train_label_path)

    if FLAGS.val:
        global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
        eval_label_path = os.path.join(project_path, FLAGS.val)
        # Fixed: this used the undefined name `Eval_label_path`, so passing
        # --val always ended in a NameError.
        loss_file(eval_label_path)

    if FLAGS.dict:
        global_config['Global']['character_dict_path'] = FLAGS.dict
        dict_path = os.path.join(project_path, FLAGS.dict)
        loss_file(dict_path)

    if FLAGS.data_dir:
        global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
        global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
        data_dir = os.path.join(project_path, FLAGS.data_dir)
        loss_file(data_dir)

    # sort_keys=False keeps the section order of the base config.
    with open(save_file_path, 'w') as f:
        yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False)

    logging.info("Project path is :{}".format(project_path))
    logging.info("Train list path set to :{}".format(global_config['Train']['dataset']['label_file_list'][0]))
    logging.info("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0]))
    logging.info("Dataset root path set to :{}".format(global_config['Eval']['dataset']['data_dir']))
    logging.info("Dict path set to :{}".format(global_config['Global']['character_dict_path']))
    logging.info("Config file set to :configs/rec/multi_language/{}".format(save_file_path))
Global:
  use_gpu: True
  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec_multi_language_lite
  save_epoch_step: 3
  # evaluation is run every 2000 iterations after the 0th iteration
  eval_batch_step: [0, 2000]
  # if pretrained_model is saved in static mode, load_static_weights must be set to True
  cal_metric_during_train: True
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img:
  # for data or label process
  character_dict_path:
  # Set the language of training, if set, select the default dictionary file
  character_type:
  max_text_length: 25
  infer_mode: False
  use_space_char: True

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Cosine
    learning_rate: 0.001
  regularizer:
    name: 'L2'
    factor: 0.00001

Architecture:
  model_type: rec
  algorithm: CRNN
  Transform:
  Backbone:
    name: MobileNetV3
    scale: 0.5
    model_name: small
    small_stride: [1, 2, 2, 2]
  Neck:
    name: SequenceEncoder
    encoder_type: rnn
    hidden_size: 48
  Head:
    name: CTCHead
    fc_decay: 0.00001

Loss:
  name: CTCLoss

PostProcess:
  name: CTCLabelDecode

Metric:
  name: RecMetric
  main_indicator: acc

Train:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/
    label_file_list: ["./train_data/train_list.txt"]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - RecAug:
      - CTCLabelEncode: # Class handling label
      - RecResizeImg:
          image_shape: [3, 32, 320]
      - KeepKeys:
          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
  loader:
    shuffle: True
    batch_size_per_card: 256
    drop_last: True
    num_workers: 8

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/
    label_file_list: ["./train_data/val_list.txt"]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - CTCLabelEncode: # Class handling label
      - RecResizeImg:
          image_shape: [3, 32, 320]
      - KeepKeys:
          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 256
    num_workers: 8
...@@ -60,6 +60,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训 ...@@ -60,6 +60,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
| japan_mobile_v2.0_rec |日文识别|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) | | japan_mobile_v2.0_rec |日文识别|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
<a name="文本方向分类模型"></a> <a name="文本方向分类模型"></a>
### 三、文本方向分类模型 ### 三、文本方向分类模型
......
...@@ -24,7 +24,9 @@ class BaseRecLabelDecode(object): ...@@ -24,7 +24,9 @@ class BaseRecLabelDecode(object):
character_type='ch', character_type='ch',
use_space_char=False): use_space_char=False):
support_character_type = [ support_character_type = [
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean' 'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean', 'it',
'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', 'rsc', 'bg',
'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', 'ne'
] ]
assert character_type in support_character_type, "Only {} are supported now but get {}".format( assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type) support_character_type, character_type)
......
a
r
b
i
c
_
m
g
/
1
0
I
L
S
V
R
C
2
v
l
6
3
9
.
j
p
ا
ل
م
ر
ج
و
ح
ي
ة
5
8
7
أ
ب
ض
4
ك
س
ه
ث
ن
ط
ع
ت
غ
خ
ف
ئ
ز
إ
د
ص
ظ
ذ
ش
ى
ق
ؤ
آ
ء
s
e
n
w
t
u
z
d
A
N
G
h
o
E
T
H
O
B
y
F
U
J
X
W
P
Z
M
k
q
Y
Q
D
f
K
x
'
%
-
#
@
!
&
$
,
:
é
?
+
É
(
b
e
_
i
m
g
/
2
0
I
L
S
V
R
C
1
v
a
l
6
9
4
3
.
j
p
п
а
з
б
у
г
н
ц
ь
8
м
л
і
о
ў
ы
7
5
М
х
с
р
ф
я
е
д
ж
ю
ч
й
к
Д
в
Б
т
І
ш
ё
э
К
Л
Н
А
Ж
Г
В
П
З
Е
О
Р
С
У
Ё
Й
Т
Ч
Э
Ц
Ю
Ш
Ф
Х
Я
Ь
Ы
Ў
s
c
n
w
M
o
t
T
E
A
B
u
h
y
k
r
H
d
Y
O
U
F
f
x
D
G
N
K
P
z
J
X
W
Z
Q
%
-
q
@
'
!
#
&
,
:
$
(
?
é
+
É
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
А
Б
В
Г
Д
Е
Ж
З
И
Й
К
Л
М
Н
О
П
Р
С
Т
У
Ф
Х
Ц
Ч
Ш
Щ
Ъ
Ю
Я
а
б
в
г
д
е
ж
з
и
й
к
л
м
н
о
п
р
с
т
у
ф
х
ц
ч
ш
щ
ъ
ь
ю
я
此差异已折叠。
f
a
_
i
m
g
/
1
3
I
L
S
V
R
C
2
0
v
l
6
8
5
.
j
p
و
د
ر
ك
ن
ش
ه
ا
4
9
ی
ج
ِ
7
غ
ل
س
ز
ّ
ت
ک
گ
ي
م
ب
ف
چ
خ
ق
ژ
آ
ص
پ
َ
ع
ئ
ح
ٔ
ض
ُ
ذ
أ
ى
ط
ظ
ث
ة
ً
ء
ؤ
ْ
ۀ
إ
ٍ
ٌ
ٰ
ٓ
ٱ
s
c
e
n
w
N
E
W
Y
D
O
H
A
d
z
r
T
G
o
t
x
h
b
B
M
Z
u
P
F
y
q
U
K
k
J
Q
'
X
#
?
%
$
,
:
&
!
-
(
É
@
é
+
! !
" "
#
$ $
% %
& &
...@@ -83,45 +85,59 @@ w ...@@ -83,45 +85,59 @@ w
x x
y y
z z
¡
¢
£ £
¤
¥
¦
§ §
¨
©
ª
«
¬
­ ­
®
¯
° °
±
²
³
´ ´
µ µ
· ·
¸
¹
º º
»
¼
½
¿ ¿
 Á
à Ä
Å Å
Ê É
Î Ï
Ð Ô
Ö
Ü
ß
à
á á
â â
ã
ä
å å
æ æ
ç
è
é é
ê
ë
í
ï
ñ
ò
ó
ô
ö
ø
ù
ú
û
ü
ō
Š
Ÿ
ʒ
β
δ
з
©
ª
«
¬
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
ि
i
t
_
m
g
/
5
I
L
S
V
R
C
2
0
1
v
a
l
7
8
9
6
.
j
p
e
r
o
d
s
n
3
4
P
u
c
A
-
,
"
z
h
f
b
q
ì
'
à
O
è
G
ù
é
ò
;
F
E
B
N
H
k
:
U
T
X
D
K
?
[
M
­
x
y
(
)
W
ö
º
w
]
Q
J
+
ü
!
È
á
%
=
»
ñ
Ö
Y
ä
í
Z
«
@
ó
ø
ï
ú
ê
ç
Á
É
Å
ß
{
}
&
`
û
î
#
$
k
a
_
i
m
g
/
1
2
I
L
S
V
R
C
0
v
l
6
4
8
.
j
p
ಿ
7
5
3
9
-
,
s
c
e
n
w
o
u
t
d
E
A
T
B
Z
N
G
O
q
z
r
x
P
K
M
J
U
D
f
F
h
b
W
Y
y
H
X
Q
'
#
&
!
@
$
:
%
é
É
(
?
+
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
ि
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
ि
o
c
_
i
m
g
/
2
0
I
L
S
V
R
C
1
v
a
l
4
3
.
j
p
r
e
è
t
9
7
5
8
n
'
b
s
6
q
u
á
d
ò
à
h
z
f
ï
í
A
ç
x
ó
é
P
O
Ò
ü
k
À
F
-
ú
­
æ
Á
D
E
w
K
T
N
y
U
Z
G
B
J
H
M
W
Y
X
Q
%
$
,
@
&
!
:
(
#
?
+
É
p
u
_
i
m
g
/
8
I
L
S
V
R
C
2
0
1
v
a
l
6
7
4
5
.
j
q
e
s
t
ã
o
x
9
c
n
r
z
ç
õ
3
A
U
d
º
ô
­
,
E
;
ó
á
b
D
?
ú
ê
-
h
P
f
à
N
í
O
M
G
É
é
â
F
:
T
Á
"
Q
)
W
J
B
H
(
ö
%
Ö
«
w
K
y
!
k
]
'
Z
+
Ç
Õ
Y
À
X
µ
»
ª
Í
ü
ä
´
è
ñ
ß
ï
Ú
ë
Ô
Ï
Ó
[
Ì
<
Â
ò
§
³
ø
å
#
$
&
@
r
s
_
i
m
g
/
1
I
L
S
V
R
C
2
0
v
a
l
7
5
8
6
.
j
p
t
d
9
3
e
š
4
k
u
ć
c
n
đ
o
z
č
b
ž
f
Z
T
h
M
F
O
Š
B
H
A
E
Đ
Ž
D
P
G
Č
K
U
N
J
Ć
w
y
W
x
Y
X
q
Q
#
&
$
,
-
%
'
@
!
:
?
(
É
é
+
r
s
c
_
i
m
g
/
5
I
L
S
V
R
C
2
0
1
v
a
l
9
7
8
.
j
p
м
а
с
и
р
ћ
е
ш
3
4
о
г
н
з
в
л
6
т
ж
у
к
п
њ
д
ч
С
ј
ф
ц
љ
х
О
И
А
б
Ш
К
ђ
џ
М
В
З
Д
Р
У
Н
Т
Б
?
П
Х
Ј
Ц
Г
Љ
Л
Ф
e
n
w
E
F
A
N
f
o
b
M
G
t
y
W
k
P
u
H
B
T
z
h
O
Y
d
U
K
D
x
X
J
Z
Q
q
'
-
@
é
#
!
,
%
$
:
&
+
(
É
к
в
а
з
и
у
р
о
н
я
х
п
л
ы
г
е
т
м
д
ж
ш
ь
с
ё
б
й
ч
ю
ц
щ
М
э
ф
А
ъ
С
Ф
Ю
В
К
Т
Н
О
Э
У
И
Г
Л
Р
Д
Б
Ш
П
З
Х
Е
Ж
Я
Ц
Ч
Й
Щ
0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
t
a
_
i
m
g
/
3
I
L
S
V
R
C
2
0
1
v
l
9
7
8
.
j
p
ி
6
5
4
s
c
e
n
w
F
T
O
P
K
A
N
G
Y
E
M
H
U
B
o
b
D
d
r
W
u
y
f
X
k
q
h
J
z
Z
Q
x
-
'
$
,
%
@
é
!
#
+
É
&
:
(
?
t
e
_
i
m
g
/
5
I
L
S
V
R
C
2
0
1
v
a
l
3
4
8
9
.
j
p
ి
7
6
'
[
;
-
,
|
?
:
"
(
!
+
)
*
=
&
]
£
$
s
c
n
w
k
J
G
u
d
r
E
o
h
y
b
f
B
M
O
T
N
D
P
A
F
x
W
Y
U
H
K
X
z
Z
Q
q
É
%
#
@
é
u
g
_
i
m
/
1
I
L
S
V
R
C
2
0
v
a
l
8
5
3
6
9
.
j
p
ق
ا
پ
ل
4
7
ئ
ى
ش
ت
ي
ك
د
ف
ر
و
ن
ب
ە
خ
ې
چ
ۇ
ز
س
م
ۋ
گ
ڭ
ۆ
ۈ
ج
غ
ھ
ژ
s
c
e
n
w
P
E
D
U
d
r
b
y
B
o
O
Y
N
T
k
t
h
A
H
F
z
W
K
G
M
f
Z
X
Q
J
x
q
-
!
%
#
?
:
$
,
&
'
É
@
é
(
+
u
k
_
i
m
g
/
1
6
I
L
S
V
R
C
2
0
v
a
l
7
9
.
j
p
в
і
д
п
о
н
с
т
ю
4
5
3
а
и
м
е
р
ч
у
Б
з
л
к
8
А
В
г
є
б
ь
х
ґ
ш
ц
ф
я
щ
ж
Г
Х
У
Т
Е
І
Н
П
З
Л
Ю
С
Д
М
К
Р
Ф
О
Ц
И
Я
Ч
Ш
Ж
Є
Ґ
Ь
s
c
e
n
w
A
P
r
E
t
o
h
d
y
M
G
N
F
B
T
D
U
O
W
Z
f
H
Y
b
K
z
x
Q
X
q
J
$
-
'
#
&
%
?
:
!
,
+
@
(
é
É
u
r
_
i
m
g
/
3
I
L
S
V
R
C
2
0
1
v
a
l
9
7
8
.
j
p
چ
ٹ
پ
ا
ئ
ی
ے
4
6
و
ل
ن
ڈ
ھ
ک
ت
ش
ف
ق
ر
د
5
ب
ج
خ
ہ
س
ز
غ
ڑ
ں
آ
م
ؤ
ط
ص
ح
ع
گ
ث
ض
ذ
ۓ
ِ
ء
ظ
ً
ي
ُ
ۃ
أ
ٰ
ە
ژ
ۂ
ة
ّ
ك
ه
s
c
e
n
w
o
d
t
D
M
T
U
E
b
P
h
y
W
H
A
x
B
O
N
G
Y
Q
F
k
K
q
J
Z
f
z
X
'
@
&
!
,
:
$
-
#
?
%
é
+
(
É
x
i
_
m
g
/
1
0
I
L
S
V
R
C
2
v
a
l
3
6
4
5
.
j
p
Q
u
e
r
o
8
7
n
c
9
t
b
é
q
d
ó
y
F
s
,
O
í
T
f
"
U
M
h
:
P
H
A
E
D
z
N
á
ñ
ú
%
;
è
+
Y
-
B
G
(
)
¿
?
w
¡
!
X
É
K
k
Á
ü
Ú
«
»
J
'
ö
W
Z
º
Ö
­
[
]
Ç
ç
à
ä
û
ò
Í
ê
ô
ø
ª
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册