提交 37cd4ed3 编写于 作者: A andyjpaddle

delete dict90, update sar postprocess, update sar.yml

上级 ea019601
......@@ -15,7 +15,7 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict90.txt
-character_type: ch
+character_type: EN_symbol
max_text_length: 30
infer_mode: False
use_space_char: False
......@@ -47,6 +47,7 @@ Loss:
PostProcess:
name: SARLabelDecode
+rm_symbol: True
Metric:
name: RecMetric
......@@ -56,8 +57,8 @@ Train:
dataset:
name: SimpleDataSet
delimiter: ' '
-label_file_list: ['/paddle/data/concat_data/train_list.txt']
-data_dir: /paddle/data/concat_data/
+label_file_list: ['./train_data/train_list.txt']
+data_dir: ./train_data/
ratio_list: 1.0
transforms:
- DecodeImage: # load image
......@@ -79,7 +80,7 @@ Train:
Eval:
dataset:
name: LMDBDataSet
-data_dir: /paddle/data/ocr_data/evaluation/
+data_dir: ./eval_data/evaluation/
transforms:
- DecodeImage: # load image
img_mode: BGR
......
......@@ -526,6 +526,7 @@ class SARLabelDecode(BaseRecLabelDecode):
character_dict_path=None,
character_type='ch',
use_space_char=False,
+rm_symbol=True,
**kwargs):
super(SARLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char)
......@@ -572,9 +573,10 @@ class SARLabelDecode(BaseRecLabelDecode):
else:
conf_list.append(1)
text = ''.join(char_list)
-comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]')
-text = text.lower()
-text = comp.sub('', text)
+if self.rm_symbol:
+    comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]')
+    text = text.lower()
+    text = comp.sub('', text)
result_list.append((text, np.mean(conf_list)))
return result_list
......
0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
:
;
<
=
>
?
@
[
\
]
_
`
~
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册