Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_41840029
PaddleOCR
提交
bf4863c9
P
PaddleOCR
项目概览
weixin_41840029
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bf4863c9
编写于
8月 15, 2020
作者:
T
tink2123
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update infer_rec for srn
上级
6832ca02
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
197 addition
and
161 deletion
+197
-161
ppocr/data/rec/dataset_traversal.py
ppocr/data/rec/dataset_traversal.py
+19
-22
ppocr/modeling/architectures/rec_model.py
ppocr/modeling/architectures/rec_model.py
+4
-1
ppocr/modeling/backbones/rec_resnet_vd.py
ppocr/modeling/backbones/rec_resnet_vd.py
+1
-1
ppocr/modeling/heads/rec_srn_all_head.py
ppocr/modeling/heads/rec_srn_all_head.py
+103
-91
ppocr/modeling/losses/rec_srn_loss.py
ppocr/modeling/losses/rec_srn_loss.py
+18
-21
ppocr/utils/character.py
ppocr/utils/character.py
+8
-17
tools/eval_utils/eval_rec_utils.py
tools/eval_utils/eval_rec_utils.py
+2
-2
tools/infer_rec.py
tools/infer_rec.py
+42
-6
未找到文件。
ppocr/data/rec/dataset_traversal.py
浏览文件 @
bf4863c9
...
...
@@ -40,10 +40,12 @@ class LMDBReader(object):
self
.
image_shape
=
params
[
'image_shape'
]
self
.
loss_type
=
params
[
'loss_type'
]
self
.
max_text_length
=
params
[
'max_text_length'
]
self
.
num_heads
=
params
[
'num_heads'
]
self
.
mode
=
params
[
'mode'
]
self
.
drop_last
=
False
self
.
use_tps
=
False
self
.
num_heads
=
None
if
"num_heads"
in
params
:
self
.
num_heads
=
params
[
'num_heads'
]
if
"tps"
in
params
:
self
.
ues_tps
=
True
self
.
use_distort
=
False
...
...
@@ -134,20 +136,6 @@ class LMDBReader(object):
tps
=
self
.
use_tps
,
infer_mode
=
True
)
yield
norm_img
#elif self.mode == 'eval':
# image_file_list = get_image_file_list(self.infer_img)
# for single_img in image_file_list:
# img = cv2.imread(single_img)
# if img.shape[-1]==1 or len(list(img.shape))==2:
# img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
# norm_img = process_image(
# img=img,
# image_shape=self.image_shape,
# char_ops=self.char_ops,
# tps=self.use_tps,
# infer_mode=True
# )
# yield norm_img
else
:
lmdb_sets
=
self
.
load_hierarchical_lmdb_dataset
()
if
process_id
==
0
:
...
...
@@ -169,14 +157,22 @@ class LMDBReader(object):
outs
=
[]
if
self
.
loss_type
==
"srn"
:
outs
=
process_image_srn
(
img
,
self
.
image_shape
,
self
.
num_heads
,
self
.
max_text_length
,
label
,
self
.
char_ops
,
self
.
loss_type
)
img
=
img
,
image_shape
=
self
.
image_shape
,
num_heads
=
self
.
num_heads
,
max_text_length
=
self
.
max_text_length
,
label
=
label
,
char_ops
=
self
.
char_ops
,
loss_type
=
self
.
loss_type
)
else
:
outs
=
process_image
(
img
,
self
.
image_shape
,
label
,
self
.
char_ops
,
self
.
loss_type
,
self
.
max_text_length
)
img
=
img
,
image_shape
=
self
.
image_shape
,
label
=
label
,
char_ops
=
self
.
char_ops
,
loss_type
=
self
.
loss_type
,
max_text_length
=
self
.
max_text_length
)
if
outs
is
None
:
continue
yield
outs
...
...
@@ -192,8 +188,9 @@ class LMDBReader(object):
if
len
(
batch_outs
)
==
self
.
batch_size
:
yield
batch_outs
batch_outs
=
[]
if
len
(
batch_outs
)
!=
0
:
yield
batch_outs
if
not
self
.
drop_last
:
if
len
(
batch_outs
)
!=
0
:
yield
batch_outs
if
self
.
infer_img
is
None
:
return
batch_iter_reader
...
...
ppocr/modeling/architectures/rec_model.py
浏览文件 @
bf4863c9
...
...
@@ -58,7 +58,10 @@ class RecModel(object):
self
.
loss_type
=
global_params
[
'loss_type'
]
self
.
image_shape
=
global_params
[
'image_shape'
]
self
.
max_text_length
=
global_params
[
'max_text_length'
]
self
.
num_heads
=
global_params
[
"num_heads"
]
if
"num_heads"
in
params
:
self
.
num_heads
=
global_params
[
"num_heads"
]
else
:
self
.
num_heads
=
None
def
create_feed
(
self
,
mode
):
image_shape
=
deepcopy
(
self
.
image_shape
)
...
...
ppocr/modeling/backbones/rec_resnet_vd.py
浏览文件 @
bf4863c9
...
...
@@ -32,7 +32,7 @@ class ResNet():
def
__init__
(
self
,
params
):
self
.
layers
=
params
[
'layers'
]
self
.
is_3x3
=
True
supported_layers
=
[
18
,
34
,
50
,
101
,
152
]
supported_layers
=
[
18
,
34
,
50
,
101
,
152
,
200
]
assert
self
.
layers
in
supported_layers
,
\
"supported layers are {} but input layer is {}"
.
format
(
supported_layers
,
self
.
layers
)
...
...
ppocr/modeling/heads/rec_srn_all_head.py
浏览文件 @
bf4863c9
...
...
@@ -21,15 +21,12 @@ import math
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.param_attr
import
ParamAttr
#from .rec_seq_encoder import SequenceEncoder
#from ..common_functions import get_para_bias_attr
import
numpy
as
np
from
.self_attention.model
import
wrap_encoder
from
.self_attention.model
import
wrap_encoder_forFeature
gradient_clip
=
10
class
SRNPredict
(
object
):
def
__init__
(
self
,
params
):
super
(
SRNPredict
,
self
).
__init__
()
...
...
@@ -41,7 +38,6 @@ class SRNPredict(object):
self
.
num_decoder_TUs
=
params
[
'num_decoder_TUs'
]
self
.
hidden_dims
=
params
[
'hidden_dims'
]
def
pvam
(
self
,
inputs
,
others
):
b
,
c
,
h
,
w
=
inputs
.
shape
...
...
@@ -53,52 +49,62 @@ class SRNPredict(object):
encoder_word_pos
=
others
[
"encoder_word_pos"
]
gsrm_word_pos
=
others
[
"gsrm_word_pos"
]
enc_inputs
=
[
conv_features
,
encoder_word_pos
,
None
]
word_features
=
wrap_encoder_forFeature
(
src_vocab_size
=-
1
,
max_length
=
t
,
n_layer
=
self
.
num_encoder_TUs
,
n_head
=
self
.
num_heads
,
d_key
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_value
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_model
=
self
.
hidden_dims
,
d_inner_hid
=
self
.
hidden_dims
,
prepostprocess_dropout
=
0.1
,
attention_dropout
=
0.1
,
relu_dropout
=
0.1
,
preprocess_cmd
=
"n"
,
postprocess_cmd
=
"da"
,
weight_sharing
=
True
,
enc_inputs
=
enc_inputs
,
)
fluid
.
clip
.
set_gradient_clip
(
fluid
.
clip
.
GradientClipByValue
(
gradient_clip
))
word_features
=
wrap_encoder_forFeature
(
src_vocab_size
=-
1
,
max_length
=
t
,
n_layer
=
self
.
num_encoder_TUs
,
n_head
=
self
.
num_heads
,
d_key
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_value
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_model
=
self
.
hidden_dims
,
d_inner_hid
=
self
.
hidden_dims
,
prepostprocess_dropout
=
0.1
,
attention_dropout
=
0.1
,
relu_dropout
=
0.1
,
preprocess_cmd
=
"n"
,
postprocess_cmd
=
"da"
,
weight_sharing
=
True
,
enc_inputs
=
enc_inputs
,
)
fluid
.
clip
.
set_gradient_clip
(
fluid
.
clip
.
GradientClipByValue
(
gradient_clip
))
#===== Parallel Visual Attention Module =====
b
,
t
,
c
=
word_features
.
shape
word_features
=
fluid
.
layers
.
fc
(
word_features
,
c
,
num_flatten_dims
=
2
)
word_features
=
fluid
.
layers
.
fc
(
word_features
,
c
,
num_flatten_dims
=
2
)
word_features_
=
fluid
.
layers
.
reshape
(
word_features
,
[
-
1
,
1
,
t
,
c
])
word_features_
=
fluid
.
layers
.
expand
(
word_features_
,
[
1
,
self
.
max_length
,
1
,
1
])
word_pos_feature
=
fluid
.
layers
.
embedding
(
gsrm_word_pos
,
[
self
.
max_length
,
c
])
word_pos_
=
fluid
.
layers
.
reshape
(
word_pos_feature
,
[
-
1
,
self
.
max_length
,
1
,
c
])
word_features_
=
fluid
.
layers
.
expand
(
word_features_
,
[
1
,
self
.
max_length
,
1
,
1
])
word_pos_feature
=
fluid
.
layers
.
embedding
(
gsrm_word_pos
,
[
self
.
max_length
,
c
])
word_pos_
=
fluid
.
layers
.
reshape
(
word_pos_feature
,
[
-
1
,
self
.
max_length
,
1
,
c
])
word_pos_
=
fluid
.
layers
.
expand
(
word_pos_
,
[
1
,
1
,
t
,
1
])
temp
=
fluid
.
layers
.
elementwise_add
(
word_features_
,
word_pos_
,
act
=
'tanh'
)
temp
=
fluid
.
layers
.
elementwise_add
(
word_features_
,
word_pos_
,
act
=
'tanh'
)
attention_weight
=
fluid
.
layers
.
fc
(
input
=
temp
,
size
=
1
,
num_flatten_dims
=
3
,
bias_attr
=
False
)
attention_weight
=
fluid
.
layers
.
reshape
(
x
=
attention_weight
,
shape
=
[
-
1
,
self
.
max_length
,
t
])
attention_weight
=
fluid
.
layers
.
softmax
(
input
=
attention_weight
,
axis
=-
1
)
attention_weight
=
fluid
.
layers
.
fc
(
input
=
temp
,
size
=
1
,
num_flatten_dims
=
3
,
bias_attr
=
False
)
attention_weight
=
fluid
.
layers
.
reshape
(
x
=
attention_weight
,
shape
=
[
-
1
,
self
.
max_length
,
t
])
attention_weight
=
fluid
.
layers
.
softmax
(
input
=
attention_weight
,
axis
=-
1
)
pvam_features
=
fluid
.
layers
.
matmul
(
attention_weight
,
word_features
)
#[b, max_length, c]
pvam_features
=
fluid
.
layers
.
matmul
(
attention_weight
,
word_features
)
#[b, max_length, c]
return
pvam_features
def
gsrm
(
self
,
pvam_features
,
others
):
#===== GSRM Visual-to-semantic embedding block =====
b
,
t
,
c
=
pvam_features
.
shape
word_out
=
fluid
.
layers
.
fc
(
input
=
fluid
.
layers
.
reshape
(
pvam_features
,
[
-
1
,
c
]),
size
=
self
.
char_num
,
act
=
"softmax"
)
word_out
=
fluid
.
layers
.
fc
(
input
=
fluid
.
layers
.
reshape
(
pvam_features
,
[
-
1
,
c
]),
size
=
self
.
char_num
,
act
=
"softmax"
)
#word_out.stop_gradient = True
word_ids
=
fluid
.
layers
.
argmax
(
word_out
,
axis
=
1
)
word_ids
.
stop_gradient
=
True
...
...
@@ -106,7 +112,7 @@ class SRNPredict(object):
#===== GSRM Semantic reasoning block =====
"""
This module is achieved through bi-transformers,
This module is achieved through bi-transformers,
ngram_feature1 is the froward one, ngram_fetaure2 is the backward one
"""
pad_idx
=
self
.
char_num
...
...
@@ -120,7 +126,8 @@ class SRNPredict(object):
word1 for forward; word2 for backward
"""
word1
=
fluid
.
layers
.
cast
(
word_ids
,
"float32"
)
word1
=
fluid
.
layers
.
pad
(
word1
,
[
0
,
0
,
1
,
0
,
0
,
0
],
pad_value
=
1.0
*
pad_idx
)
word1
=
fluid
.
layers
.
pad
(
word1
,
[
0
,
0
,
1
,
0
,
0
,
0
],
pad_value
=
1.0
*
pad_idx
)
word1
=
fluid
.
layers
.
cast
(
word1
,
"int64"
)
word1
=
word1
[:,
:
-
1
,
:]
word2
=
word_ids
...
...
@@ -132,39 +139,40 @@ class SRNPredict(object):
enc_inputs_1
=
[
word1
,
gsrm_word_pos
,
gsrm_slf_attn_bias1
]
enc_inputs_2
=
[
word2
,
gsrm_word_pos
,
gsrm_slf_attn_bias2
]
gsrm_feature1
=
wrap_encoder
(
src_vocab_size
=
self
.
char_num
+
1
,
max_length
=
self
.
max_length
,
n_layer
=
self
.
num_decoder_TUs
,
n_head
=
self
.
num_heads
,
d_key
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_value
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_model
=
self
.
hidden_dims
,
d_inner_hid
=
self
.
hidden_dims
,
prepostprocess_dropout
=
0.1
,
attention_dropout
=
0.1
,
relu_dropout
=
0.1
,
preprocess_cmd
=
"n"
,
postprocess_cmd
=
"da"
,
weight_sharing
=
True
,
enc_inputs
=
enc_inputs_1
,
)
gsrm_feature2
=
wrap_encoder
(
src_vocab_size
=
self
.
char_num
+
1
,
max_length
=
self
.
max_length
,
n_layer
=
self
.
num_decoder_TUs
,
n_head
=
self
.
num_heads
,
d_key
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_value
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_model
=
self
.
hidden_dims
,
d_inner_hid
=
self
.
hidden_dims
,
prepostprocess_dropout
=
0.1
,
attention_dropout
=
0.1
,
relu_dropout
=
0.1
,
preprocess_cmd
=
"n"
,
postprocess_cmd
=
"da"
,
weight_sharing
=
True
,
enc_inputs
=
enc_inputs_2
,
)
gsrm_feature2
=
fluid
.
layers
.
pad
(
gsrm_feature2
,
[
0
,
0
,
0
,
1
,
0
,
0
],
pad_value
=
0.
)
gsrm_feature1
=
wrap_encoder
(
src_vocab_size
=
self
.
char_num
+
1
,
max_length
=
self
.
max_length
,
n_layer
=
self
.
num_decoder_TUs
,
n_head
=
self
.
num_heads
,
d_key
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_value
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_model
=
self
.
hidden_dims
,
d_inner_hid
=
self
.
hidden_dims
,
prepostprocess_dropout
=
0.1
,
attention_dropout
=
0.1
,
relu_dropout
=
0.1
,
preprocess_cmd
=
"n"
,
postprocess_cmd
=
"da"
,
weight_sharing
=
True
,
enc_inputs
=
enc_inputs_1
,
)
gsrm_feature2
=
wrap_encoder
(
src_vocab_size
=
self
.
char_num
+
1
,
max_length
=
self
.
max_length
,
n_layer
=
self
.
num_decoder_TUs
,
n_head
=
self
.
num_heads
,
d_key
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_value
=
int
(
self
.
hidden_dims
/
self
.
num_heads
),
d_model
=
self
.
hidden_dims
,
d_inner_hid
=
self
.
hidden_dims
,
prepostprocess_dropout
=
0.1
,
attention_dropout
=
0.1
,
relu_dropout
=
0.1
,
preprocess_cmd
=
"n"
,
postprocess_cmd
=
"da"
,
weight_sharing
=
True
,
enc_inputs
=
enc_inputs_2
,
)
gsrm_feature2
=
fluid
.
layers
.
pad
(
gsrm_feature2
,
[
0
,
0
,
0
,
1
,
0
,
0
],
pad_value
=
0.
)
gsrm_feature2
=
gsrm_feature2
[:,
1
:,
]
gsrm_features
=
gsrm_feature1
+
gsrm_feature2
...
...
@@ -172,10 +180,12 @@ class SRNPredict(object):
gsrm_out
=
fluid
.
layers
.
matmul
(
x
=
gsrm_features
,
y
=
fluid
.
default_main_program
().
global_block
().
var
(
"src_word_emb_table"
),
y
=
fluid
.
default_main_program
().
global_block
().
var
(
"src_word_emb_table"
),
transpose_y
=
True
)
b
,
t
,
c
=
gsrm_out
.
shape
gsrm_out
=
fluid
.
layers
.
softmax
(
input
=
fluid
.
layers
.
reshape
(
gsrm_out
,
[
-
1
,
c
]))
b
,
t
,
c
=
gsrm_out
.
shape
gsrm_out
=
fluid
.
layers
.
softmax
(
input
=
fluid
.
layers
.
reshape
(
gsrm_out
,
[
-
1
,
c
]))
return
gsrm_features
,
word_out
,
gsrm_out
...
...
@@ -184,19 +194,25 @@ class SRNPredict(object):
#===== Visual-Semantic Fusion Decoder Module =====
b
,
t
,
c1
=
pvam_features
.
shape
b
,
t
,
c2
=
gsrm_features
.
shape
combine_features_
=
fluid
.
layers
.
concat
([
pvam_features
,
gsrm_features
],
axis
=
2
)
img_comb_features_
=
fluid
.
layers
.
reshape
(
x
=
combine_features_
,
shape
=
[
-
1
,
c1
+
c2
])
img_comb_features_map
=
fluid
.
layers
.
fc
(
input
=
img_comb_features_
,
size
=
c1
,
act
=
"sigmoid"
)
img_comb_features_map
=
fluid
.
layers
.
reshape
(
x
=
img_comb_features_map
,
shape
=
[
-
1
,
t
,
c1
])
combine_features
=
img_comb_features_map
*
pvam_features
+
(
1.0
-
img_comb_features_map
)
*
gsrm_features
img_comb_features
=
fluid
.
layers
.
reshape
(
x
=
combine_features
,
shape
=
[
-
1
,
c1
])
combine_features_
=
fluid
.
layers
.
concat
(
[
pvam_features
,
gsrm_features
],
axis
=
2
)
img_comb_features_
=
fluid
.
layers
.
reshape
(
x
=
combine_features_
,
shape
=
[
-
1
,
c1
+
c2
])
img_comb_features_map
=
fluid
.
layers
.
fc
(
input
=
img_comb_features_
,
size
=
c1
,
act
=
"sigmoid"
)
img_comb_features_map
=
fluid
.
layers
.
reshape
(
x
=
img_comb_features_map
,
shape
=
[
-
1
,
t
,
c1
])
combine_features
=
img_comb_features_map
*
pvam_features
+
(
1.0
-
img_comb_features_map
)
*
gsrm_features
img_comb_features
=
fluid
.
layers
.
reshape
(
x
=
combine_features
,
shape
=
[
-
1
,
c1
])
fc_out
=
fluid
.
layers
.
fc
(
input
=
img_comb_features
,
size
=
self
.
char_num
,
act
=
"softmax"
)
return
fc_out
def
__call__
(
self
,
inputs
,
others
,
mode
=
None
):
pvam_features
=
self
.
pvam
(
inputs
,
others
)
...
...
@@ -204,15 +220,11 @@ class SRNPredict(object):
final_out
=
self
.
vsfd
(
pvam_features
,
gsrm_features
)
_
,
decoded_out
=
fluid
.
layers
.
topk
(
input
=
final_out
,
k
=
1
)
predicts
=
{
'predict'
:
final_out
,
'decoded_out'
:
decoded_out
,
'word_out'
:
word_out
,
'gsrm_out'
:
gsrm_out
}
predicts
=
{
'predict'
:
final_out
,
'decoded_out'
:
decoded_out
,
'word_out'
:
word_out
,
'gsrm_out'
:
gsrm_out
}
return
predicts
ppocr/modeling/losses/rec_srn_loss.py
浏览文件 @
bf4863c9
...
...
@@ -35,24 +35,21 @@ class SRNLoss(object):
lbl_weight
=
others
[
'lbl_weight'
]
casted_label
=
fluid
.
layers
.
cast
(
x
=
label
,
dtype
=
'int64'
)
cost_word
=
fluid
.
layers
.
cross_entropy
(
input
=
word_predict
,
label
=
casted_label
)
cost_gsrm
=
fluid
.
layers
.
cross_entropy
(
input
=
gsrm_predict
,
label
=
casted_label
)
cost_vsfd
=
fluid
.
layers
.
cross_entropy
(
input
=
predict
,
label
=
casted_label
)
#cost_word = cost_word * lbl_weight
#cost_gsrm = cost_gsrm * lbl_weight
#cost_vsfd = cost_vsfd * lbl_weight
cost_word
=
fluid
.
layers
.
reshape
(
x
=
fluid
.
layers
.
reduce_sum
(
cost_word
),
shape
=
[
1
])
cost_gsrm
=
fluid
.
layers
.
reshape
(
x
=
fluid
.
layers
.
reduce_sum
(
cost_gsrm
),
shape
=
[
1
])
cost_vsfd
=
fluid
.
layers
.
reshape
(
x
=
fluid
.
layers
.
reduce_sum
(
cost_vsfd
),
shape
=
[
1
])
sum_cost
=
fluid
.
layers
.
sum
([
cost_word
,
cost_vsfd
*
2.0
,
cost_gsrm
*
0.15
])
#sum_cost = fluid.layers.sum([cost_word * 3.0, cost_vsfd, cost_gsrm * 0.15])
#sum_cost = cost_word
#fluid.layers.Print(cost_word,message="word_cost")
#fluid.layers.Print(cost_vsfd,message="img_cost")
return
[
sum_cost
,
cost_vsfd
,
cost_word
]
#return [sum_cost, cost_vsfd, cost_word]
cost_word
=
fluid
.
layers
.
cross_entropy
(
input
=
word_predict
,
label
=
casted_label
)
cost_gsrm
=
fluid
.
layers
.
cross_entropy
(
input
=
gsrm_predict
,
label
=
casted_label
)
cost_vsfd
=
fluid
.
layers
.
cross_entropy
(
input
=
predict
,
label
=
casted_label
)
cost_word
=
fluid
.
layers
.
reshape
(
x
=
fluid
.
layers
.
reduce_sum
(
cost_word
),
shape
=
[
1
])
cost_gsrm
=
fluid
.
layers
.
reshape
(
x
=
fluid
.
layers
.
reduce_sum
(
cost_gsrm
),
shape
=
[
1
])
cost_vsfd
=
fluid
.
layers
.
reshape
(
x
=
fluid
.
layers
.
reduce_sum
(
cost_vsfd
),
shape
=
[
1
])
sum_cost
=
fluid
.
layers
.
sum
(
[
cost_word
,
cost_vsfd
*
2.0
,
cost_gsrm
*
0.15
])
return
[
sum_cost
,
cost_vsfd
,
cost_word
]
ppocr/utils/character.py
浏览文件 @
bf4863c9
...
...
@@ -149,38 +149,29 @@ def cal_predicts_accuracy(char_ops,
acc
=
acc_num
*
1.0
/
img_num
return
acc
,
acc_num
,
img_num
def
cal_predicts_accuracy_srn
(
char_ops
,
preds
,
labels
,
max_text_len
,
is_debug
=
False
):
preds
,
labels
,
max_text_len
,
is_debug
=
False
):
acc_num
=
0
img_num
=
0
total_len
=
preds
.
shape
[
0
]
img_num
=
int
(
total_len
/
max_text_len
)
#print (img_num)
for
i
in
range
(
img_num
):
cur_label
=
[]
cur_pred
=
[]
for
j
in
range
(
max_text_len
):
if
labels
[
j
+
i
*
max_text_len
]
!=
37
:
#0
if
labels
[
j
+
i
*
max_text_len
]
!=
37
:
#0
cur_label
.
append
(
labels
[
j
+
i
*
max_text_len
][
0
])
else
:
break
if
is_debug
:
for
j
in
range
(
max_text_len
):
if
preds
[
j
+
i
*
max_text_len
]
!=
37
:
#0
cur_pred
.
append
(
preds
[
j
+
i
*
max_text_len
][
0
])
else
:
break
print
(
"cur_label: "
,
cur_label
)
print
(
"cur_pred: "
,
cur_pred
)
for
j
in
range
(
max_text_len
+
1
):
if
j
<
len
(
cur_label
)
and
preds
[
j
+
i
*
max_text_len
][
0
]
!=
cur_label
[
j
]:
if
j
<
len
(
cur_label
)
and
preds
[
j
+
i
*
max_text_len
][
0
]
!=
cur_label
[
j
]:
break
elif
j
==
len
(
cur_label
)
and
j
==
max_text_len
:
acc_num
+=
1
...
...
tools/eval_utils/eval_rec_utils.py
浏览文件 @
bf4863c9
...
...
@@ -123,8 +123,8 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
def
test_rec_benchmark
(
exe
,
config
,
eval_info_dict
):
" Evaluate lmdb dataset "
eval_data_list
=
[
'IIIT5k_3000'
,
'SVT'
,
'IC03_860'
,
\
'IC13_857'
,
'IC15_1811'
,
'IC15_2077'
,
'SVTP'
,
'CUTE80'
]
eval_data_list
=
[
'IIIT5k_3000'
,
'SVT'
,
'IC03_860'
,
'IC03_867'
,
\
'IC13_857'
,
'IC1
3_1015'
,
'IC1
5_1811'
,
'IC15_2077'
,
'SVTP'
,
'CUTE80'
]
eval_data_dir
=
config
[
'TestReader'
][
'lmdb_sets_dir'
]
total_evaluation_data_number
=
0
total_correct_number
=
0
...
...
tools/infer_rec.py
浏览文件 @
bf4863c9
...
...
@@ -64,7 +64,6 @@ def main():
exe
=
fluid
.
Executor
(
place
)
rec_model
=
create_module
(
config
[
'Architecture'
][
'function'
])(
params
=
config
)
startup_prog
=
fluid
.
Program
()
eval_prog
=
fluid
.
Program
()
with
fluid
.
program_guard
(
eval_prog
,
startup_prog
):
...
...
@@ -86,10 +85,36 @@ def main():
for
i
in
range
(
max_img_num
):
logger
.
info
(
"infer_img:%s"
%
infer_list
[
i
])
img
=
next
(
blobs
)
predict
=
exe
.
run
(
program
=
eval_prog
,
feed
=
{
"image"
:
img
},
fetch_list
=
fetch_varname_list
,
return_numpy
=
False
)
if
loss_type
!=
"srn"
:
predict
=
exe
.
run
(
program
=
eval_prog
,
feed
=
{
"image"
:
img
},
fetch_list
=
fetch_varname_list
,
return_numpy
=
False
)
else
:
encoder_word_pos_list
=
[]
gsrm_word_pos_list
=
[]
gsrm_slf_attn_bias1_list
=
[]
gsrm_slf_attn_bias2_list
=
[]
encoder_word_pos_list
.
append
(
img
[
1
])
gsrm_word_pos_list
.
append
(
img
[
2
])
gsrm_slf_attn_bias1_list
.
append
(
img
[
3
])
gsrm_slf_attn_bias2_list
.
append
(
img
[
4
])
encoder_word_pos_list
=
np
.
concatenate
(
encoder_word_pos_list
,
axis
=
0
).
astype
(
np
.
int64
)
gsrm_word_pos_list
=
np
.
concatenate
(
gsrm_word_pos_list
,
axis
=
0
).
astype
(
np
.
int64
)
gsrm_slf_attn_bias1_list
=
np
.
concatenate
(
gsrm_slf_attn_bias1_list
,
axis
=
0
).
astype
(
np
.
float32
)
gsrm_slf_attn_bias2_list
=
np
.
concatenate
(
gsrm_slf_attn_bias2_list
,
axis
=
0
).
astype
(
np
.
float32
)
predict
=
exe
.
run
(
program
=
eval_prog
,
\
feed
=
{
'image'
:
img
[
0
],
'encoder_word_pos'
:
encoder_word_pos_list
,
'gsrm_word_pos'
:
gsrm_word_pos_list
,
'gsrm_slf_attn_bias1'
:
gsrm_slf_attn_bias1_list
,
'gsrm_slf_attn_bias2'
:
gsrm_slf_attn_bias2_list
},
\
fetch_list
=
fetch_varname_list
,
\
return_numpy
=
False
)
if
loss_type
==
"ctc"
:
preds
=
np
.
array
(
predict
[
0
])
preds
=
preds
.
reshape
(
-
1
)
...
...
@@ -114,7 +139,18 @@ def main():
score
=
np
.
mean
(
probs
[
0
,
1
:
end_pos
[
1
]])
preds
=
preds
.
reshape
(
-
1
)
preds_text
=
char_ops
.
decode
(
preds
)
elif
loss_type
==
"srn"
:
cur_pred
=
[]
preds
=
np
.
array
(
predict
[
0
])
preds
=
preds
.
reshape
(
-
1
)
probs
=
np
.
array
(
predict
[
1
])
ind
=
np
.
argmax
(
probs
,
axis
=
1
)
valid_ind
=
np
.
where
(
preds
!=
37
)[
0
]
if
len
(
valid_ind
)
==
0
:
continue
score
=
np
.
mean
(
probs
[
valid_ind
,
ind
[
valid_ind
]])
preds
=
preds
[:
valid_ind
[
-
1
]
+
1
]
preds_text
=
char_ops
.
decode
(
preds
)
logger
.
info
(
"
\t
index: {}"
.
format
(
preds
))
logger
.
info
(
"
\t
word : {}"
.
format
(
preds_text
))
logger
.
info
(
"
\t
score: {}"
.
format
(
score
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录