Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
1cda437c
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
1 年多 前同步成功
通知
1534
Star
32963
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1cda437c
编写于
7月 10, 2022
作者:
xuyang2233
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
modified pr
上级
bbca1e0d
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
13 additions
and
119 deletions
+13
-119
.gitignore
.gitignore
+1
-0
configs/rec/rec_r32_gaspin_bilstm_att.yml
configs/rec/rec_r32_gaspin_bilstm_att.yml
+0
-1
ppocr/losses/rec_spin_att_loss.py
ppocr/losses/rec_spin_att_loss.py
+5
-1
ppocr/modeling/heads/rec_spin_att_head.py
ppocr/modeling/heads/rec_spin_att_head.py
+1
-93
ppocr/modeling/necks/rnn.py
ppocr/modeling/necks/rnn.py
+0
-11
ppocr/modeling/transforms/gaspin_transformer.py
ppocr/modeling/transforms/gaspin_transformer.py
+6
-7
tools/export_model.py
tools/export_model.py
+0
-6
未找到文件。
.gitignore
浏览文件 @
1cda437c
...
...
@@ -11,6 +11,7 @@ inference/
inference_results/
output/
train_data/
log/
*.DS_Store
*.vs
*.user
...
...
configs/rec/rec_r32_gaspin_bilstm_att.yml
浏览文件 @
1cda437c
...
...
@@ -61,7 +61,6 @@ Loss:
PostProcess
:
name
:
SPINAttnLabelDecode
character_dict_path
:
./ppocr/utils/dict/spin_dict.txt
use_space_char
:
False
...
...
ppocr/losses/rec_spin_att_loss.py
浏览文件 @
1cda437c
# copyright (c) 202
1
PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
...
...
@@ -19,6 +20,9 @@ from __future__ import print_function
import
paddle
from
paddle
import
nn
'''This code is referenced from:
https://github.com/hikopensource/DAVAR-Lab-OCR
'''
class
SPINAttentionLoss
(
nn
.
Layer
):
def
__init__
(
self
,
reduction
=
'mean'
,
ignore_index
=-
100
,
**
kwargs
):
...
...
ppocr/modeling/heads/rec_spin_att_head.py
浏览文件 @
1cda437c
# copyright (c) 202
1
PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -80,98 +80,6 @@ class SPINAttentionHead(nn.Layer):
return
probs
class AttentionGRUCell(nn.Layer):
    """Single attention step followed by a GRU cell update.

    The cell scores every position of the encoder feature sequence against
    the previous hidden state, builds a weighted context vector from those
    scores, concatenates it with the one-hot of the current character and
    feeds the result to an ``nn.GRUCell``.

    Args:
        input_size: feature size of ``batch_H`` (input width of ``i2h``).
        hidden_size: size of the hidden state and of the attention projection.
        num_embeddings: width of the one-hot character vector concatenated
            to the context before the GRU cell.
        use_gru: accepted for interface compatibility; it is not read
            anywhere in this class.
    """

    def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
        super(AttentionGRUCell, self).__init__()
        self.hidden_size = hidden_size

        # Projections used to build the additive attention score.
        self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.score = nn.Linear(hidden_size, 1, bias_attr=False)

        # The recurrent cell consumes [context ; char_onehots].
        self.rnn = nn.GRUCell(
            input_size=input_size + num_embeddings, hidden_size=hidden_size)

    def forward(self, prev_hidden, batch_H, char_onehots):
        """Run one attention + GRU step.

        Args:
            prev_hidden: previous hidden state passed to ``h2h`` and to the
                GRU cell.
            batch_H: encoder features; the transpose below treats the
                attention weights as rank 3, so this is presumably
                [batch, steps, input_size] -- TODO confirm against caller.
            char_onehots: one-hot vectors of the current characters,
                concatenated to the context along axis 1.

        Returns:
            A pair ``(cur_hidden, alpha)`` where ``cur_hidden`` is whatever
            ``nn.GRUCell`` returns and ``alpha`` is the transposed attention
            weight tensor.
        """
        feat_proj = self.i2h(batch_H)
        # Insert an axis at position 1 so the hidden projection can be added
        # to every position of the feature projection.
        hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
        energy = self.score(paddle.tanh(paddle.add(feat_proj, hidden_proj)))

        # Normalise over axis 1, then swap the last two axes so the weights
        # can left-multiply batch_H.
        alpha = paddle.transpose(F.softmax(energy, axis=1), perm=[0, 2, 1])
        weighted = paddle.mm(alpha, batch_H)
        context = paddle.squeeze(weighted, axis=1)

        rnn_input = paddle.concat([context, char_onehots], axis=1)
        cur_hidden = self.rnn(rnn_input, prev_hidden)
        return cur_hidden, alpha
class AttentionLSTM(nn.Layer):
    """Attention decoder that unrolls an ``AttentionLSTMCell`` step by step.

    Args:
        in_channels: feature size of the encoder output; stored as
            ``input_size`` and forwarded to the attention cell.
        out_channels: number of character classes; used as the one-hot width
            and as the output width of the ``generator`` layer.
        hidden_size: size of both LSTM state tensors.
        **kwargs: accepted and ignored.
    """

    def __init__(self, in_channels, out_channels, hidden_size, **kwargs):
        super(AttentionLSTM, self).__init__()
        self.input_size = in_channels
        self.hidden_size = hidden_size
        self.num_classes = out_channels

        # AttentionLSTMCell is defined elsewhere in this module.
        self.attention_cell = AttentionLSTMCell(
            in_channels, hidden_size, out_channels, use_gru=False)
        self.generator = nn.Linear(hidden_size, out_channels)

    def _char_to_onehot(self, input_char, onehot_dim):
        """Return the one-hot encoding of ``input_char`` with ``onehot_dim`` classes."""
        return F.one_hot(input_char, onehot_dim)

    def forward(self, inputs, targets=None, batch_max_length=25):
        """Decode ``batch_max_length`` steps from the encoder features.

        Args:
            inputs: encoder features; only ``inputs.shape[0]`` is read here
                (used as the batch size), the tensor itself is handed to the
                attention cell unchanged.
            targets: when given, column ``i`` is used as the input character
                of step ``i`` (teacher forcing). When ``None``, each step is
                fed the argmax of the previous step's output, starting from
                class index 0.
            batch_max_length: number of decoding steps.

        Returns:
            The ``generator`` outputs of all steps concatenated along axis 1.
            In the ``targets is None`` branch the result is ``None`` when
            ``batch_max_length`` is 0.
        """
        n_batch = inputs.shape[0]
        n_steps = batch_max_length

        # Both initial LSTM state tensors start at zero.
        hidden = (paddle.zeros((n_batch, self.hidden_size)),
                  paddle.zeros((n_batch, self.hidden_size)))
        step_states = []

        if targets is not None:
            for step in range(n_steps):
                # One-hot vectors for the step-th ground-truth character.
                onehots = self._char_to_onehot(
                    targets[:, step], onehot_dim=self.num_classes)
                cell_out, _attn = self.attention_cell(hidden, inputs, onehots)

                # The cell result is indexed as [1][0] / [1][1] for the new
                # state pair (presumably (output, (h, c)) -- TODO confirm).
                new_state = cell_out[1]
                hidden = (new_state[0], new_state[1])
                step_states.append(paddle.unsqueeze(hidden[0], axis=1))
            stacked = paddle.concat(step_states, axis=1)
            probs = self.generator(stacked)

        else:
            # Start decoding from class index 0 for every sample.
            targets = paddle.zeros(shape=[n_batch], dtype="int32")
            probs = None

            for step in range(n_steps):
                onehots = self._char_to_onehot(
                    targets, onehot_dim=self.num_classes)
                cell_out, _attn = self.attention_cell(hidden, inputs, onehots)
                # The step prediction uses the cell's first return element,
                # taken before the state pair is unpacked.
                probs_step = self.generator(cell_out[0])
                new_state = cell_out[1]
                hidden = (new_state[0], new_state[1])

                step_probs = paddle.unsqueeze(probs_step, axis=1)
                if probs is None:
                    probs = step_probs
                else:
                    probs = paddle.concat([probs, step_probs], axis=1)

                # Greedy choice becomes the next step's input character.
                targets = probs_step.argmax(axis=1)
        return probs
class
AttentionLSTMCell
(
nn
.
Layer
):
def
__init__
(
self
,
input_size
,
hidden_size
,
num_embeddings
,
use_gru
=
False
):
super
(
AttentionLSTMCell
,
self
).
__init__
()
...
...
ppocr/modeling/necks/rnn.py
浏览文件 @
1cda437c
...
...
@@ -70,17 +70,6 @@ class BidirectionalLSTM(nn.Layer):
self
.
linear
=
nn
.
Linear
(
hidden_size
*
2
,
output_size
)
def
forward
(
self
,
input_feature
):
"""
Args:
input_feature (Torch.Tensor): visual feature [batch_size x T x input_size]
Returns:
Torch.Tensor: LSTM output contextual feature [batch_size x T x output_size]
"""
# self.rnn.flatten_parameters() # error in export_model
recurrent
,
_
=
self
.
rnn
(
input_feature
)
# batch_size x T x input_size -> batch_size x T x (2*hidden_size)
if
self
.
with_linear
:
output
=
self
.
linear
(
recurrent
)
# batch_size x T x output_size
...
...
ppocr/modeling/transforms/gaspin_transformer.py
浏览文件 @
1cda437c
# copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -71,14 +71,14 @@ class SP_TransformerNetwork(nn.Layer):
"""
Args:
batch_I (
torch.
Tensor): batch of input images [batch_size x nc x I_height x I_width]
batch_I (Tensor): batch of input images [batch_size x nc x I_height x I_width]
weights:
offsets: the predicted offset by AIN, a scalar
lambda_color: the learnable update gate \alpha in Equa. (5) as
g(x) = (1 - \alpha) \odot x + \alpha \odot x_{offsets}
Returns:
torch.
Tensor: transformed images by SPN as Equa. (4) in Ref. [1]
Tensor: transformed images by SPN as Equa. (4) in Ref. [1]
[batch_size x I_channel_num x I_r_height x I_r_width]
"""
...
...
@@ -114,8 +114,6 @@ class GA_SPIN_Transformer(nn.Layer):
in_channels (int): channel of input features,
set it to 1 if the grayscale images and 3 if RGB input
I_r_size (tuple): size of rectified images (used in STN transformations)
inputDataType (str): the type of input data,
only support 'torch.cuda.FloatTensor' this version
offsets (bool): set it to False if use SPN w.o. AIN,
and set it to True if use SPIN (both with SPN and AIN)
norm_type (str): the normalization type of the module,
...
...
@@ -123,6 +121,7 @@ class GA_SPIN_Transformer(nn.Layer):
default_type (int): the K chromatic space,
set it to 3/5/6 depend on the complexity of transformation intensities
loc_lr (float): learning rate of location network
stn (bool): whether to use stn.
"""
super
(
GA_SPIN_Transformer
,
self
).
__init__
()
...
...
@@ -233,12 +232,12 @@ class GA_SPIN_Transformer(nn.Layer):
def
forward
(
self
,
x
,
return_weight
=
False
):
"""
Args:
x (
torch.cuda.Float
Tensor): input image batch
x (Tensor): input image batch
return_weight (bool): set to False by default,
if set to True return the predicted offsets of AIN, denoted as x_{offsets}
Returns:
torch.
Tensor: rectified image [batch_size x I_channel_num x I_height x I_width], the same as the input size
Tensor: rectified image [batch_size x I_channel_num x I_height x I_width], the same as the input size
"""
if
self
.
spt
:
...
...
tools/export_model.py
浏览文件 @
1cda437c
...
...
@@ -73,12 +73,6 @@ def export_single_model(model, arch_config, save_path, logger, quanter=None):
shape
=
[
None
,
3
,
64
,
512
],
dtype
=
"float32"
),
]
model
=
to_static
(
model
,
input_spec
=
other_shape
)
elif
arch_config
[
"algorithm"
]
==
"SPIN"
:
other_shape
=
[
paddle
.
static
.
InputSpec
(
shape
=
[
None
,
1
,
32
,
100
],
dtype
=
"float32"
),
]
model
=
to_static
(
model
,
input_spec
=
other_shape
)
else
:
infer_shape
=
[
3
,
-
1
,
-
1
]
if
arch_config
[
"model_type"
]
==
"rec"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录