Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
2bf8ad9b
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2bf8ad9b
编写于
8月 24, 2021
作者:
T
Topdu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
modify transformeroptim, resize
上级
73058cc0
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
34 addition
and
50 deletion
+34
-50
configs/rec/rec_mtb_nrtr.yml
configs/rec/rec_mtb_nrtr.yml
+5
-3
ppocr/data/imaug/__init__.py
ppocr/data/imaug/__init__.py
+1
-1
ppocr/data/imaug/rec_img_aug.py
ppocr/data/imaug/rec_img_aug.py
+10
-19
ppocr/losses/rec_nrtr_loss.py
ppocr/losses/rec_nrtr_loss.py
+7
-15
ppocr/modeling/heads/__init__.py
ppocr/modeling/heads/__init__.py
+2
-2
ppocr/modeling/heads/multiheadAttention.py
ppocr/modeling/heads/multiheadAttention.py
+2
-2
ppocr/modeling/heads/rec_nrtr_head.py
ppocr/modeling/heads/rec_nrtr_head.py
+7
-8
未找到文件。
configs/rec/rec_mtb_nrtr.yml
浏览文件 @
2bf8ad9b
...
@@ -43,7 +43,7 @@ Architecture:
...
@@ -43,7 +43,7 @@ Architecture:
name
:
MTB
name
:
MTB
cnn_num
:
2
cnn_num
:
2
Head
:
Head
:
name
:
Transformer
Optim
name
:
Transformer
d_model
:
512
d_model
:
512
num_encoder_layers
:
6
num_encoder_layers
:
6
beam_size
:
10
# When Beam size is greater than 0, it means to use beam search when evaluation.
beam_size
:
10
# When Beam size is greater than 0, it means to use beam search when evaluation.
...
@@ -69,8 +69,9 @@ Train:
...
@@ -69,8 +69,9 @@ Train:
img_mode
:
BGR
img_mode
:
BGR
channel_first
:
False
channel_first
:
False
-
NRTRLabelEncode
:
# Class handling label
-
NRTRLabelEncode
:
# Class handling label
-
PILResize
:
-
NRTRRecResizeImg
:
image_shape
:
[
100
,
32
]
image_shape
:
[
100
,
32
]
resize_type
:
PIL
# PIL or OpenCV
-
KeepKeys
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
]
# dataloader will return list in this order
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
]
# dataloader will return list in this order
loader
:
loader
:
...
@@ -88,8 +89,9 @@ Eval:
...
@@ -88,8 +89,9 @@ Eval:
img_mode
:
BGR
img_mode
:
BGR
channel_first
:
False
channel_first
:
False
-
NRTRLabelEncode
:
# Class handling label
-
NRTRLabelEncode
:
# Class handling label
-
PILResize
:
-
NRTRRecResizeImg
:
image_shape
:
[
100
,
32
]
image_shape
:
[
100
,
32
]
resize_type
:
PIL
# PIL or OpenCV
-
KeepKeys
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
]
# dataloader will return list in this order
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
]
# dataloader will return list in this order
loader
:
loader
:
...
...
ppocr/data/imaug/__init__.py
浏览文件 @
2bf8ad9b
...
@@ -21,7 +21,7 @@ from .make_border_map import MakeBorderMap
...
@@ -21,7 +21,7 @@ from .make_border_map import MakeBorderMap
from
.make_shrink_map
import
MakeShrinkMap
from
.make_shrink_map
import
MakeShrinkMap
from
.random_crop_data
import
EastRandomCropData
,
PSERandomCrop
from
.random_crop_data
import
EastRandomCropData
,
PSERandomCrop
from
.rec_img_aug
import
RecAug
,
RecResizeImg
,
ClsResizeImg
,
SRNRecResizeImg
,
PILResize
,
CVResize
from
.rec_img_aug
import
RecAug
,
RecResizeImg
,
ClsResizeImg
,
SRNRecResizeImg
,
NRTRRecResizeImg
from
.randaugment
import
RandAugment
from
.randaugment
import
RandAugment
from
.copy_paste
import
CopyPaste
from
.copy_paste
import
CopyPaste
from
.operators
import
*
from
.operators
import
*
...
...
ppocr/data/imaug/rec_img_aug.py
浏览文件 @
2bf8ad9b
...
@@ -42,30 +42,21 @@ class ClsResizeImg(object):
...
@@ -42,30 +42,21 @@ class ClsResizeImg(object):
data
[
'image'
]
=
norm_img
data
[
'image'
]
=
norm_img
return
data
return
data
class
PILResize
(
object
):
def
__init__
(
self
,
image_shape
,
**
kwargs
):
self
.
image_shape
=
image_shape
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
image_pil
=
Image
.
fromarray
(
np
.
uint8
(
img
))
norm_img
=
image_pil
.
resize
(
self
.
image_shape
,
Image
.
ANTIALIAS
)
norm_img
=
np
.
array
(
norm_img
)
norm_img
=
np
.
expand_dims
(
norm_img
,
-
1
)
norm_img
=
norm_img
.
transpose
((
2
,
0
,
1
))
data
[
'image'
]
=
norm_img
.
astype
(
np
.
float32
)
/
128.
-
1.
return
data
class
NRTRRecResizeImg
(
object
):
class
CVResize
(
object
):
def
__init__
(
self
,
image_shape
,
resize_type
,
**
kwargs
):
def
__init__
(
self
,
image_shape
,
**
kwargs
):
self
.
image_shape
=
image_shape
self
.
image_shape
=
image_shape
self
.
resize_type
=
resize_type
def
__call__
(
self
,
data
):
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
img
=
data
[
'image'
]
#print(img)
if
self
.
resize_type
==
'PIL'
:
norm_img
=
cv2
.
resize
(
img
,
self
.
image_shape
)
image_pil
=
Image
.
fromarray
(
np
.
uint8
(
img
))
norm_img
=
np
.
expand_dims
(
norm_img
,
-
1
)
img
=
image_pil
.
resize
(
self
.
image_shape
,
Image
.
ANTIALIAS
)
img
=
np
.
array
(
img
)
if
self
.
resize_type
==
'OpenCV'
:
img
=
cv2
.
resize
(
img
,
self
.
image_shape
)
norm_img
=
np
.
expand_dims
(
img
,
-
1
)
norm_img
=
norm_img
.
transpose
((
2
,
0
,
1
))
norm_img
=
norm_img
.
transpose
((
2
,
0
,
1
))
data
[
'image'
]
=
norm_img
.
astype
(
np
.
float32
)
/
128.
-
1.
data
[
'image'
]
=
norm_img
.
astype
(
np
.
float32
)
/
128.
-
1.
return
data
return
data
...
...
ppocr/losses/rec_nrtr_loss.py
浏览文件 @
2bf8ad9b
...
@@ -3,34 +3,26 @@ from paddle import nn
...
@@ -3,34 +3,26 @@ from paddle import nn
import
paddle.nn.functional
as
F
import
paddle.nn.functional
as
F
def
cal_performance
(
pred
,
tgt
):
pred
=
pred
.
max
(
1
)[
1
]
tgt
=
tgt
.
contiguous
().
view
(
-
1
)
non_pad_mask
=
tgt
.
ne
(
0
)
n_correct
=
pred
.
eq
(
tgt
)
n_correct
=
n_correct
.
masked_select
(
non_pad_mask
).
sum
().
item
()
return
n_correct
class
NRTRLoss
(
nn
.
Layer
):
class
NRTRLoss
(
nn
.
Layer
):
def
__init__
(
self
,
smoothing
=
True
,
**
kwargs
):
def
__init__
(
self
,
smoothing
=
True
,
**
kwargs
):
super
(
NRTRLoss
,
self
).
__init__
()
super
(
NRTRLoss
,
self
).
__init__
()
self
.
loss_func
=
nn
.
CrossEntropyLoss
(
reduction
=
'mean'
,
ignore_index
=
0
)
self
.
loss_func
=
nn
.
CrossEntropyLoss
(
reduction
=
'mean'
,
ignore_index
=
0
)
self
.
smoothing
=
smoothing
self
.
smoothing
=
smoothing
def
forward
(
self
,
pred
,
batch
):
def
forward
(
self
,
pred
,
batch
):
pred
=
pred
.
reshape
([
-
1
,
pred
.
shape
[
2
]])
pred
=
pred
.
reshape
([
-
1
,
pred
.
shape
[
2
]])
max_len
=
batch
[
2
].
max
()
max_len
=
batch
[
2
].
max
()
tgt
=
batch
[
1
][:,
1
:
2
+
max_len
]
tgt
=
batch
[
1
][:,
1
:
2
+
max_len
]
tgt
=
tgt
.
reshape
([
-
1
]
)
tgt
=
tgt
.
reshape
([
-
1
])
if
self
.
smoothing
:
if
self
.
smoothing
:
eps
=
0.1
eps
=
0.1
n_class
=
pred
.
shape
[
1
]
n_class
=
pred
.
shape
[
1
]
one_hot
=
F
.
one_hot
(
tgt
,
pred
.
shape
[
1
])
one_hot
=
F
.
one_hot
(
tgt
,
pred
.
shape
[
1
])
one_hot
=
one_hot
*
(
1
-
eps
)
+
(
1
-
one_hot
)
*
eps
/
(
n_class
-
1
)
one_hot
=
one_hot
*
(
1
-
eps
)
+
(
1
-
one_hot
)
*
eps
/
(
n_class
-
1
)
log_prb
=
F
.
log_softmax
(
pred
,
axis
=
1
)
log_prb
=
F
.
log_softmax
(
pred
,
axis
=
1
)
non_pad_mask
=
paddle
.
not_equal
(
tgt
,
paddle
.
zeros
(
tgt
.
shape
,
dtype
=
'int64'
))
non_pad_mask
=
paddle
.
not_equal
(
tgt
,
paddle
.
zeros
(
tgt
.
shape
,
dtype
=
'int64'
))
loss
=
-
(
one_hot
*
log_prb
).
sum
(
axis
=
1
)
loss
=
-
(
one_hot
*
log_prb
).
sum
(
axis
=
1
)
loss
=
loss
.
masked_select
(
non_pad_mask
).
mean
()
loss
=
loss
.
masked_select
(
non_pad_mask
).
mean
()
else
:
else
:
...
...
ppocr/modeling/heads/__init__.py
浏览文件 @
2bf8ad9b
...
@@ -26,13 +26,13 @@ def build_head(config):
...
@@ -26,13 +26,13 @@ def build_head(config):
from
.rec_ctc_head
import
CTCHead
from
.rec_ctc_head
import
CTCHead
from
.rec_att_head
import
AttentionHead
from
.rec_att_head
import
AttentionHead
from
.rec_srn_head
import
SRNHead
from
.rec_srn_head
import
SRNHead
from
.rec_nrtr_
optim_head
import
TransformerOptim
from
.rec_nrtr_
head
import
Transformer
# cls head
# cls head
from
.cls_head
import
ClsHead
from
.cls_head
import
ClsHead
support_dict
=
[
support_dict
=
[
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'AttentionHead'
,
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'AttentionHead'
,
'SRNHead'
,
'PGHead'
,
'Transformer
Optim
'
,
'TableAttentionHead'
'SRNHead'
,
'PGHead'
,
'Transformer'
,
'TableAttentionHead'
]
]
#table head
#table head
...
...
ppocr/modeling/heads/multiheadAttention.py
浏览文件 @
2bf8ad9b
...
@@ -24,7 +24,7 @@ zeros_ = constant_(value=0.)
...
@@ -24,7 +24,7 @@ zeros_ = constant_(value=0.)
ones_
=
constant_
(
value
=
1.
)
ones_
=
constant_
(
value
=
1.
)
class
MultiheadAttention
Optim
(
nn
.
Layer
):
class
MultiheadAttention
(
nn
.
Layer
):
"""Allows the model to jointly attend to information
"""Allows the model to jointly attend to information
from different representation subspaces.
from different representation subspaces.
See reference: Attention Is All You Need
See reference: Attention Is All You Need
...
@@ -46,7 +46,7 @@ class MultiheadAttentionOptim(nn.Layer):
...
@@ -46,7 +46,7 @@ class MultiheadAttentionOptim(nn.Layer):
bias
=
True
,
bias
=
True
,
add_bias_kv
=
False
,
add_bias_kv
=
False
,
add_zero_attn
=
False
):
add_zero_attn
=
False
):
super
(
MultiheadAttention
Optim
,
self
).
__init__
()
super
(
MultiheadAttention
,
self
).
__init__
()
self
.
embed_dim
=
embed_dim
self
.
embed_dim
=
embed_dim
self
.
num_heads
=
num_heads
self
.
num_heads
=
num_heads
self
.
dropout
=
dropout
self
.
dropout
=
dropout
...
...
ppocr/modeling/heads/rec_nrtr_
optim_
head.py
→
ppocr/modeling/heads/rec_nrtr_head.py
浏览文件 @
2bf8ad9b
...
@@ -21,7 +21,7 @@ from paddle.nn import LayerList
...
@@ -21,7 +21,7 @@ from paddle.nn import LayerList
from
paddle.nn.initializer
import
XavierNormal
as
xavier_uniform_
from
paddle.nn.initializer
import
XavierNormal
as
xavier_uniform_
from
paddle.nn
import
Dropout
,
Linear
,
LayerNorm
,
Conv2D
from
paddle.nn
import
Dropout
,
Linear
,
LayerNorm
,
Conv2D
import
numpy
as
np
import
numpy
as
np
from
ppocr.modeling.heads.multiheadAttention
import
MultiheadAttention
Optim
from
ppocr.modeling.heads.multiheadAttention
import
MultiheadAttention
from
paddle.nn.initializer
import
Constant
as
constant_
from
paddle.nn.initializer
import
Constant
as
constant_
from
paddle.nn.initializer
import
XavierNormal
as
xavier_normal_
from
paddle.nn.initializer
import
XavierNormal
as
xavier_normal_
...
@@ -29,7 +29,7 @@ zeros_ = constant_(value=0.)
...
@@ -29,7 +29,7 @@ zeros_ = constant_(value=0.)
ones_
=
constant_
(
value
=
1.
)
ones_
=
constant_
(
value
=
1.
)
class
Transformer
Optim
(
nn
.
Layer
):
class
Transformer
(
nn
.
Layer
):
"""A transformer model. User is able to modify the attributes as needed. The architechture
"""A transformer model. User is able to modify the attributes as needed. The architechture
is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer,
is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer,
Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and
Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and
...
@@ -63,7 +63,7 @@ class TransformerOptim(nn.Layer):
...
@@ -63,7 +63,7 @@ class TransformerOptim(nn.Layer):
out_channels
=
0
,
out_channels
=
0
,
dst_vocab_size
=
99
,
dst_vocab_size
=
99
,
scale_embedding
=
True
):
scale_embedding
=
True
):
super
(
Transformer
Optim
,
self
).
__init__
()
super
(
Transformer
,
self
).
__init__
()
self
.
embedding
=
Embeddings
(
self
.
embedding
=
Embeddings
(
d_model
=
d_model
,
d_model
=
d_model
,
vocab
=
dst_vocab_size
,
vocab
=
dst_vocab_size
,
...
@@ -215,8 +215,7 @@ class TransformerOptim(nn.Layer):
...
@@ -215,8 +215,7 @@ class TransformerOptim(nn.Layer):
n_curr_active_inst
=
len
(
curr_active_inst_idx
)
n_curr_active_inst
=
len
(
curr_active_inst_idx
)
new_shape
=
(
n_curr_active_inst
*
n_bm
,
*
d_hs
)
new_shape
=
(
n_curr_active_inst
*
n_bm
,
*
d_hs
)
beamed_tensor
=
beamed_tensor
.
reshape
(
beamed_tensor
=
beamed_tensor
.
reshape
([
n_prev_active_inst
,
-
1
])
[
n_prev_active_inst
,
-
1
])
beamed_tensor
=
beamed_tensor
.
index_select
(
beamed_tensor
=
beamed_tensor
.
index_select
(
paddle
.
to_tensor
(
curr_active_inst_idx
),
axis
=
0
)
paddle
.
to_tensor
(
curr_active_inst_idx
),
axis
=
0
)
beamed_tensor
=
beamed_tensor
.
reshape
([
*
new_shape
])
beamed_tensor
=
beamed_tensor
.
reshape
([
*
new_shape
])
...
@@ -486,7 +485,7 @@ class TransformerEncoderLayer(nn.Layer):
...
@@ -486,7 +485,7 @@ class TransformerEncoderLayer(nn.Layer):
attention_dropout_rate
=
0.0
,
attention_dropout_rate
=
0.0
,
residual_dropout_rate
=
0.1
):
residual_dropout_rate
=
0.1
):
super
(
TransformerEncoderLayer
,
self
).
__init__
()
super
(
TransformerEncoderLayer
,
self
).
__init__
()
self
.
self_attn
=
MultiheadAttention
Optim
(
self
.
self_attn
=
MultiheadAttention
(
d_model
,
nhead
,
dropout
=
attention_dropout_rate
)
d_model
,
nhead
,
dropout
=
attention_dropout_rate
)
self
.
conv1
=
Conv2D
(
self
.
conv1
=
Conv2D
(
...
@@ -555,9 +554,9 @@ class TransformerDecoderLayer(nn.Layer):
...
@@ -555,9 +554,9 @@ class TransformerDecoderLayer(nn.Layer):
attention_dropout_rate
=
0.0
,
attention_dropout_rate
=
0.0
,
residual_dropout_rate
=
0.1
):
residual_dropout_rate
=
0.1
):
super
(
TransformerDecoderLayer
,
self
).
__init__
()
super
(
TransformerDecoderLayer
,
self
).
__init__
()
self
.
self_attn
=
MultiheadAttention
Optim
(
self
.
self_attn
=
MultiheadAttention
(
d_model
,
nhead
,
dropout
=
attention_dropout_rate
)
d_model
,
nhead
,
dropout
=
attention_dropout_rate
)
self
.
multihead_attn
=
MultiheadAttention
Optim
(
self
.
multihead_attn
=
MultiheadAttention
(
d_model
,
nhead
,
dropout
=
attention_dropout_rate
)
d_model
,
nhead
,
dropout
=
attention_dropout_rate
)
self
.
conv1
=
Conv2D
(
self
.
conv1
=
Conv2D
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录