Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
096fd271
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
1 年多 前同步成功
通知
1541
Star
32964
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
096fd271
编写于
5月 23, 2023
作者:
X
xiaoting
提交者:
GitHub
5月 23, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix sr_telescope (#10004)
上级
2c0664b2
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
50 additions
and
30 deletions
+50
-30
ppocr/modeling/heads/sr_rensnet_transformer.py
ppocr/modeling/heads/sr_rensnet_transformer.py
+1
-1
ppocr/modeling/transforms/tbsrn.py
ppocr/modeling/transforms/tbsrn.py
+47
-28
tools/export_model.py
tools/export_model.py
+2
-1
未找到文件。
ppocr/modeling/heads/sr_rensnet_transformer.py
浏览文件 @
096fd271
...
...
@@ -78,7 +78,7 @@ class MultiHeadedAttention(nn.Layer):
def
forward
(
self
,
query
,
key
,
value
,
mask
=
None
,
attention_map
=
None
):
if
mask
is
not
None
:
mask
=
mask
.
unsqueeze
(
1
)
nbatches
=
query
.
shape
[
0
]
nbatches
=
paddle
.
shape
(
query
)
[
0
]
query
,
key
,
value
=
\
[
paddle
.
transpose
(
l
(
x
).
reshape
([
nbatches
,
-
1
,
self
.
h
,
self
.
d_k
]),
[
0
,
2
,
1
,
3
])
...
...
ppocr/modeling/transforms/tbsrn.py
浏览文件 @
096fd271
...
...
@@ -45,21 +45,24 @@ def positionalencoding2d(d_model, height, width):
pe
=
paddle
.
zeros
([
d_model
,
height
,
width
])
# Each dimension use half of d_model
d_model
=
int
(
d_model
/
2
)
div_term
=
paddle
.
exp
(
paddle
.
arange
(
0.
,
d_model
,
2
)
*
-
(
math
.
log
(
10000.0
)
/
d_model
))
div_term
=
paddle
.
exp
(
paddle
.
arange
(
0.
,
d_model
,
2
)
*
-
(
math
.
log
(
10000.0
)
/
d_model
))
pos_w
=
paddle
.
arange
(
0.
,
width
,
dtype
=
'float32'
).
unsqueeze
(
1
)
pos_h
=
paddle
.
arange
(
0.
,
height
,
dtype
=
'float32'
).
unsqueeze
(
1
)
pe
[
0
:
d_model
:
2
,
:,
:]
=
paddle
.
sin
(
pos_w
*
div_term
).
transpose
([
1
,
0
]).
unsqueeze
(
1
).
tile
([
1
,
height
,
1
])
pe
[
1
:
d_model
:
2
,
:,
:]
=
paddle
.
cos
(
pos_w
*
div_term
).
transpose
([
1
,
0
]).
unsqueeze
(
1
).
tile
([
1
,
height
,
1
])
pe
[
d_model
::
2
,
:,
:]
=
paddle
.
sin
(
pos_h
*
div_term
).
transpose
([
1
,
0
]).
unsqueeze
(
2
).
tile
([
1
,
1
,
width
])
pe
[
d_model
+
1
::
2
,
:,
:]
=
paddle
.
cos
(
pos_h
*
div_term
).
transpose
([
1
,
0
]).
unsqueeze
(
2
).
tile
([
1
,
1
,
width
])
pe
[
0
:
d_model
:
2
,
:,
:]
=
paddle
.
sin
(
pos_w
*
div_term
).
transpose
(
[
1
,
0
]).
unsqueeze
(
1
).
tile
([
1
,
height
,
1
])
pe
[
1
:
d_model
:
2
,
:,
:]
=
paddle
.
cos
(
pos_w
*
div_term
).
transpose
(
[
1
,
0
]).
unsqueeze
(
1
).
tile
([
1
,
height
,
1
])
pe
[
d_model
::
2
,
:,
:]
=
paddle
.
sin
(
pos_h
*
div_term
).
transpose
(
[
1
,
0
]).
unsqueeze
(
2
).
tile
([
1
,
1
,
width
])
pe
[
d_model
+
1
::
2
,
:,
:]
=
paddle
.
cos
(
pos_h
*
div_term
).
transpose
(
[
1
,
0
]).
unsqueeze
(
2
).
tile
([
1
,
1
,
width
])
return
pe
class
FeatureEnhancer
(
nn
.
Layer
):
def
__init__
(
self
):
super
(
FeatureEnhancer
,
self
).
__init__
()
...
...
@@ -77,13 +80,16 @@ class FeatureEnhancer(nn.Layer):
global_info: (batch, embedding_size, 1, 1)
conv_feature: (batch, channel, H, W)
'''
batch
=
conv_feature
.
shape
[
0
]
position2d
=
positionalencoding2d
(
64
,
16
,
64
).
cast
(
'float32'
).
unsqueeze
(
0
).
reshape
([
1
,
64
,
1024
])
batch
=
paddle
.
shape
(
conv_feature
)[
0
]
position2d
=
positionalencoding2d
(
64
,
16
,
64
).
cast
(
'float32'
).
unsqueeze
(
0
).
reshape
([
1
,
64
,
1024
])
position2d
=
position2d
.
tile
([
batch
,
1
,
1
])
conv_feature
=
paddle
.
concat
([
conv_feature
,
position2d
],
1
)
# batch, 128(64+64), 32, 128
conv_feature
=
paddle
.
concat
([
conv_feature
,
position2d
],
1
)
# batch, 128(64+64), 32, 128
result
=
conv_feature
.
transpose
([
0
,
2
,
1
])
origin_result
=
result
result
=
self
.
mul_layernorm1
(
origin_result
+
self
.
multihead
(
result
,
result
,
result
,
mask
=
None
)[
0
])
result
=
self
.
mul_layernorm1
(
origin_result
+
self
.
multihead
(
result
,
result
,
result
,
mask
=
None
)[
0
])
origin_result
=
result
result
=
self
.
mul_layernorm3
(
origin_result
+
self
.
pff
(
result
))
result
=
self
.
linear
(
result
)
...
...
@@ -124,23 +130,35 @@ class TBSRN(nn.Layer):
assert
math
.
log
(
scale_factor
,
2
)
%
1
==
0
upsample_block_num
=
int
(
math
.
log
(
scale_factor
,
2
))
self
.
block1
=
nn
.
Sequential
(
nn
.
Conv2D
(
in_planes
,
2
*
hidden_units
,
kernel_size
=
9
,
padding
=
4
),
nn
.
Conv2D
(
in_planes
,
2
*
hidden_units
,
kernel_size
=
9
,
padding
=
4
),
nn
.
PReLU
()
# nn.ReLU()
)
self
.
srb_nums
=
srb_nums
for
i
in
range
(
srb_nums
):
setattr
(
self
,
'block%d'
%
(
i
+
2
),
RecurrentResidualBlock
(
2
*
hidden_units
))
setattr
(
self
,
'block%d'
%
(
srb_nums
+
2
),
nn
.
Sequential
(
nn
.
Conv2D
(
2
*
hidden_units
,
2
*
hidden_units
,
kernel_size
=
3
,
padding
=
1
),
nn
.
BatchNorm2D
(
2
*
hidden_units
)
))
setattr
(
self
,
'block%d'
%
(
i
+
2
),
RecurrentResidualBlock
(
2
*
hidden_units
))
setattr
(
self
,
'block%d'
%
(
srb_nums
+
2
),
nn
.
Sequential
(
nn
.
Conv2D
(
2
*
hidden_units
,
2
*
hidden_units
,
kernel_size
=
3
,
padding
=
1
),
nn
.
BatchNorm2D
(
2
*
hidden_units
)))
# self.non_local = NonLocalBlock2D(64, 64)
block_
=
[
UpsampleBLock
(
2
*
hidden_units
,
2
)
for
_
in
range
(
upsample_block_num
)]
block_
.
append
(
nn
.
Conv2D
(
2
*
hidden_units
,
in_planes
,
kernel_size
=
9
,
padding
=
4
))
block_
=
[
UpsampleBLock
(
2
*
hidden_units
,
2
)
for
_
in
range
(
upsample_block_num
)
]
block_
.
append
(
nn
.
Conv2D
(
2
*
hidden_units
,
in_planes
,
kernel_size
=
9
,
padding
=
4
))
setattr
(
self
,
'block%d'
%
(
srb_nums
+
3
),
nn
.
Sequential
(
*
block_
))
self
.
tps_inputsize
=
[
height
//
scale_factor
,
width
//
scale_factor
]
tps_outputsize
=
[
height
//
scale_factor
,
width
//
scale_factor
]
...
...
@@ -164,7 +182,8 @@ class TBSRN(nn.Layer):
self
.
english_dict
=
{}
for
index
in
range
(
len
(
self
.
english_alphabet
)):
self
.
english_dict
[
self
.
english_alphabet
[
index
]]
=
index
transformer
=
Transformer
(
alphabet
=
'-0123456789abcdefghijklmnopqrstuvwxyz'
)
transformer
=
Transformer
(
alphabet
=
'-0123456789abcdefghijklmnopqrstuvwxyz'
)
self
.
transformer
=
transformer
for
param
in
self
.
transformer
.
parameters
():
param
.
trainable
=
False
...
...
@@ -219,10 +238,10 @@ class TBSRN(nn.Layer):
# add transformer
label
=
[
str_filt
(
i
,
'lower'
)
+
'-'
for
i
in
x
[
2
]]
length_tensor
,
input_tensor
,
text_gt
=
self
.
label_encoder
(
label
)
hr_pred
,
word_attention_map_gt
,
hr_correct_list
=
self
.
transformer
(
hr_img
,
length_tensor
,
input_tensor
)
sr_pred
,
word_attention_map_pred
,
sr_correct_list
=
self
.
transformer
(
sr_img
,
length_tensor
,
input_tensor
)
hr_pred
,
word_attention_map_gt
,
hr_correct_list
=
self
.
transformer
(
hr_img
,
length_tensor
,
input_tensor
)
sr_pred
,
word_attention_map_pred
,
sr_correct_list
=
self
.
transformer
(
sr_img
,
length_tensor
,
input_tensor
)
output
[
"hr_img"
]
=
hr_img
output
[
"hr_pred"
]
=
hr_pred
output
[
"text_gt"
]
=
text_gt
...
...
@@ -257,8 +276,8 @@ class RecurrentResidualBlock(nn.Layer):
residual
=
self
.
conv2
(
residual
)
residual
=
self
.
bn2
(
residual
)
size
=
residual
.
shape
size
=
paddle
.
shape
(
residual
)
residual
=
residual
.
reshape
([
size
[
0
],
size
[
1
],
-
1
])
residual
=
self
.
feature_enhancer
(
residual
)
residual
=
residual
.
reshape
([
size
[
0
],
size
[
1
],
size
[
2
],
size
[
3
]])
return
x
+
residual
\ No newline at end of file
return
x
+
residual
tools/export_model.py
浏览文件 @
096fd271
...
...
@@ -187,7 +187,8 @@ def export_single_model(model,
shape
=
[
None
]
+
infer_shape
,
dtype
=
"float32"
)
])
if
arch_config
[
"Backbone"
][
"name"
]
==
"PPLCNetV3"
:
if
arch_config
[
"model_type"
]
!=
"sr"
and
arch_config
[
"Backbone"
][
"name"
]
==
"PPLCNetV3"
:
# for rep lcnetv3
for
layer
in
model
.
sublayers
():
if
hasattr
(
layer
,
"rep"
)
and
not
getattr
(
layer
,
"is_repped"
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录