Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
2f9f258f
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2f9f258f
编写于
11月 10, 2020
作者:
W
WenmuZhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
添加tps网络
上级
ff0f23d4
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
398 addition
and
6 deletion
+398
-6
configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+100
-0
ppocr/modeling/architectures/base_model.py
ppocr/modeling/architectures/base_model.py
+6
-5
ppocr/modeling/transform/__init__.py
ppocr/modeling/transform/__init__.py
+3
-1
ppocr/modeling/transform/tps.py
ppocr/modeling/transform/tps.py
+289
-0
未找到文件。
configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
0 → 100644
浏览文件 @
2f9f258f
Global
:
use_gpu
:
true
epoch_num
:
72
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec/r34_vd_tps_bilstm_ctc/
save_epoch_step
:
3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
0
,
2000
]
# if pretrained_model is saved in static mode, load_static_weights must set to True
cal_metric_during_train
:
True
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path
:
character_type
:
en
max_text_length
:
25
infer_mode
:
False
use_space_char
:
False
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
learning_rate
:
0.0005
regularizer
:
name
:
'
L2'
factor
:
0
Architecture
:
model_type
:
rec
algorithm
:
CRNN
Transform
:
name
:
TPS
num_fiducial
:
20
loc_lr
:
0.1
model_name
:
small
Backbone
:
name
:
ResNet
layers
:
34
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
256
Head
:
name
:
CTCHead
fc_decay
:
0
Loss
:
name
:
CTCLoss
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
LMDBDateSet
data_dir
:
./train_data/data_lmdb_release/training/
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
CTCLabelEncode
:
# Class handling label
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
100
]
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
batch_size_per_card
:
256
drop_last
:
True
num_workers
:
8
Eval
:
dataset
:
name
:
LMDBDateSet
data_dir
:
./train_data/data_lmdb_release/validation/
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
CTCLabelEncode
:
# Class handling label
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
100
]
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
256
num_workers
:
4
ppocr/modeling/architectures/base_model.py
浏览文件 @
2f9f258f
...
...
@@ -16,13 +16,14 @@ from __future__ import division
from
__future__
import
print_function
from
paddle
import
nn
from
ppocr.modeling.transform
import
build_transform
from
ppocr.modeling.backbones
import
build_backbone
from
ppocr.modeling.necks
import
build_neck
from
ppocr.modeling.heads
import
build_head
__all__
=
[
'BaseModel'
]
class
BaseModel
(
nn
.
Layer
):
def
__init__
(
self
,
config
):
"""
...
...
ppocr/modeling/transform/__init__.py
浏览文件 @
2f9f258f
...
...
@@ -16,7 +16,9 @@ __all__ = ['build_transform']
def
build_transform
(
config
):
support_dict
=
[
''
]
from
.tps
import
TPS
support_dict
=
[
'TPS'
]
module_name
=
config
.
pop
(
'name'
)
assert
module_name
in
support_dict
,
Exception
(
...
...
ppocr/modeling/transform/tps.py
0 → 100644
浏览文件 @
2f9f258f
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
,
ParamAttr
from
paddle.nn
import
functional
as
F
import
numpy
as
np
class
ConvBNLayer
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
out_channels
,
kernel_size
,
stride
=
1
,
groups
=
1
,
act
=
None
,
name
=
None
):
super
(
ConvBNLayer
,
self
).
__init__
()
self
.
conv
=
nn
.
Conv2D
(
in_channels
=
in_channels
,
out_channels
=
out_channels
,
kernel_size
=
kernel_size
,
stride
=
stride
,
padding
=
(
kernel_size
-
1
)
//
2
,
groups
=
groups
,
weight_attr
=
ParamAttr
(
name
=
name
+
"_weights"
),
bias_attr
=
False
)
bn_name
=
"bn_"
+
name
self
.
bn
=
nn
.
BatchNorm
(
out_channels
,
act
=
act
,
param_attr
=
ParamAttr
(
name
=
bn_name
+
'_scale'
),
bias_attr
=
ParamAttr
(
bn_name
+
'_offset'
),
moving_mean_name
=
bn_name
+
'_mean'
,
moving_variance_name
=
bn_name
+
'_variance'
)
def
forward
(
self
,
x
):
x
=
self
.
conv
(
x
)
x
=
self
.
bn
(
x
)
return
x
class
LocalizationNetwork
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
num_fiducial
,
loc_lr
,
model_name
):
super
(
LocalizationNetwork
,
self
).
__init__
()
self
.
F
=
num_fiducial
F
=
num_fiducial
if
model_name
==
"large"
:
num_filters_list
=
[
64
,
128
,
256
,
512
]
fc_dim
=
256
else
:
num_filters_list
=
[
16
,
32
,
64
,
128
]
fc_dim
=
64
self
.
block_list
=
[]
for
fno
in
range
(
0
,
len
(
num_filters_list
)):
num_filters
=
num_filters_list
[
fno
]
name
=
"loc_conv%d"
%
fno
conv
=
self
.
add_sublayer
(
name
,
ConvBNLayer
(
in_channels
=
in_channels
,
out_channels
=
num_filters
,
kernel_size
=
3
,
act
=
'relu'
,
name
=
name
))
self
.
block_list
.
append
(
conv
)
if
fno
==
len
(
num_filters_list
)
-
1
:
pool
=
nn
.
AdaptiveAvgPool2D
(
1
)
else
:
pool
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
)
in_channels
=
num_filters
self
.
block_list
.
append
(
pool
)
name
=
"loc_fc1"
self
.
fc1
=
nn
.
Linear
(
in_channels
,
fc_dim
,
weight_attr
=
ParamAttr
(
learning_rate
=
loc_lr
,
name
=
name
+
"_w"
),
bias_attr
=
ParamAttr
(
name
=
name
+
'.b_0'
),
name
=
name
)
# Init fc2 in LocalizationNetwork
initial_bias
=
self
.
get_initial_fiducials
()
initial_bias
=
initial_bias
.
reshape
(
-
1
)
name
=
"loc_fc2"
param_attr
=
ParamAttr
(
learning_rate
=
loc_lr
,
initializer
=
paddle
.
fluid
.
initializer
.
NumpyArrayInitializer
(
np
.
zeros
([
fc_dim
,
F
*
2
])),
name
=
name
+
"_w"
)
bias_attr
=
ParamAttr
(
learning_rate
=
loc_lr
,
initializer
=
paddle
.
fluid
.
initializer
.
NumpyArrayInitializer
(
initial_bias
),
name
=
name
+
"_b"
)
self
.
fc2
=
nn
.
Linear
(
fc_dim
,
F
*
2
,
weight_attr
=
param_attr
,
bias_attr
=
bias_attr
,
name
=
name
)
self
.
out_channels
=
F
*
2
def
forward
(
self
,
x
):
"""
Estimating parameters of geometric transformation
Args:
image: input
Return:
batch_C_prime: the matrix of the geometric transformation
"""
B
=
x
.
shape
[
0
]
i
=
0
for
block
in
self
.
block_list
:
x
=
block
(
x
)
x
=
x
.
reshape
([
B
,
-
1
])
x
=
self
.
fc1
(
x
)
x
=
F
.
relu
(
x
)
x
=
self
.
fc2
(
x
)
x
=
x
.
reshape
(
shape
=
[
-
1
,
self
.
F
,
2
])
return
x
def
get_initial_fiducials
(
self
):
""" see RARE paper Fig. 6 (a) """
F
=
self
.
F
ctrl_pts_x
=
np
.
linspace
(
-
1.0
,
1.0
,
int
(
F
/
2
))
ctrl_pts_y_top
=
np
.
linspace
(
0.0
,
-
1.0
,
num
=
int
(
F
/
2
))
ctrl_pts_y_bottom
=
np
.
linspace
(
1.0
,
0.0
,
num
=
int
(
F
/
2
))
ctrl_pts_top
=
np
.
stack
([
ctrl_pts_x
,
ctrl_pts_y_top
],
axis
=
1
)
ctrl_pts_bottom
=
np
.
stack
([
ctrl_pts_x
,
ctrl_pts_y_bottom
],
axis
=
1
)
initial_bias
=
np
.
concatenate
([
ctrl_pts_top
,
ctrl_pts_bottom
],
axis
=
0
)
return
initial_bias
class
GridGenerator
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
num_fiducial
):
super
(
GridGenerator
,
self
).
__init__
()
self
.
eps
=
1e-6
self
.
F
=
num_fiducial
name
=
"ex_fc"
initializer
=
nn
.
initializer
.
Constant
(
value
=
0.0
)
param_attr
=
ParamAttr
(
learning_rate
=
0.0
,
initializer
=
initializer
,
name
=
name
+
"_w"
)
bias_attr
=
ParamAttr
(
learning_rate
=
0.0
,
initializer
=
initializer
,
name
=
name
+
"_b"
)
self
.
fc
=
nn
.
Linear
(
in_channels
,
6
,
weight_attr
=
param_attr
,
bias_attr
=
bias_attr
,
name
=
name
)
def
forward
(
self
,
batch_C_prime
,
I_r_size
):
"""
Generate the grid for the grid_sampler.
Args:
batch_C_prime: the matrix of the geometric transformation
I_r_size: the shape of the input image
Return:
batch_P_prime: the grid for the grid_sampler
"""
C
=
self
.
build_C
()
P
=
self
.
build_P
(
I_r_size
)
inv_delta_C
=
self
.
build_inv_delta_C
(
C
).
astype
(
'float32'
)
P_hat
=
self
.
build_P_hat
(
C
,
P
).
astype
(
'float32'
)
inv_delta_C_tensor
=
paddle
.
to_tensor
(
inv_delta_C
)
inv_delta_C_tensor
.
stop_gradient
=
True
P_hat_tensor
=
paddle
.
to_tensor
(
P_hat
)
P_hat_tensor
.
stop_gradient
=
True
batch_C_ex_part_tensor
=
self
.
get_expand_tensor
(
batch_C_prime
)
batch_C_ex_part_tensor
.
stop_gradient
=
True
batch_C_prime_with_zeros
=
paddle
.
concat
(
[
batch_C_prime
,
batch_C_ex_part_tensor
],
axis
=
1
)
batch_T
=
paddle
.
matmul
(
inv_delta_C_tensor
,
batch_C_prime_with_zeros
)
batch_P_prime
=
paddle
.
matmul
(
P_hat_tensor
,
batch_T
)
return
batch_P_prime
def
build_C
(
self
):
""" Return coordinates of fiducial points in I_r; C """
F
=
self
.
F
ctrl_pts_x
=
np
.
linspace
(
-
1.0
,
1.0
,
int
(
F
/
2
))
ctrl_pts_y_top
=
-
1
*
np
.
ones
(
int
(
F
/
2
))
ctrl_pts_y_bottom
=
np
.
ones
(
int
(
F
/
2
))
ctrl_pts_top
=
np
.
stack
([
ctrl_pts_x
,
ctrl_pts_y_top
],
axis
=
1
)
ctrl_pts_bottom
=
np
.
stack
([
ctrl_pts_x
,
ctrl_pts_y_bottom
],
axis
=
1
)
C
=
np
.
concatenate
([
ctrl_pts_top
,
ctrl_pts_bottom
],
axis
=
0
)
return
C
# F x 2
def
build_P
(
self
,
I_r_size
):
I_r_width
,
I_r_height
=
I_r_size
I_r_grid_x
=
(
np
.
arange
(
-
I_r_width
,
I_r_width
,
2
)
+
1.0
)
\
/
I_r_width
# self.I_r_width
I_r_grid_y
=
(
np
.
arange
(
-
I_r_height
,
I_r_height
,
2
)
+
1.0
)
\
/
I_r_height
# self.I_r_height
# P: self.I_r_width x self.I_r_height x 2
P
=
np
.
stack
(
np
.
meshgrid
(
I_r_grid_x
,
I_r_grid_y
),
axis
=
2
)
# n (= self.I_r_width x self.I_r_height) x 2
return
P
.
reshape
([
-
1
,
2
])
def
build_inv_delta_C
(
self
,
C
):
""" Return inv_delta_C which is needed to calculate T """
F
=
self
.
F
hat_C
=
np
.
zeros
((
F
,
F
),
dtype
=
float
)
# F x F
for
i
in
range
(
0
,
F
):
for
j
in
range
(
i
,
F
):
r
=
np
.
linalg
.
norm
(
C
[
i
]
-
C
[
j
])
hat_C
[
i
,
j
]
=
r
hat_C
[
j
,
i
]
=
r
np
.
fill_diagonal
(
hat_C
,
1
)
hat_C
=
(
hat_C
**
2
)
*
np
.
log
(
hat_C
)
# print(C.shape, hat_C.shape)
delta_C
=
np
.
concatenate
(
# F+3 x F+3
[
np
.
concatenate
(
[
np
.
ones
((
F
,
1
)),
C
,
hat_C
],
axis
=
1
),
# F x F+3
np
.
concatenate
(
[
np
.
zeros
((
2
,
3
)),
np
.
transpose
(
C
)],
axis
=
1
),
# 2 x F+3
np
.
concatenate
(
[
np
.
zeros
((
1
,
3
)),
np
.
ones
((
1
,
F
))],
axis
=
1
)
# 1 x F+3
],
axis
=
0
)
inv_delta_C
=
np
.
linalg
.
inv
(
delta_C
)
return
inv_delta_C
# F+3 x F+3
def
build_P_hat
(
self
,
C
,
P
):
F
=
self
.
F
eps
=
self
.
eps
n
=
P
.
shape
[
0
]
# n (= self.I_r_width x self.I_r_height)
# P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
P_tile
=
np
.
tile
(
np
.
expand_dims
(
P
,
axis
=
1
),
(
1
,
F
,
1
))
C_tile
=
np
.
expand_dims
(
C
,
axis
=
0
)
# 1 x F x 2
P_diff
=
P_tile
-
C_tile
# n x F x 2
# rbf_norm: n x F
rbf_norm
=
np
.
linalg
.
norm
(
P_diff
,
ord
=
2
,
axis
=
2
,
keepdims
=
False
)
# rbf: n x F
rbf
=
np
.
multiply
(
np
.
square
(
rbf_norm
),
np
.
log
(
rbf_norm
+
eps
))
P_hat
=
np
.
concatenate
([
np
.
ones
((
n
,
1
)),
P
,
rbf
],
axis
=
1
)
return
P_hat
# n x F+3
def
get_expand_tensor
(
self
,
batch_C_prime
):
B
=
batch_C_prime
.
shape
[
0
]
batch_C_prime
=
batch_C_prime
.
reshape
([
B
,
-
1
])
batch_C_ex_part_tensor
=
self
.
fc
(
batch_C_prime
)
batch_C_ex_part_tensor
=
batch_C_ex_part_tensor
.
reshape
([
-
1
,
3
,
2
])
return
batch_C_ex_part_tensor
class
TPS
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
num_fiducial
,
loc_lr
,
model_name
):
super
(
TPS
,
self
).
__init__
()
self
.
loc_net
=
LocalizationNetwork
(
in_channels
,
num_fiducial
,
loc_lr
,
model_name
)
self
.
grid_generator
=
GridGenerator
(
self
.
loc_net
.
out_channels
,
num_fiducial
)
self
.
out_channels
=
in_channels
def
forward
(
self
,
image
):
image
.
stop_gradient
=
False
I_r_size
=
[
image
.
shape
[
3
],
image
.
shape
[
2
]]
batch_C_prime
=
self
.
loc_net
(
image
)
batch_P_prime
=
self
.
grid_generator
(
batch_C_prime
,
I_r_size
)
batch_P_prime
=
batch_P_prime
.
reshape
(
[
-
1
,
image
.
shape
[
2
],
image
.
shape
[
3
],
2
])
batch_I_r
=
F
.
grid_sample
(
x
=
image
,
grid
=
batch_P_prime
)
return
batch_I_r
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录