Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
hapi
提交
27afc286
H
hapi
项目概览
PaddlePaddle
/
hapi
通知
11
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
4
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
H
hapi
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
4
Issue
4
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
27afc286
编写于
4月 05, 2020
作者:
G
guosheng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update Additive Attention followed by GRU
上级
e4e393c8
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
244 addition
and
164 deletion
+244
-164
seq2seq/seq2seq_add_attn.py
seq2seq/seq2seq_add_attn.py
+104
-164
seq2seq/train_ocr.py
seq2seq/train_ocr.py
+140
-0
未找到文件。
seq2seq/seq2seq_add_attn.py
浏览文件 @
27afc286
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.layers
as
layers
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
Linear
,
BatchNorm
,
Embedding
,
GRUUnit
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
Linear
,
BatchNorm
,
Embedding
,
GRUUnit
from
text
import
DynamicDecode
,
RNN
,
BasicLSTMCell
,
RNNCell
from
text
import
DynamicDecode
,
RNN
,
RNNCell
from
model
import
Model
,
Loss
from
model
import
Model
,
Loss
...
@@ -91,82 +92,70 @@ class OCRConv(fluid.dygraph.Layer):
...
@@ -91,82 +92,70 @@ class OCRConv(fluid.dygraph.Layer):
return
inputs_4
return
inputs_4
class
SimpleAttention
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
decoder_size
):
super
(
SimpleAttention
,
self
).
__init__
()
self
.
fc1
=
Linear
(
decoder_size
,
decoder_size
,
bias_attr
=
False
)
self
.
fc2
=
Linear
(
decoder_size
,
1
,
bias_attr
=
False
)
def
forward
(
self
,
encoder_vec
,
encoder_proj
,
decoder_state
):
decoder_state
=
self
.
fc1
(
decoder_state
)
decoder_state
=
fluid
.
layers
.
unsqueeze
(
decoder_state
,
[
1
])
mix
=
fluid
.
layers
.
elementwise_add
(
encoder_proj
,
decoder_state
)
mix
=
fluid
.
layers
.
tanh
(
x
=
mix
)
attn_score
=
self
.
fc2
(
mix
)
attn_scores
=
layers
.
squeeze
(
attn_score
,
[
2
])
attn_scores
=
fluid
.
layers
.
softmax
(
attn_scores
)
scaled
=
fluid
.
layers
.
elementwise_mul
(
x
=
encoder_vec
,
y
=
attn_scores
,
axis
=
0
)
context
=
fluid
.
layers
.
reduce_sum
(
scaled
,
dim
=
1
)
return
context
class
GRUCell
(
RNNCell
):
class
GRUCell
(
RNNCell
):
def
__init__
(
self
,
def
__init__
(
self
,
size
,
input_size
,
hidden_size
,
param_attr
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
bias_attr
=
None
,
is_reverse
=
False
,
gate_activation
=
'sigmoid'
,
gate_activation
=
'sigmoid'
,
candidate_activation
=
'tanh'
,
candidate_activation
=
'tanh'
,
origin_mode
=
False
,
origin_mode
=
False
):
init_size
=
None
):
super
(
GRUCell
,
self
).
__init__
()
super
(
GRUCell
,
self
).
__init__
()
self
.
hidden_size
=
hidden_size
self
.
input_proj
=
Linear
(
self
.
fc_layer
=
Linear
(
768
,
size
*
3
,
param_attr
=
param_attr
,
bias_attr
=
False
)
input_size
,
hidden_size
*
3
,
param_attr
=
param_attr
,
bias_attr
=
False
)
self
.
gru_unit
=
GRUUnit
(
self
.
gru_unit
=
GRUUnit
(
size
*
3
,
hidden_
size
*
3
,
param_attr
=
param_attr
,
param_attr
=
param_attr
,
bias_attr
=
bias_attr
,
bias_attr
=
bias_attr
,
activation
=
candidate_activation
,
activation
=
candidate_activation
,
gate_activation
=
gate_activation
,
gate_activation
=
gate_activation
,
origin_mode
=
origin_mode
)
origin_mode
=
origin_mode
)
self
.
size
=
size
self
.
is_reverse
=
is_reverse
def
forward
(
self
,
inputs
,
states
):
def
forward
(
self
,
inputs
,
states
):
# step_outputs, new_states = cell(step_inputs, states)
# step_outputs, new_states = cell(step_inputs, states)
# for GRUCell, `step_outputs` and `new_states` both are hidden
# for GRUCell, `step_outputs` and `new_states` both are hidden
x
=
self
.
input_proj
(
inputs
)
x
=
self
.
fc_layer
(
inputs
)
hidden
,
_
,
_
=
self
.
gru_unit
(
x
,
states
)
hidden
,
_
,
_
=
self
.
gru_unit
(
x
,
states
)
return
hidden
,
hidden
return
hidden
,
hidden
@
property
class
DecoderCell
(
RNNCell
):
def
state_shape
(
self
):
def
__init__
(
self
,
size
):
return
[
self
.
hidden_size
]
self
.
gru
=
GRUCell
(
size
)
self
.
attention
=
SimpleAttention
(
size
)
self
.
fc_1_layer
=
Linear
(
input_dim
=
size
*
2
,
output_dim
=
size
*
3
,
bias_attr
=
False
)
self
.
fc_2_layer
=
Linear
(
input_dim
=
size
,
output_dim
=
size
*
3
,
bias_attr
=
False
)
def
forward
(
self
,
inputs
,
states
,
encoder_vec
,
encoder_proj
):
context
=
self
.
attention
(
encoder_vec
,
encoder_proj
,
states
)
fc_1
=
self
.
fc_1_layer
(
context
)
fc_2
=
self
.
fc_2_layer
(
inputs
)
decoder_inputs
=
fluid
.
layers
.
elementwise_add
(
x
=
fc_1
,
y
=
fc_2
)
h
,
_
=
self
.
gru
(
decoder_inputs
,
states
)
return
h
,
h
class
Decoder
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
size
,
num_classes
):
super
(
Decoder
,
self
).
__init__
()
self
.
embedder
=
Embedding
(
size
=
[
num_classes
,
size
])
self
.
gru_attention
=
RNN
(
DecoderCell
(
size
),
is_reverse
=
False
,
time_major
=
False
)
self
.
output_layer
=
Linear
(
size
,
num_classes
,
bias_attr
=
False
)
def
forward
(
self
,
target
,
decoder_initial_states
,
encoder_vec
,
encoder_proj
):
inputs
=
self
.
embedder
(
target
)
decoder_output
,
_
=
self
.
gru_attention
(
inputs
,
initial_states
=
decoder_initial_states
,
encoder_vec
=
encoder_vec
,
encoder_proj
=
encoder_proj
)
predict
=
self
.
output_layer
(
decoder_output
)
return
predict
class
EncoderNet
(
fluid
.
dygraph
.
Layer
):
class
EncoderNet
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
def
__init__
(
self
,
batch_size
,
decoder_size
,
decoder_size
,
rnn_hidden_size
=
200
,
rnn_hidden_size
=
200
,
is_test
=
False
,
is_test
=
False
,
...
@@ -179,21 +168,24 @@ class EncoderNet(fluid.dygraph.Layer):
...
@@ -179,21 +168,24 @@ class EncoderNet(fluid.dygraph.Layer):
initializer
=
fluid
.
initializer
.
Normal
(
0.0
,
0.02
),
learning_rate
=
2.0
)
initializer
=
fluid
.
initializer
.
Normal
(
0.0
,
0.02
),
learning_rate
=
2.0
)
self
.
ocr_convs
=
OCRConv
(
is_test
=
is_test
,
use_cudnn
=
use_cudnn
)
self
.
ocr_convs
=
OCRConv
(
is_test
=
is_test
,
use_cudnn
=
use_cudnn
)
self
.
fc_1_layer
=
Linear
(
self
.
gru_forward_layer
=
RNN
(
768
,
rnn_hidden_size
*
3
,
param_attr
=
para_attr
,
bias_attr
=
False
)
cell
=
GRUCell
(
self
.
fc_2_layer
=
Linear
(
input_size
=
128
*
6
,
# channel * h
768
,
rnn_hidden_size
*
3
,
param_attr
=
para_attr
,
bias_attr
=
False
)
hidden_size
=
rnn_hidden_size
,
self
.
gru_forward_layer
=
DynamicGRU
(
size
=
rnn_hidden_size
,
param_attr
=
para_attr
,
param_attr
=
para_attr
,
bias_attr
=
bias_attr
,
bias_attr
=
bias_attr
,
candidate_activation
=
'relu'
)
candidate_activation
=
'relu'
),
self
.
gru_backward_layer
=
DynamicGRU
(
is_reverse
=
False
,
size
=
rnn_hidden_size
,
time_major
=
False
)
self
.
gru_backward_layer
=
RNN
(
cell
=
GRUCell
(
input_size
=
128
*
6
,
# channel * h
hidden_size
=
rnn_hidden_size
,
param_attr
=
para_attr
,
param_attr
=
para_attr
,
bias_attr
=
bias_attr
,
bias_attr
=
bias_attr
,
candidate_activation
=
'relu'
,
candidate_activation
=
'relu'
),
is_reverse
=
True
)
is_reverse
=
True
,
time_major
=
False
)
self
.
encoded_proj_fc
=
Linear
(
self
.
encoded_proj_fc
=
Linear
(
rnn_hidden_size
*
2
,
decoder_size
,
bias_attr
=
False
)
rnn_hidden_size
*
2
,
decoder_size
,
bias_attr
=
False
)
...
@@ -211,13 +203,9 @@ class EncoderNet(fluid.dygraph.Layer):
...
@@ -211,13 +203,9 @@ class EncoderNet(fluid.dygraph.Layer):
],
],
inplace
=
False
)
inplace
=
False
)
fc_1
=
self
.
fc_1_layer
(
sliced_feature
)
gru_forward
,
_
=
self
.
gru_forward_layer
(
sliced_feature
)
fc_2
=
self
.
fc_2_layer
(
sliced_feature
)
gru_forward
=
self
.
gru_forward_layer
(
fc_1
)
gru_backward
,
_
=
self
.
gru_backward_layer
(
sliced_feature
)
gru_backward
=
self
.
gru_backward_layer
(
fc_2
)
encoded_vector
=
fluid
.
layers
.
concat
(
encoded_vector
=
fluid
.
layers
.
concat
(
input
=
[
gru_forward
,
gru_backward
],
axis
=
2
)
input
=
[
gru_forward
,
gru_backward
],
axis
=
2
)
...
@@ -227,88 +215,50 @@ class EncoderNet(fluid.dygraph.Layer):
...
@@ -227,88 +215,50 @@ class EncoderNet(fluid.dygraph.Layer):
return
gru_backward
,
encoded_vector
,
encoded_proj
return
gru_backward
,
encoded_vector
,
encoded_proj
class
SimpleAttention
(
fluid
.
dygraph
.
Layer
):
class
DecoderCell
(
RNNCell
):
def
__init__
(
self
,
decoder_size
):
def
__init__
(
self
,
encoder_size
,
decoder_size
):
super
(
SimpleAttention
,
self
).
__init__
()
super
(
DecoderCell
,
self
).
__init__
()
self
.
attention
=
SimpleAttention
(
decoder_size
)
self
.
fc_1
=
Linear
(
self
.
gru_cell
=
GRUCell
(
decoder_size
,
decoder_size
,
act
=
None
,
bias_attr
=
False
)
input_size
=
encoder_size
*
2
+
self
.
fc_2
=
Linear
(
decoder_size
,
1
,
act
=
None
,
bias_attr
=
False
)
decoder_size
,
# encoded_vector.shape[-1] + embed_size
hidden_size
=
decoder_size
)
def
forward
(
self
,
encoder_vec
,
encoder_proj
,
decoder_state
):
def
forward
(
self
,
current_word
,
states
,
encoder_vec
,
encoder_proj
):
decoder_state_fc
=
self
.
fc_1
(
decoder_state
)
context
=
self
.
attention
(
encoder_vec
,
encoder_proj
,
states
)
decoder_inputs
=
layers
.
concat
([
current_word
,
context
],
axis
=
1
)
decoder_state_proj_reshape
=
fluid
.
layers
.
reshape
(
hidden
,
_
=
self
.
gru_cell
(
decoder_inputs
,
states
)
decoder_state_fc
,
[
-
1
,
1
,
decoder_state_fc
.
shape
[
1
]],
return
hidden
,
hidden
inplace
=
False
)
decoder_state_expand
=
fluid
.
layers
.
expand
(
decoder_state_proj_reshape
,
[
1
,
encoder_proj
.
shape
[
1
],
1
])
concated
=
fluid
.
layers
.
elementwise_add
(
encoder_proj
,
decoder_state_expand
)
concated
=
fluid
.
layers
.
tanh
(
x
=
concated
)
attention_weight
=
self
.
fc_2
(
concated
)
weights_reshape
=
fluid
.
layers
.
reshape
(
x
=
attention_weight
,
shape
=
[
concated
.
shape
[
0
],
-
1
],
inplace
=
False
)
weights_reshape
=
fluid
.
layers
.
softmax
(
weights_reshape
)
scaled
=
fluid
.
layers
.
elementwise_mul
(
x
=
encoder_vec
,
y
=
weights_reshape
,
axis
=
0
)
context
=
fluid
.
layers
.
reduce_sum
(
scaled
,
dim
=
1
)
return
context
class
GRUDecoderWithAttention
(
fluid
.
dygraph
.
Layer
):
class
GRUDecoderWithAttention
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
encoder_size
,
decoder_size
,
num_classes
):
def
__init__
(
self
,
encoder_size
,
decoder_size
,
num_classes
):
super
(
GRUDecoderWithAttention
,
self
).
__init__
()
super
(
GRUDecoderWithAttention
,
self
).
__init__
()
self
.
simple_attention
=
SimpleAttention
(
decoder_size
)
self
.
gru_attention
=
RNN
(
DecoderCell
(
encoder_size
,
decoder_size
),
is_reverse
=
False
,
self
.
fc_1_layer
=
Linear
(
time_major
=
False
)
input_dim
=
encoder_size
*
2
,
output_dim
=
decoder_size
*
3
,
bias_attr
=
False
)
self
.
fc_2_layer
=
Linear
(
input_dim
=
decoder_size
,
output_dim
=
decoder_size
*
3
,
bias_attr
=
False
)
self
.
gru_unit
=
GRUUnit
(
size
=
decoder_size
*
3
,
param_attr
=
None
,
bias_attr
=
None
)
self
.
out_layer
=
Linear
(
self
.
out_layer
=
Linear
(
input_dim
=
decoder_size
,
input_dim
=
decoder_size
,
output_dim
=
num_classes
+
2
,
output_dim
=
num_classes
+
2
,
bias_attr
=
None
,
bias_attr
=
None
,
act
=
'softmax'
)
act
=
'softmax'
)
self
.
decoder_size
=
decoder_size
def
forward
(
self
,
inputs
,
decoder_initial_states
,
encoder_vec
,
encoder_proj
):
def
forward
(
self
,
out
,
_
=
self
.
gru_attention
(
current_word
,
inputs
,
encoder_vec
,
initial_states
=
decoder_initial_states
,
encoder_proj
,
encoder_vec
=
encoder_vec
,
decoder_boot
,
encoder_proj
=
encoder_proj
)
inference
=
False
):
predict
=
self
.
out_layer
(
out
)
current_word
=
fluid
.
layers
.
reshape
(
return
predict
current_word
,
[
-
1
,
current_word
.
shape
[
2
]],
inplace
=
False
)
context
=
self
.
simple_attention
(
encoder_vec
,
encoder_proj
,
decoder_boot
)
fc_1
=
self
.
fc_1_layer
(
context
)
fc_2
=
self
.
fc_2_layer
(
current_word
)
decoder_inputs
=
fluid
.
layers
.
elementwise_add
(
x
=
fc_1
,
y
=
fc_2
)
h
,
_
,
_
=
self
.
gru_unit
(
decoder_inputs
,
decoder_boot
)
out
=
self
.
out_layer
(
h
)
return
out
,
h
class
OCRAttention
(
fluid
.
dygraph
.
Layer
):
class
OCRAttention
(
Model
):
def
__init__
(
self
,
batch_size
,
num_classes
,
encoder_size
,
decoder_size
,
def
__init__
(
self
,
num_classes
,
encoder_size
,
decoder_size
,
word_vector_dim
):
word_vector_dim
):
super
(
OCRAttention
,
self
).
__init__
()
super
(
OCRAttention
,
self
).
__init__
()
self
.
encoder_net
=
EncoderNet
(
batch_size
,
decoder_size
)
self
.
encoder_net
=
EncoderNet
(
decoder_size
)
self
.
fc
=
Linear
(
self
.
fc
=
Linear
(
input_dim
=
encoder_size
,
input_dim
=
encoder_size
,
output_dim
=
decoder_size
,
output_dim
=
decoder_size
,
...
@@ -318,36 +268,26 @@ class OCRAttention(fluid.dygraph.Layer):
...
@@ -318,36 +268,26 @@ class OCRAttention(fluid.dygraph.Layer):
[
num_classes
+
2
,
word_vector_dim
],
dtype
=
'float32'
)
[
num_classes
+
2
,
word_vector_dim
],
dtype
=
'float32'
)
self
.
gru_decoder_with_attention
=
GRUDecoderWithAttention
(
self
.
gru_decoder_with_attention
=
GRUDecoderWithAttention
(
encoder_size
,
decoder_size
,
num_classes
)
encoder_size
,
decoder_size
,
num_classes
)
self
.
batch_size
=
batch_size
def
forward
(
self
,
inputs
,
label_in
):
def
forward
(
self
,
inputs
,
label_in
):
gru_backward
,
encoded_vector
,
encoded_proj
=
self
.
encoder_net
(
inputs
)
gru_backward
,
encoded_vector
,
encoded_proj
=
self
.
encoder_net
(
inputs
)
backward_first
=
fluid
.
layers
.
slice
(
gru_backward
,
axes
=
[
1
],
starts
=
[
0
],
ends
=
[
1
])
backward_first
=
fluid
.
layers
.
reshape
(
backward_first
,
[
-
1
,
backward_first
.
shape
[
2
]],
inplace
=
False
)
decoder_boot
=
self
.
fc
(
backward_first
)
label_in
=
fluid
.
layers
.
reshape
(
label_in
,
[
-
1
],
inplace
=
False
)
decoder_boot
=
self
.
fc
(
gru_backward
[:,
0
]
)
trg_embedding
=
self
.
embedding
(
label_in
)
trg_embedding
=
self
.
embedding
(
label_in
)
prediction
=
self
.
gru_decoder_with_attention
(
trg_embedding
,
decoder_boot
,
encoded_vector
,
encoded_proj
)
trg_embedding
=
fluid
.
layers
.
reshape
(
return
prediction
trg_embedding
,
[
self
.
batch_size
,
-
1
,
trg_embedding
.
shape
[
1
]],
inplace
=
False
)
pred_temp
=
[]
for
i
in
range
(
trg_embedding
.
shape
[
1
]):
current_word
=
fluid
.
layers
.
slice
(
trg_embedding
,
axes
=
[
1
],
starts
=
[
i
],
ends
=
[
i
+
1
])
out
,
decoder_boot
=
self
.
gru_decoder_with_attention
(
current_word
,
encoded_vector
,
encoded_proj
,
decoder_boot
)
pred_temp
.
append
(
out
)
pred_temp
=
fluid
.
layers
.
concat
(
pred_temp
,
axis
=
1
)
batch_size
=
trg_embedding
.
shape
[
0
]
class
CrossEntropyCriterion
(
Loss
):
seq_len
=
trg_embedding
.
shape
[
1
]
def
__init__
(
self
):
prediction
=
fluid
.
layers
.
reshape
(
super
(
CrossEntropyCriterion
,
self
).
__init__
()
pred_temp
,
shape
=
[
batch_size
,
seq_len
,
-
1
])
return
prediction
def
forward
(
self
,
outputs
,
labels
):
predict
,
(
label
,
mask
)
=
outputs
[
0
],
labels
loss
=
layers
.
cross_entropy
(
predict
,
label
=
label
,
soft_label
=
False
)
loss
=
layers
.
elementwise_mul
(
loss
,
mask
,
axis
=
0
)
loss
=
layers
.
reduce_sum
(
loss
)
return
loss
seq2seq/train_ocr.py
0 → 100644
浏览文件 @
27afc286
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
os
import
sys
sys
.
path
.
append
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))))
import
paddle.fluid.profiler
as
profiler
import
paddle.fluid
as
fluid
import
data_reader
from
paddle.fluid.dygraph.base
import
to_variable
import
argparse
import
functools
from
utility
import
add_arguments
,
print_arguments
,
get_attention_feeder_data
from
model
import
Input
,
set_device
from
nets
import
OCRAttention
,
CrossEntropyCriterion
from
eval
import
evaluate
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
add_arg
=
functools
.
partial
(
add_arguments
,
argparser
=
parser
)
# yapf: disable
add_arg
(
'batch_size'
,
int
,
32
,
"Minibatch size."
)
add_arg
(
'epoch_num'
,
int
,
30
,
"Epoch number."
)
add_arg
(
'lr'
,
float
,
0.001
,
"Learning rate."
)
add_arg
(
'lr_decay_strategy'
,
str
,
""
,
"Learning rate decay strategy."
)
add_arg
(
'log_period'
,
int
,
200
,
"Log period."
)
add_arg
(
'save_model_period'
,
int
,
2000
,
"Save model period. '-1' means never saving the model."
)
add_arg
(
'eval_period'
,
int
,
2000
,
"Evaluate period. '-1' means never evaluating the model."
)
add_arg
(
'save_model_dir'
,
str
,
"./output"
,
"The directory the model to be saved to."
)
add_arg
(
'train_images'
,
str
,
None
,
"The directory of images to be used for training."
)
add_arg
(
'train_list'
,
str
,
None
,
"The list file of images to be used for training."
)
add_arg
(
'test_images'
,
str
,
None
,
"The directory of images to be used for test."
)
add_arg
(
'test_list'
,
str
,
None
,
"The list file of images to be used for training."
)
add_arg
(
'init_model'
,
str
,
None
,
"The init model file of directory."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Whether use GPU to train."
)
add_arg
(
'parallel'
,
bool
,
False
,
"Whether use parallel training."
)
add_arg
(
'profile'
,
bool
,
False
,
"Whether to use profiling."
)
add_arg
(
'skip_batch_num'
,
int
,
0
,
"The number of first minibatches to skip as warm-up for better performance test."
)
add_arg
(
'skip_test'
,
bool
,
False
,
"Whether to skip test phase."
)
# model hyper paramters
add_arg
(
'encoder_size'
,
int
,
200
,
"Encoder size."
)
add_arg
(
'decoder_size'
,
int
,
128
,
"Decoder size."
)
add_arg
(
'word_vector_dim'
,
int
,
128
,
"Word vector dim."
)
add_arg
(
'num_classes'
,
int
,
95
,
"Number classes."
)
add_arg
(
'gradient_clip'
,
float
,
5.0
,
"Gradient clip value."
)
add_arg
(
'dynamic'
,
bool
,
False
,
"Whether to use dygraph."
)
def
train
(
args
):
device
=
set_device
(
"gpu"
if
args
.
use_gpu
else
"cpu"
)
fluid
.
enable_dygraph
(
device
)
if
args
.
dynamic
else
None
ocr_attention
=
OCRAttention
(
encoder_size
=
args
.
encoder_size
,
decoder_size
=
args
.
decoder_size
,
num_classes
=
args
.
num_classes
,
word_vector_dim
=
args
.
word_vector_dim
)
LR
=
args
.
lr
if
args
.
lr_decay_strategy
==
"piecewise_decay"
:
learning_rate
=
fluid
.
layers
.
piecewise_decay
([
200000
,
250000
],
[
LR
,
LR
*
0.1
,
LR
*
0.01
])
else
:
learning_rate
=
LR
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
learning_rate
,
parameter_list
=
ocr_attention
.
parameters
())
# grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(args.gradient_clip)
inputs
=
[
Input
([
None
,
1
,
48
,
384
],
"float32"
,
name
=
"pixel"
),
Input
([
None
,
None
],
"int64"
,
name
=
"label_in"
),
]
labels
=
[
Input
([
None
,
None
],
"int64"
,
name
=
"label_out"
),
Input
([
None
,
None
],
"float32"
,
name
=
"mask"
)]
ocr_attention
.
prepare
(
optimizer
,
CrossEntropyCriterion
(),
inputs
=
inputs
,
labels
=
labels
)
train_reader
=
data_reader
.
data_reader
(
args
.
batch_size
,
shuffle
=
True
,
images_dir
=
args
.
train_images
,
list_file
=
args
.
train_list
,
data_type
=
'train'
)
# test_reader = data_reader.data_reader(
# args.batch_size,
# images_dir=args.test_images,
# list_file=args.test_list,
# data_type="test")
# if not os.path.exists(args.save_model_dir):
# os.makedirs(args.save_model_dir)
total_step
=
0
epoch_num
=
args
.
epoch_num
for
epoch
in
range
(
epoch_num
):
batch_id
=
0
total_loss
=
0.0
for
data
in
train_reader
():
total_step
+=
1
data_dict
=
get_attention_feeder_data
(
data
)
pixel
=
data_dict
[
"pixel"
]
label_in
=
data_dict
[
"label_in"
].
reshape
([
pixel
.
shape
[
0
],
-
1
])
label_out
=
data_dict
[
"label_out"
].
reshape
([
pixel
.
shape
[
0
],
-
1
])
mask
=
data_dict
[
"mask"
].
reshape
(
label_out
.
shape
).
astype
(
"float32"
)
avg_loss
=
ocr_attention
.
train
(
inputs
=
[
pixel
,
label_in
],
labels
=
[
label_out
,
mask
])[
0
]
total_loss
+=
avg_loss
if
True
:
#batch_id > 0 and batch_id % args.log_period == 0:
print
(
"epoch: {}, batch_id: {}, loss {}"
.
format
(
epoch
,
batch_id
,
total_loss
/
args
.
batch_size
/
args
.
log_period
))
total_loss
=
0.0
batch_id
+=
1
if
__name__
==
'__main__'
:
args
=
parser
.
parse_args
()
print_arguments
(
args
)
if
args
.
profile
:
if
args
.
use_gpu
:
with
profiler
.
cuda_profiler
(
"cuda_profiler.txt"
,
'csv'
)
as
nvprof
:
train
(
args
)
else
:
with
profiler
.
profiler
(
"CPU"
,
sorted_key
=
'total'
)
as
cpuprof
:
train
(
args
)
else
:
train
(
args
)
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录