Commit e4e393c8
Authored on Apr 04, 2020 by guosheng

Add Additive Attention followed by GRU

Parent: 2bb216b7
1 changed file with 353 additions and 0 deletions
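The new file implements Bahdanau-style additive attention (the SimpleAttention class below): the encoder states h_i are projected once by encoded_proj_fc, the previous decoder state s_{t-1} is projected per step by fc_1, and a learned vector (fc_2) scores their tanh-combined sum. In notation introduced here only for orientation (W_e, W_d, v denote those three learned projections), each decode step computes

\alpha_{t,i} = \mathrm{softmax}_i\left( v^\top \tanh\left( W_e h_i + W_d s_{t-1} \right) \right),
\qquad c_t = \sum_i \alpha_{t,i} \, h_i

The context c_t is added to the projected current target-word embedding and fed through a GRU step, hence "Additive Attention followed by GRU".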
seq2seq/seq2seq_add_attn.py (new file, 0 → 100644, +353 −0)
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, BatchNorm, Embedding, GRUUnit
from text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from model import Model, Loss


class ConvBNPool(fluid.dygraph.Layer):
    # Two conv + batch-norm blocks, optionally followed by 2x2 max pooling.
    def __init__(self,
                 out_ch,
                 channels,
                 act="relu",
                 is_test=False,
                 pool=True,
                 use_cudnn=True):
        super(ConvBNPool, self).__init__()
        self.pool = pool

        filter_size = 3
        conv_std_0 = (2.0 / (filter_size**2 * channels[0]))**0.5
        conv_param_0 = fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0.0, conv_std_0))

        conv_std_1 = (2.0 / (filter_size**2 * channels[1]))**0.5
        conv_param_1 = fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0.0, conv_std_1))

        self.conv_0_layer = Conv2D(
            channels[0],
            out_ch[0],
            3,
            padding=1,
            param_attr=conv_param_0,
            bias_attr=False,
            act=None,
            use_cudnn=use_cudnn)
        self.bn_0_layer = BatchNorm(out_ch[0], act=act, is_test=is_test)
        self.conv_1_layer = Conv2D(
            out_ch[0],
            num_filters=out_ch[1],
            filter_size=3,
            padding=1,
            param_attr=conv_param_1,
            bias_attr=False,
            act=None,
            use_cudnn=use_cudnn)
        self.bn_1_layer = BatchNorm(out_ch[1], act=act, is_test=is_test)

        if self.pool:
            self.pool_layer = Pool2D(
                pool_size=2,
                pool_type='max',
                pool_stride=2,
                use_cudnn=use_cudnn,
                ceil_mode=True)

    def forward(self, inputs):
        conv_0 = self.conv_0_layer(inputs)
        bn_0 = self.bn_0_layer(conv_0)
        conv_1 = self.conv_1_layer(bn_0)
        bn_1 = self.bn_1_layer(conv_1)
        if self.pool:
            bn_pool = self.pool_layer(bn_1)
            return bn_pool
        return bn_1


class OCRConv(fluid.dygraph.Layer):
    # CNN backbone: four ConvBNPool stages, the first three downsampling by 2.
    def __init__(self, is_test=False, use_cudnn=True):
        super(OCRConv, self).__init__()
        self.conv_bn_pool_1 = ConvBNPool(
            [16, 16], [1, 16], is_test=is_test, use_cudnn=use_cudnn)
        self.conv_bn_pool_2 = ConvBNPool(
            [32, 32], [16, 32], is_test=is_test, use_cudnn=use_cudnn)
        self.conv_bn_pool_3 = ConvBNPool(
            [64, 64], [32, 64], is_test=is_test, use_cudnn=use_cudnn)
        self.conv_bn_pool_4 = ConvBNPool(
            [128, 128], [64, 128],
            is_test=is_test,
            pool=False,
            use_cudnn=use_cudnn)

    def forward(self, inputs):
        inputs_1 = self.conv_bn_pool_1(inputs)
        inputs_2 = self.conv_bn_pool_2(inputs_1)
        inputs_3 = self.conv_bn_pool_3(inputs_2)
        inputs_4 = self.conv_bn_pool_4(inputs_3)
        return inputs_4


class GRUCell(RNNCell):
    def __init__(self,
                 size,
                 param_attr=None,
                 bias_attr=None,
                 is_reverse=False,
                 gate_activation='sigmoid',
                 candidate_activation='tanh',
                 origin_mode=False,
                 init_size=None):
        super(GRUCell, self).__init__()
        # NOTE: the input feature width is hard-coded to 768 here.
        self.input_proj = Linear(
            768, size * 3, param_attr=param_attr, bias_attr=False)
        self.gru_unit = GRUUnit(
            size * 3,
            param_attr=param_attr,
            bias_attr=bias_attr,
            activation=candidate_activation,
            gate_activation=gate_activation,
            origin_mode=origin_mode)
        self.size = size
        self.is_reverse = is_reverse

    def forward(self, inputs, states):
        # step_outputs, new_states = cell(step_inputs, states)
        # for GRUCell, `step_outputs` and `new_states` both are hidden
        x = self.input_proj(inputs)
        hidden, _, _ = self.gru_unit(x, states)
        return hidden, hidden


# Alternative decoder built on the generic RNN/RNNCell API from `text`;
# the OCRAttention model below uses the explicit per-step loop instead.
class DecoderCell(RNNCell):
    def __init__(self, size):
        # Fix: the base Layer must be initialized before sublayers are added.
        super(DecoderCell, self).__init__()
        self.gru = GRUCell(size)
        self.attention = SimpleAttention(size)
        self.fc_1_layer = Linear(
            input_dim=size * 2, output_dim=size * 3, bias_attr=False)
        self.fc_2_layer = Linear(
            input_dim=size, output_dim=size * 3, bias_attr=False)

    def forward(self, inputs, states, encoder_vec, encoder_proj):
        context = self.attention(encoder_vec, encoder_proj, states)
        fc_1 = self.fc_1_layer(context)
        fc_2 = self.fc_2_layer(inputs)
        decoder_inputs = fluid.layers.elementwise_add(x=fc_1, y=fc_2)
        h, _ = self.gru(decoder_inputs, states)
        return h, h


class Decoder(fluid.dygraph.Layer):
    def __init__(self, size, num_classes):
        super(Decoder, self).__init__()
        self.embedder = Embedding(size=[num_classes, size])
        self.gru_attention = RNN(
            DecoderCell(size), is_reverse=False, time_major=False)
        self.output_layer = Linear(size, num_classes, bias_attr=False)

    def forward(self, target, decoder_initial_states, encoder_vec,
                encoder_proj):
        inputs = self.embedder(target)
        decoder_output, _ = self.gru_attention(
            inputs,
            initial_states=decoder_initial_states,
            encoder_vec=encoder_vec,
            encoder_proj=encoder_proj)
        predict = self.output_layer(decoder_output)
        return predict


class EncoderNet(fluid.dygraph.Layer):
    # CNN feature extractor followed by forward/backward GRUs; also emits a
    # projection of the encoded sequence for use by the attention module.
    def __init__(self,
                 batch_size,
                 decoder_size,
                 rnn_hidden_size=200,
                 is_test=False,
                 use_cudnn=True):
        super(EncoderNet, self).__init__()
        self.rnn_hidden_size = rnn_hidden_size
        para_attr = fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0.0, 0.02))
        bias_attr = fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0.0, 0.02),
            learning_rate=2.0)
        self.ocr_convs = OCRConv(is_test=is_test, use_cudnn=use_cudnn)

        self.fc_1_layer = Linear(
            768, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False)
        self.fc_2_layer = Linear(
            768, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False)
        # NOTE: DynamicGRU is not among the imports above; it is presumably
        # provided by the local `text` module and would need to be imported
        # for this file to run.
        self.gru_forward_layer = DynamicGRU(
            size=rnn_hidden_size,
            param_attr=para_attr,
            bias_attr=bias_attr,
            candidate_activation='relu')
        self.gru_backward_layer = DynamicGRU(
            size=rnn_hidden_size,
            param_attr=para_attr,
            bias_attr=bias_attr,
            candidate_activation='relu',
            is_reverse=True)

        self.encoded_proj_fc = Linear(
            rnn_hidden_size * 2, decoder_size, bias_attr=False)

    def forward(self, inputs):
        conv_features = self.ocr_convs(inputs)

        # [N, C, H, W] -> [N, W, C, H] -> [N, W, C * H]: one feature vector
        # per horizontal position (time step).
        transpose_conv_features = fluid.layers.transpose(
            conv_features, perm=[0, 3, 1, 2])
        sliced_feature = fluid.layers.reshape(
            transpose_conv_features, [
                -1, transpose_conv_features.shape[1],
                transpose_conv_features.shape[2] *
                transpose_conv_features.shape[3]
            ],
            inplace=False)
        fc_1 = self.fc_1_layer(sliced_feature)
        fc_2 = self.fc_2_layer(sliced_feature)
        gru_forward = self.gru_forward_layer(fc_1)
        gru_backward = self.gru_backward_layer(fc_2)

        encoded_vector = fluid.layers.concat(
            input=[gru_forward, gru_backward], axis=2)
        encoded_proj = self.encoded_proj_fc(encoded_vector)
        return gru_backward, encoded_vector, encoded_proj


class SimpleAttention(fluid.dygraph.Layer):
    # Additive (Bahdanau) attention: score each encoder position by
    # tanh(encoder_proj + W * decoder_state) followed by a learned vector,
    # softmax over positions, then take the weighted sum of encoder states.
    def __init__(self, decoder_size):
        super(SimpleAttention, self).__init__()
        self.fc_1 = Linear(
            decoder_size, decoder_size, act=None, bias_attr=False)
        self.fc_2 = Linear(decoder_size, 1, act=None, bias_attr=False)

    def forward(self, encoder_vec, encoder_proj, decoder_state):
        decoder_state_fc = self.fc_1(decoder_state)
        decoder_state_proj_reshape = fluid.layers.reshape(
            decoder_state_fc, [-1, 1, decoder_state_fc.shape[1]],
            inplace=False)
        decoder_state_expand = fluid.layers.expand(
            decoder_state_proj_reshape, [1, encoder_proj.shape[1], 1])
        concated = fluid.layers.elementwise_add(encoder_proj,
                                                decoder_state_expand)
        concated = fluid.layers.tanh(x=concated)
        attention_weight = self.fc_2(concated)
        weights_reshape = fluid.layers.reshape(
            x=attention_weight, shape=[concated.shape[0], -1], inplace=False)
        weights_reshape = fluid.layers.softmax(weights_reshape)
        scaled = fluid.layers.elementwise_mul(
            x=encoder_vec, y=weights_reshape, axis=0)
        context = fluid.layers.reduce_sum(scaled, dim=1)
        return context


class GRUDecoderWithAttention(fluid.dygraph.Layer):
    # One decode step: attention context + current word -> GRU -> softmax.
    def __init__(self, encoder_size, decoder_size, num_classes):
        super(GRUDecoderWithAttention, self).__init__()
        self.simple_attention = SimpleAttention(decoder_size)

        self.fc_1_layer = Linear(
            input_dim=encoder_size * 2,
            output_dim=decoder_size * 3,
            bias_attr=False)
        self.fc_2_layer = Linear(
            input_dim=decoder_size,
            output_dim=decoder_size * 3,
            bias_attr=False)
        self.gru_unit = GRUUnit(
            size=decoder_size * 3, param_attr=None, bias_attr=None)
        self.out_layer = Linear(
            input_dim=decoder_size,
            output_dim=num_classes + 2,
            bias_attr=None,
            act='softmax')

        self.decoder_size = decoder_size

    def forward(self,
                current_word,
                encoder_vec,
                encoder_proj,
                decoder_boot,
                inference=False):
        current_word = fluid.layers.reshape(
            current_word, [-1, current_word.shape[2]], inplace=False)

        context = self.simple_attention(encoder_vec, encoder_proj,
                                        decoder_boot)
        fc_1 = self.fc_1_layer(context)
        fc_2 = self.fc_2_layer(current_word)
        decoder_inputs = fluid.layers.elementwise_add(x=fc_1, y=fc_2)

        h, _, _ = self.gru_unit(decoder_inputs, decoder_boot)
        out = self.out_layer(h)
        return out, h


class OCRAttention(fluid.dygraph.Layer):
    # Full model: CNN + bi-GRU encoder, then a per-step attention decoder
    # driven with teacher forcing over the ground-truth label sequence.
    def __init__(self, batch_size, num_classes, encoder_size, decoder_size,
                 word_vector_dim):
        super(OCRAttention, self).__init__()
        self.encoder_net = EncoderNet(batch_size, decoder_size)
        self.fc = Linear(
            input_dim=encoder_size,
            output_dim=decoder_size,
            bias_attr=False,
            act='relu')
        self.embedding = Embedding(
            [num_classes + 2, word_vector_dim], dtype='float32')
        self.gru_decoder_with_attention = GRUDecoderWithAttention(
            encoder_size, decoder_size, num_classes)
        self.batch_size = batch_size

    def forward(self, inputs, label_in):
        gru_backward, encoded_vector, encoded_proj = self.encoder_net(inputs)
        # Bootstrap the decoder state from the first backward-GRU output.
        backward_first = fluid.layers.slice(
            gru_backward, axes=[1], starts=[0], ends=[1])
        backward_first = fluid.layers.reshape(
            backward_first, [-1, backward_first.shape[2]], inplace=False)

        decoder_boot = self.fc(backward_first)
        label_in = fluid.layers.reshape(label_in, [-1], inplace=False)
        trg_embedding = self.embedding(label_in)

        trg_embedding = fluid.layers.reshape(
            trg_embedding, [self.batch_size, -1, trg_embedding.shape[1]],
            inplace=False)

        # Teacher-forcing decode: feed each ground-truth word embedding and
        # the previous hidden state to the attention decoder.
        pred_temp = []
        for i in range(trg_embedding.shape[1]):
            current_word = fluid.layers.slice(
                trg_embedding, axes=[1], starts=[i], ends=[i + 1])
            out, decoder_boot = self.gru_decoder_with_attention(
                current_word, encoded_vector, encoded_proj, decoder_boot)
            pred_temp.append(out)
        pred_temp = fluid.layers.concat(pred_temp, axis=1)

        batch_size = trg_embedding.shape[0]
        seq_len = trg_embedding.shape[1]
        prediction = fluid.layers.reshape(
            pred_temp, shape=[batch_size, seq_len, -1])

        return prediction
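For orientation, a minimal dygraph sketch of how this model might be exercised on dummy data. The hyperparameters below are illustrative assumptions, not values from this commit: the hard-coded 768-wide fc layers imply single-channel inputs 48 pixels high (128 channels × 48/8 rows = 768), encoder_size must match EncoderNet's default rnn_hidden_size of 200, and word_vector_dim must equal decoder_size so fc_2_layer's input lines up. It also assumes DynamicGRU resolves (see the note in EncoderNet).

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Illustrative hyperparameters (assumptions, not from the commit).
    batch_size, num_classes = 4, 95
    encoder_size, decoder_size, word_vector_dim = 200, 128, 128
    model = OCRAttention(batch_size, num_classes, encoder_size,
                         decoder_size, word_vector_dim)

    # Dummy batch: 1-channel, 48-pixel-high images and int64 target ids.
    images = fluid.dygraph.to_variable(
        np.random.rand(batch_size, 1, 48, 384).astype("float32"))
    label_in = fluid.dygraph.to_variable(
        np.random.randint(0, num_classes, (batch_size, 10)).astype("int64"))

    prediction = model(images, label_in)
    # prediction: [batch_size, seq_len, num_classes + 2] softmax scores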