Commit c93012a9
Author: ranqiu
Authored: Nov 07, 2017
Parent: 578f4099

Refine conv_seq_to_seq

Showing 4 changed files with 58 additions and 57 deletions (+58 -57):

    conv_seq_to_seq/infer.py    +3  -3
    conv_seq_to_seq/model.py    +51 -50
    conv_seq_to_seq/reader.py   +3  -3
    conv_seq_to_seq/train.py    +1  -1
conv_seq_to_seq/infer.py

@@ -68,12 +68,12 @@ def parse_args():
         "--beam_size",
         default=1,
         type=int,
-        help="Beam search width. (default: %(default)s)")
+        help="The width of beam expansion. (default: %(default)s)")
     parser.add_argument(
         "--model_path",
         type=str,
         required=True,
-        help="Model path. (default: %(default)s)")
+        help="The path of the trained model. (default: %(default)s)")
     return parser.parse_args()

@@ -122,7 +122,7 @@ def infer(infer_data_path,
     :type drop_rate: float
     :param max_len: The maximum length of the sentence to be generated.
     :type max_len: int
-    :param beam_size: The width of beam search.
+    :param beam_size: The width of beam expansion.
     :type beam_size: int
     """
     # load dict
conv_seq_to_seq/model.py

@@ -20,7 +20,7 @@ def gated_conv_with_batchnorm(input,
     :type input: LayerOutput
     :param size: The dimension of the block's output.
     :type size: int
-    :param context_len: The context width of the convolution.
+    :param context_len: The context length of the convolution.
     :type context_len: int
     :param context_start: The start position of the context.
     :type context_start: int

@@ -81,9 +81,9 @@ def encoder(token_emb,
     :type token_emb: LayerOutput
     :param pos_emb: The embedding vector of the input token's position.
     :type pos_emb: LayerOutput
-    :param conv_blocks: The scale list of the convolution blocks. And each element of the
-        list contains output dimension and context length of the corresponding
-        convolution block.
+    :param conv_blocks: The scale list of the convolution blocks. Each element of
+        the list contains the output dimension and context length of the
+        corresponding convolution block.
     :type conv_blocks: list of tuple
     :param num_attention: The total number of the attention modules used in the decoder.
     :type num_attention: int
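A note for orientation (not part of the commit): each convolution block in these lists is an (output_dimension, context_length) tuple. A minimal illustration, using the default that appears later in this diff plus one hypothetical variant:

    # Default from the decoder signature below: three blocks,
    # each producing a 256-dim output from a 3-token context.
    enc_conv_blocks = [(256, 3)] * 3

    # A hypothetical configuration with a wider final block, same format:
    dec_conv_blocks = [(256, 3), (256, 3), (512, 5)]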
@@ -109,9 +109,9 @@ def encoder(token_emb,
     for (size, context_len) in conv_blocks:
         if block_input.size == size:
-            res = block_input
+            residual = block_input
         else:
-            res = paddle.layer.fc(
+            residual = paddle.layer.fc(
                 input=block_input,
                 size=size,
                 act=paddle.activation.Linear(),

@@ -127,9 +127,10 @@ def encoder(token_emb,
             drop_rate=drop_rate)

         with paddle.layer.mixed(size=size) as block_output:
-            block_output += paddle.layer.identity_projection(res)
+            block_output += paddle.layer.identity_projection(residual)
             block_output += paddle.layer.identity_projection(gated_conv)

+        # halve the variance of the sum
         block_output = paddle.layer.slope_intercept(
             input=block_output, slope=math.sqrt(0.5))
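For context on the "halve the variance of the sum" comments this commit adds (a standalone sketch, not the commit's code): ConvS2S rescales every residual sum by sqrt(0.5) so that, assuming the two summands are independent and have equal variance, the result keeps the variance of a single summand:

    import math
    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.standard_normal(1_000_000)   # variance ~1
    y = rng.standard_normal(1_000_000)   # variance ~1

    print(np.var(x + y))                      # ~2.0: summing doubles the variance
    print(np.var((x + y) * math.sqrt(0.5)))   # ~1.0: slope=sqrt(0.5) restores it

This is exactly what the paddle.layer.slope_intercept(..., slope=math.sqrt(0.5)) calls do throughout both the encoder and the decoder.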
@@ -143,14 +144,15 @@ def encoder(token_emb,
         param_attr=paddle.attr.Param(learning_rate=1.0 / (2.0 * num_attention)),
         bias_attr=True)

-    encoded = paddle.layer.addto(input=[encoded_vec, embedding])
-    encoded = paddle.layer.slope_intercept(input=encoded, slope=math.sqrt(0.5))
+    encoded_sum = paddle.layer.addto(input=[encoded_vec, embedding])
+    # halve the variance of the sum
+    encoded_sum = paddle.layer.slope_intercept(input=encoded_sum, slope=math.sqrt(0.5))

-    return encoded_vec, encoded
+    return encoded_vec, encoded_sum


-def attention(decoder_state, cur_embedding, encoded_vec, encoded):
+def attention(decoder_state, cur_embedding, encoded_vec, encoded_sum):
     """
     Definition of the attention.

@@ -160,12 +162,12 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
     :type cur_embedding: LayerOutput
     :param encoded_vec: The source token encoding.
     :type encoded_vec: LayerOutput
-    :param encoded: The sum of the source token's encoding and embedding.
-    :type encoded: LayerOutput
+    :param encoded_sum: The sum of the source token's encoding and embedding.
+    :type encoded_sum: LayerOutput
     :return: A context vector.
     :rtype: LayerOutput
     """
-    res = decoder_state
+    residual = decoder_state
     state_size = decoder_state.size
     emb_dim = cur_embedding.size
@@ -173,6 +175,7 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
         state_summary += paddle.layer.full_matrix_projection(decoder_state)
         state_summary += paddle.layer.identity_projection(cur_embedding)

+    # halve the variance of the sum
     state_summary = paddle.layer.slope_intercept(
         input=state_summary, slope=math.sqrt(0.5))

@@ -186,7 +189,7 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
         act=paddle.activation.SequenceSoftmax(),
         bias_attr=False)

-    scaled = paddle.layer.scaling(weight=attention_weight, input=encoded)
+    scaled = paddle.layer.scaling(weight=attention_weight, input=encoded_sum)

     attended = paddle.layer.pooling(input=scaled, pooling_type=paddle.pooling.Sum())
@@ -197,9 +200,9 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
         act=paddle.activation.Linear(),
         bias_attr=True)

-    # TODO scaled by length
-    attention_result = paddle.layer.addto(input=[attended_proj, res])
+    attention_result = paddle.layer.addto(input=[attended_proj, residual])

+    # halve the variance of the sum
     attention_result = paddle.layer.slope_intercept(
         input=attention_result, slope=math.sqrt(0.5))

     return attention_result
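To make the renamed arguments easier to follow (an assumption-laden numpy sketch, not the commit's code, with the projection layers and the parts of the function elided from this diff approximated away): encoded_vec supplies the attention keys, the renamed encoded_sum supplies the values, and state_summary (decoder state plus current embedding) acts as the query. For a single query the arithmetic reduces to:

    import numpy as np

    rng = np.random.default_rng(0)
    S, d = 6, 8                                 # source length, hidden size
    encoded_vec = rng.standard_normal((S, d))   # keys: source token encodings
    encoded_sum = rng.standard_normal((S, d))   # values: encoding + embedding
    state_summary = rng.standard_normal(d)      # query: state + cur_embedding

    scores = encoded_vec @ state_summary                 # one score per position
    weights = np.exp(scores) / np.exp(scores).sum()      # SequenceSoftmax
    context = (weights[:, None] * encoded_sum).sum(0)    # scaling + sum pooling

The residual connection and the sqrt(0.5) rescaling shown in the hunk above are then applied to this context vector.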
@@ -208,7 +211,7 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
 def decoder(token_emb,
             pos_emb,
             encoded_vec,
-            encoded,
+            encoded_sum,
             dict_size,
             conv_blocks=[(256, 3)] * 3,
             drop_rate=0.1):

@@ -221,13 +224,13 @@ def decoder(token_emb,
     :type pos_emb: LayerOutput
     :param encoded_vec: The source token encoding.
     :type encoded_vec: LayerOutput
-    :param encoded: The sum of the source token's encoding and embedding.
-    :type encoded: LayerOutput
+    :param encoded_sum: The sum of the source token's encoding and embedding.
+    :type encoded_sum: LayerOutput
     :param dict_size: The size of the target dictionary.
     :type dict_size: int
-    :param conv_blocks: The scale list of the convolution blocks. And each element of
-        the list contains output dimension and context length of the corresponding
-        convolution block.
+    :param conv_blocks: The scale list of the convolution blocks. Each element of
+        the list contains the output dimension and context length of the
+        corresponding convolution block.
     :type conv_blocks: list of tuple
     :param drop_rate: Dropout rate.
     :type drop_rate: float
@@ -235,24 +238,14 @@ def decoder(token_emb,
     :rtype: LayerOutput
     """

-    def attention_step(decoder_state, cur_embedding, encoded_vec, encoded):
+    def attention_step(decoder_state, cur_embedding, encoded_vec, encoded_sum):
         conditional = attention(
             decoder_state=decoder_state,
             cur_embedding=cur_embedding,
             encoded_vec=encoded_vec,
-            encoded=encoded)
+            encoded_sum=encoded_sum)
         return conditional

-    def softmax_step(input):
-        return paddle.layer.fc(
-            input=input,
-            size=dict_size,
-            act=paddle.activation.Softmax(),
-            param_attr=paddle.attr.Param(
-                initial_mean=0.,
-                initial_std=math.sqrt((1.0 - drop_rate) / input.size)),
-            bias_attr=True, )
-
     embedding = paddle.layer.addto(
         input=[token_emb, pos_emb],
         layer_attr=paddle.attr.Extra(drop_rate=drop_rate))
@@ -269,9 +262,9 @@ def decoder(token_emb,
for
(
size
,
context_len
)
in
conv_blocks
:
if
block_input
.
size
==
size
:
res
=
block_input
res
idual
=
block_input
else
:
res
=
paddle
.
layer
.
fc
(
res
idual
=
paddle
.
layer
.
fc
(
input
=
block_input
,
size
=
size
,
act
=
paddle
.
activation
.
Linear
(),
...
@@ -288,13 +281,15 @@ def decoder(token_emb,
             decoder_state,
             embedding,
             paddle.layer.StaticInput(input=encoded_vec),
-            paddle.layer.StaticInput(input=encoded),
+            paddle.layer.StaticInput(input=encoded_sum),
         ]

         conditional = paddle.layer.recurrent_group(
             step=attention_step, input=group_inputs)

-        block_output = paddle.layer.addto(input=[conditional, res])
+        block_output = paddle.layer.addto(input=[conditional, residual])
+
+        # halve the variance of the sum
         block_output = paddle.layer.slope_intercept(
             input=block_output, slope=math.sqrt(0.5))
@@ -307,8 +302,14 @@ def decoder(token_emb,
         act=paddle.activation.Linear(),
         layer_attr=paddle.attr.Extra(drop_rate=drop_rate))

-    decoder_out = paddle.layer.recurrent_group(
-        step=softmax_step, input=[block_output])
+    decoder_out = paddle.layer.fc(
+        input=block_output,
+        size=dict_size,
+        act=paddle.activation.Softmax(),
+        param_attr=paddle.attr.Param(
+            initial_mean=0.,
+            initial_std=math.sqrt((1.0 - drop_rate) / block_output.size)),
+        bias_attr=True)

     return decoder_out
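Why the softmax_step/recurrent_group pair deleted above can be replaced by this single fc call (a plain-numpy sketch of the equivalence, under the assumption that the step function carries no state across timesteps): a fully connected layer over a sequence applies the same weights at every position, so stepping through the sequence and one batched projection produce identical outputs:

    import numpy as np

    rng = np.random.default_rng(0)
    T, d_in, d_out = 5, 8, 4                  # timesteps, input dim, dict size
    seq = rng.standard_normal((T, d_in))
    W = rng.standard_normal((d_in, d_out))
    b = rng.standard_normal(d_out)

    stepwise = np.stack([seq[t] @ W + b for t in range(T)])  # recurrent_group style
    batched = seq @ W + b                                    # direct fc style
    assert np.allclose(stepwise, batched)

The initial_std=math.sqrt((1.0 - drop_rate) / size) initialization is carried over unchanged; it follows the ConvS2S scheme of scaling the initial weight variance to compensate for dropout.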
@@ -333,13 +334,13 @@ def conv_seq2seq(src_dict_size,
     :type pos_size: int
     :param emb_dim: The dimension of the embedding vector.
     :type emb_dim: int
-    :param enc_conv_blocks: The scale list of the encoder's convolution blocks. And each element of
-        the list contains output dimension and context length of the corresponding
-        convolution block.
+    :param enc_conv_blocks: The scale list of the encoder's convolution blocks. Each element
+        of the list contains the output dimension and context length of the
+        corresponding convolution block.
     :type enc_conv_blocks: list of tuple
-    :param dec_conv_blocks: The scale list of the decoder's convolution blocks. And each element of
-        the list contains output dimension and context length of the corresponding
-        convolution block.
+    :param dec_conv_blocks: The scale list of the decoder's convolution blocks. Each element
+        of the list contains the output dimension and context length of the
+        corresponding convolution block.
     :type dec_conv_blocks: list of tuple
     :param drop_rate: Dropout rate.
     :type drop_rate: float
@@ -368,7 +369,7 @@ def conv_seq2seq(src_dict_size,
         param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))

     num_attention = len(dec_conv_blocks)
-    encoded_vec, encoded = encoder(
+    encoded_vec, encoded_sum = encoder(
         token_emb=src_emb,
         pos_emb=src_pos_emb,
         conv_blocks=enc_conv_blocks,

@@ -399,7 +400,7 @@ def conv_seq2seq(src_dict_size,
         token_emb=trg_emb,
         pos_emb=trg_pos_emb,
         encoded_vec=encoded_vec,
-        encoded=encoded,
+        encoded_sum=encoded_sum,
         dict_size=trg_dict_size,
         conv_blocks=dec_conv_blocks,
         drop_rate=drop_rate)

@@ -413,4 +414,4 @@ def conv_seq2seq(src_dict_size,
     cost = paddle.layer.classification_cost(
         input=decoder_out, label=trg_next_word)

-    return cost
\ No newline at end of file
+    return cost
conv_seq_to_seq/reader.py

@@ -18,7 +18,7 @@ def get_reverse_dict(dictionary):
 def load_data(data_file, src_dict, trg_dict):
-    UNK_IDX = src_dict['UNK']
+    UNK_IDX = src_dict['<unk>']
     with open(data_file, 'r') as f:
         for line in f:
             line_split = line.strip().split('\t')

@@ -34,7 +34,7 @@ def load_data(data_file, src_dict, trg_dict):
 def data_reader(data_file, src_dict, trg_dict, pos_size, padding_num):
     def reader():
-        UNK_IDX = src_dict['UNK']
+        UNK_IDX = src_dict['<unk>']
         word_padding = trg_dict.__len__()
         pos_padding = pos_size

@@ -64,4 +64,4 @@ def data_reader(data_file, src_dict, trg_dict, pos_size, padding_num):
             trg_next_word = trg_next_word + [trg_dict['<e>']] * padding_num
             yield src_word, src_word_pos, trg_word, trg_word_pos, trg_next_word

-    return reader
\ No newline at end of file
+    return reader
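The change in this file swaps the hard-coded out-of-vocabulary key 'UNK' for '<unk>', presumably matching the token actually written into the vocabulary files. A defensive variant (hypothetical helper, not in the commit) that avoids a KeyError on unseen words:

    def word_id(word, vocab, unk_token='<unk>'):
        # Fall back to the OOV id for words missing from the vocabulary.
        return vocab.get(word, vocab[unk_token])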
conv_seq_to_seq/train.py

@@ -249,4 +249,4 @@ def main():
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()