PaddlePaddle / models
Commit c93012a9
Authored November 07, 2017 by ranqiu

Refine conv_seq_to_seq

Parent: 578f4099
Showing 4 changed files with 58 additions and 57 deletions (+58 −57)
conv_seq_to_seq/infer.py   +3  −3
conv_seq_to_seq/model.py   +51 −50
conv_seq_to_seq/reader.py  +3  −3
conv_seq_to_seq/train.py   +1  −1
conv_seq_to_seq/infer.py

@@ -68,12 +68,12 @@ def parse_args():
         "--beam_size",
         default=1,
         type=int,
-        help="Beam search width. (default: %(default)s)")
+        help="The width of beam expasion. (default: %(default)s)")
     parser.add_argument(
         "--model_path",
         type=str,
         required=True,
-        help="Model path. (default: %(default)s)")
+        help="The path of trained model. (default: %(default)s)")
     return parser.parse_args()

@@ -122,7 +122,7 @@ def infer(infer_data_path,
     :type drop_rate: float
     :param max_len: The maximum length of the sentence to be generated.
     :type max_len: int
-    :param beam_size: The width of beam search.
+    :param beam_size: The width of beam expansion.
     :type beam_size: int
     """
     # load dict
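Note: both help strings keep argparse's "%(default)s" placeholder, which argparse expands to the option's default value when the help text is rendered. A minimal, self-contained sketch of that behaviour (the model path below is a made-up example, not a path from this repository):

import argparse

# Hypothetical parser mirroring the two options touched above.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--beam_size",
    default=1,
    type=int,
    help="The width of beam expansion. (default: %(default)s)")
parser.add_argument(
    "--model_path",
    type=str,
    required=True,
    help="The path of trained model. (default: %(default)s)")

# --help would render the beam_size entry roughly as:
#   --beam_size BEAM_SIZE  The width of beam expansion. (default: 1)
args = parser.parse_args(["--model_path", "params.tar.gz"])
print(args.beam_size, args.model_path)  # -> 1 params.tar.gz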
conv_seq_to_seq/model.py

@@ -20,7 +20,7 @@ def gated_conv_with_batchnorm(input,
     :type input: LayerOutput
     :param size: The dimension of the block's output.
     :type size: int
-    :param context_len: The context width of the convolution.
+    :param context_len: The context length of the convolution.
     :type context_len: int
     :param context_start: The start position of the context.
     :type context_start: int

@@ -81,9 +81,9 @@ def encoder(token_emb,
     :type token_emb: LayerOutput
     :param pos_emb: The embedding vector of the input token's position.
     :type pos_emb: LayerOutput
-    :param conv_blocks: The scale list of the convolution blocks. And each element of the
-        list contains output dimension and context length of the corresponding
-        convolution block.
+    :param conv_blocks: The scale list of the convolution blocks. Each element of
+        the list contains output dimension and context length of
+        the corresponding convolution block.
     :type conv_blocks: list of tuple
     :param num_attention: The total number of the attention modules used in the decoder.
     :type num_attention: int

@@ -109,9 +109,9 @@ def encoder(token_emb,
     for (size, context_len) in conv_blocks:
         if block_input.size == size:
-            res = block_input
+            residual = block_input
         else:
-            res = paddle.layer.fc(
+            residual = paddle.layer.fc(
                 input=block_input,
                 size=size,
                 act=paddle.activation.Linear(),

@@ -127,9 +127,10 @@ def encoder(token_emb,
             drop_rate=drop_rate)
         with paddle.layer.mixed(size=size) as block_output:
-            block_output += paddle.layer.identity_projection(res)
+            block_output += paddle.layer.identity_projection(residual)
             block_output += paddle.layer.identity_projection(gated_conv)
+        # halve the variance of the sum
         block_output = paddle.layer.slope_intercept(
             input=block_output, slope=math.sqrt(0.5))

@@ -143,14 +144,15 @@ def encoder(token_emb,
         param_attr=paddle.attr.Param(learning_rate=1.0 / (2.0 * num_attention)),
         bias_attr=True)
-    encoded = paddle.layer.addto(input=[encoded_vec, embedding])
-    encoded = paddle.layer.slope_intercept(input=encoded, slope=math.sqrt(0.5))
-    return encoded_vec, encoded
+    encoded_sum = paddle.layer.addto(input=[encoded_vec, embedding])
+    # halve the variance of the sum
+    encoded_sum = paddle.layer.slope_intercept(input=encoded_sum, slope=math.sqrt(0.5))
+    return encoded_vec, encoded_sum

-def attention(decoder_state, cur_embedding, encoded_vec, encoded):
+def attention(decoder_state, cur_embedding, encoded_vec, encoded_sum):
     """
     Definition of the attention.

@@ -160,12 +162,12 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
     :type cur_embedding: LayerOutput
     :param encoded_vec: The source token encoding.
     :type encoded_vec: LayerOutput
-    :param encoded: The sum of the source token's encoding and embedding.
-    :type encoded: LayerOutput
+    :param encoded_sum: The sum of the source token's encoding and embedding.
+    :type encoded_sum: LayerOutput
     :return: A context vector.
     :rtype: LayerOutput
     """
-    res = decoder_state
+    residual = decoder_state
     state_size = decoder_state.size
     emb_dim = cur_embedding.size

@@ -173,6 +175,7 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
     state_summary += paddle.layer.full_matrix_projection(decoder_state)
     state_summary += paddle.layer.identity_projection(cur_embedding)
+    # halve the variance of the sum
     state_summary = paddle.layer.slope_intercept(
         input=state_summary, slope=math.sqrt(0.5))

@@ -186,7 +189,7 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
         act=paddle.activation.SequenceSoftmax(),
         bias_attr=False)
-    scaled = paddle.layer.scaling(weight=attention_weight, input=encoded)
+    scaled = paddle.layer.scaling(weight=attention_weight, input=encoded_sum)
     attended = paddle.layer.pooling(
         input=scaled, pooling_type=paddle.pooling.Sum())

@@ -197,9 +200,9 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
         act=paddle.activation.Linear(),
         bias_attr=True)
-    # TODO scaled by length
-    attention_result = paddle.layer.addto(input=[attended_proj, res])
+    attention_result = paddle.layer.addto(input=[attended_proj, residual])
+    # halve the variance of the sum
     attention_result = paddle.layer.slope_intercept(
         input=attention_result, slope=math.sqrt(0.5))
     return attention_result
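Note on the "# halve the variance of the sum" comments added above: they document the ConvS2S convention of multiplying the sum of two roughly independent, similar-variance signals (residual plus block output, or encoding plus embedding) by sqrt(0.5) so that the sum keeps the variance of either input; that scaling is what paddle.layer.slope_intercept(..., slope=math.sqrt(0.5)) applies here, since no intercept is given. A quick NumPy check of the arithmetic (illustration only, not code from this repository):

import math
import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(0.0, 1.0, size=1_000_000)  # e.g. the residual path
b = rng.normal(0.0, 1.0, size=1_000_000)  # e.g. the gated-conv output

s = (a + b) * math.sqrt(0.5)  # what slope_intercept with slope=sqrt(0.5) computes

# For independent unit-variance inputs Var(a + b) = 2,
# so scaling by sqrt(0.5) brings the variance back to ~1.
print(np.var(a + b))  # ~2.0
print(np.var(s))      # ~1.0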
@@ -208,7 +211,7 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded):
 def decoder(token_emb,
             pos_emb,
             encoded_vec,
-            encoded,
+            encoded_sum,
             dict_size,
             conv_blocks=[(256, 3)] * 3,
             drop_rate=0.1):

@@ -221,13 +224,13 @@ def decoder(token_emb,
     :type pos_emb: LayerOutput
     :param encoded_vec: The source token encoding.
     :type encoded_vec: LayerOutput
-    :param encoded: The sum of the source token's encoding and embedding.
-    :type encoded: LayerOutput
+    :param encoded_sum: The sum of the source token's encoding and embedding.
+    :type encoded_sum: LayerOutput
     :param dict_size: The size of the target dictionary.
     :type dict_size: int
-    :param conv_blocks: The scale list of the convolution blocks. And each element of the
-        list contains output dimension and context length of the corresponding
-        convolution block.
+    :param conv_blocks: The scale list of the convolution blocks. Each element
+        of the list contains output dimension and context length
+        of the corresponding convolution block.
     :type conv_blocks: list of tuple
     :param drop_rate: Dropout rate.
     :type drop_rate: float

@@ -235,24 +238,14 @@ def decoder(token_emb,
     :rtype: LayerOutput
     """
-    def attention_step(decoder_state, cur_embedding, encoded_vec, encoded):
+    def attention_step(decoder_state, cur_embedding, encoded_vec, encoded_sum):
         conditional = attention(
             decoder_state=decoder_state,
             cur_embedding=cur_embedding,
             encoded_vec=encoded_vec,
-            encoded=encoded)
+            encoded_sum=encoded_sum)
         return conditional

-    def softmax_step(input):
-        return paddle.layer.fc(
-            input=input,
-            size=dict_size,
-            act=paddle.activation.Softmax(),
-            param_attr=paddle.attr.Param(
-                initial_mean=0.,
-                initial_std=math.sqrt((1.0 - drop_rate) / input.size)),
-            bias_attr=True, )
     embedding = paddle.layer.addto(
         input=[token_emb, pos_emb],
         layer_attr=paddle.attr.Extra(drop_rate=drop_rate))

@@ -269,9 +262,9 @@ def decoder(token_emb,
     for (size, context_len) in conv_blocks:
         if block_input.size == size:
-            res = block_input
+            residual = block_input
         else:
-            res = paddle.layer.fc(
+            residual = paddle.layer.fc(
                 input=block_input,
                 size=size,
                 act=paddle.activation.Linear(),

@@ -288,13 +281,15 @@ def decoder(token_emb,
             decoder_state,
             embedding,
             paddle.layer.StaticInput(input=encoded_vec),
-            paddle.layer.StaticInput(input=encoded),
+            paddle.layer.StaticInput(input=encoded_sum),
         ]
         conditional = paddle.layer.recurrent_group(
             step=attention_step, input=group_inputs)
-        block_output = paddle.layer.addto(input=[conditional, res])
+        block_output = paddle.layer.addto(input=[conditional, residual])
+        # halve the variance of the sum
         block_output = paddle.layer.slope_intercept(
             input=block_output, slope=math.sqrt(0.5))

@@ -307,8 +302,14 @@ def decoder(token_emb,
         act=paddle.activation.Linear(),
         layer_attr=paddle.attr.Extra(drop_rate=drop_rate))
-    decoder_out = paddle.layer.recurrent_group(
-        step=softmax_step, input=[block_output])
+    decoder_out = paddle.layer.fc(
+        input=block_output,
+        size=dict_size,
+        act=paddle.activation.Softmax(),
+        param_attr=paddle.attr.Param(
+            initial_mean=0.,
+            initial_std=math.sqrt((1.0 - drop_rate) / block_output.size)),
+        bias_attr=True)
     return decoder_out

@@ -333,13 +334,13 @@ def conv_seq2seq(src_dict_size,
     :type pos_size: int
     :param emb_dim: The dimension of the embedding vector.
     :type emb_dim: int
-    :param enc_conv_blocks: The scale list of the encoder's convolution blocks. And each element of
-        the list contains output dimension and context length of the corresponding
-        convolution block.
+    :param enc_conv_blocks: The scale list of the encoder's convolution blocks. Each element
+        of the list contains output dimension and context length of the
+        corresponding convolution block.
     :type enc_conv_blocks: list of tuple
-    :param dec_conv_blocks: The scale list of the decoder's convolution blocks. And each element of
-        the list contains output dimension and context length of the corresponding
-        convolution block.
+    :param dec_conv_blocks: The scale list of the decoder's convolution blocks. Each element
+        of the list contains output dimension and context length of the
+        corresponding convolution block.
     :type dec_conv_blocks: list of tuple
     :param drop_rate: Dropout rate.
     :type drop_rate: float

@@ -368,7 +369,7 @@ def conv_seq2seq(src_dict_size,
         param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
     num_attention = len(dec_conv_blocks)
-    encoded_vec, encoded = encoder(
+    encoded_vec, encoded_sum = encoder(
         token_emb=src_emb,
         pos_emb=src_pos_emb,
         conv_blocks=enc_conv_blocks,

@@ -399,7 +400,7 @@ def conv_seq2seq(src_dict_size,
         token_emb=trg_emb,
         pos_emb=trg_pos_emb,
         encoded_vec=encoded_vec,
-        encoded=encoded,
+        encoded_sum=encoded_sum,
         dict_size=trg_dict_size,
         conv_blocks=dec_conv_blocks,
         drop_rate=drop_rate)

@@ -413,4 +414,4 @@ def conv_seq2seq(src_dict_size,
     cost = paddle.layer.classification_cost(
         input=decoder_out, label=trg_next_word)
     return cost
\ No newline at end of file
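The main functional change above is in the decoder output: the removed softmax_step / recurrent_group pair applied the dict_size softmax projection one timestep at a time, while the new code applies a single paddle.layer.fc with Softmax activation to the whole block_output sequence. Since a dense projection over a sequence acts on each position independently, the two formulations should produce the same per-position distributions; a plain-NumPy sketch of that equivalence (hypothetical sizes, not repository code):

import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)  # stabilise the exponent
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

seq_len, hidden, vocab = 5, 8, 20            # hypothetical sizes
rng = np.random.default_rng(0)
states = rng.normal(size=(seq_len, hidden))  # stands in for block_output, one row per position
W = rng.normal(size=(hidden, vocab))
b = np.zeros(vocab)

# One projection over the whole sequence (the new single fc layer)...
fused = softmax(states @ W + b)

# ...matches projecting each position separately (what softmax_step did
# inside the removed recurrent_group).
stepped = np.stack([softmax(s @ W + b) for s in states])

print(np.allclose(fused, stepped))  # True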
conv_seq_to_seq/reader.py

@@ -18,7 +18,7 @@ def get_reverse_dict(dictionary):
 def load_data(data_file, src_dict, trg_dict):
-    UNK_IDX = src_dict['UNK']
+    UNK_IDX = src_dict['<unk>']
     with open(data_file, 'r') as f:
         for line in f:
             line_split = line.strip().split('\t')

@@ -34,7 +34,7 @@ def load_data(data_file, src_dict, trg_dict):
 def data_reader(data_file, src_dict, trg_dict, pos_size, padding_num):
     def reader():
-        UNK_IDX = src_dict['UNK']
+        UNK_IDX = src_dict['<unk>']
         word_padding = trg_dict.__len__()
         pos_padding = pos_size

@@ -64,4 +64,4 @@ def data_reader(data_file, src_dict, trg_dict, pos_size, padding_num):
             trg_next_word = trg_next_word + [trg_dict['<e>']] * padding_num
             yield src_word, src_word_pos, trg_word, trg_word_pos, trg_next_word
     return reader
\ No newline at end of file
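The reader change looks the unknown-word id up under the key '<unk>' instead of 'UNK', presumably matching the token actually present in the preprocessed dictionaries. A minimal sketch of how such a lookup is typically used when mapping words to ids (hypothetical dictionary, not repository code):

# Hypothetical source dictionary of the kind load_data() receives.
src_dict = {'<s>': 0, '<e>': 1, '<unk>': 2, 'hello': 3, 'world': 4}

UNK_IDX = src_dict['<unk>']  # raises KeyError if the dictionary spells the token differently

def words_to_ids(words, dictionary, unk_idx):
    # Fall back to the unknown id for out-of-vocabulary words.
    return [dictionary.get(w, unk_idx) for w in words]

print(words_to_ids(['hello', 'paddle', 'world'], src_dict, UNK_IDX))  # [3, 2, 4]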
conv_seq_to_seq/train.py

@@ -249,4 +249,4 @@ def main():
 if __name__ == '__main__':
     main()
\ No newline at end of file