Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
e7bbad6c
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e7bbad6c
编写于
2月 10, 2020
作者:
G
Guo Sheng
提交者:
GitHub
2月 10, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix the leaving out of rnn_memory_helper_grad's output vars. test=develop (#22499)
上级
d143f70a
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
166 additions
and
1 deletion
+166
-1
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+1
-1
python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
+165
-0
未找到文件。
python/paddle/fluid/backward.py
浏览文件 @
e7bbad6c
...
...
@@ -1038,7 +1038,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
'''
if
op_desc
.
type
()
not
in
[
'rnn_memory_helper_grad'
]:
ops_to_remove
.
append
(
op_idx
)
continue
continue
new_vars
=
set
()
# create new gradient variables
...
...
python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
浏览文件 @
e7bbad6c
...
...
@@ -245,5 +245,170 @@ class TestRnnUtil(unittest.TestCase):
pass
class EncoderCell(RNNCell):
    """Encoder cell built from a stack of ``LSTMCell`` layers.

    Each layer consumes the output of the previous one; when
    ``dropout_prob`` is truthy, ``layers.dropout`` is applied to every
    layer's output (including the last) before it is passed on.
    """

    def __init__(
            self,
            num_layers,
            hidden_size,
            dropout_prob=0.,
            init_scale=0.1, ):
        """Create ``num_layers`` LSTM cells of width ``hidden_size``.

        ``init_scale`` is accepted but is not used by this class.
        """
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_prob = dropout_prob
        # One LSTMCell per layer, created in layer order.
        self.lstm_cells = [LSTMCell(hidden_size) for _ in range(num_layers)]

    def call(self, step_input, states):
        """Run one step through every layer.

        ``states[k]`` is handed to layer ``k``. Returns a tuple of the last
        layer's (possibly dropped-out) output and the list of per-layer new
        states.
        """
        hidden = step_input
        collected_states = []
        for layer_idx in range(self.num_layers):
            cell = self.lstm_cells[layer_idx]
            layer_out, layer_state = cell(hidden, states[layer_idx])
            if self.dropout_prob:
                hidden = layers.dropout(layer_out, self.dropout_prob)
            else:
                # A falsy dropout_prob (0.) skips the dropout op entirely.
                hidden = layer_out
            collected_states.append(layer_state)
        return hidden, collected_states

    @property
    def state_shape(self):
        """List of each layer's ``state_shape``, in layer order."""
        shapes = []
        for lstm_cell in self.lstm_cells:
            shapes.append(lstm_cell.state_shape)
        return shapes
class DecoderCell(RNNCell):
    """Decoder cell built from a stack of ``LSTMCell`` layers.

    Each layer consumes the output of the previous one; when
    ``dropout_prob`` is truthy, ``layers.dropout`` is applied to every
    layer's output (including the last) before it is passed on.
    """

    def __init__(self, num_layers, hidden_size, dropout_prob=0.):
        """Create ``num_layers`` LSTM cells of width ``hidden_size``."""
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_prob = dropout_prob
        # One LSTMCell per layer, created in layer order.
        self.lstm_cells = [LSTMCell(hidden_size) for _ in range(num_layers)]

    def call(self, step_input, states):
        """Run one step through every layer.

        ``states[k]`` is handed to layer ``k``. Returns a tuple of the last
        layer's (possibly dropped-out) output and the list of per-layer new
        LSTM states.
        """
        hidden = step_input
        updated_states = []
        for layer_idx in range(self.num_layers):
            cell = self.lstm_cells[layer_idx]
            layer_out, layer_state = cell(hidden, states[layer_idx])
            if self.dropout_prob:
                hidden = layers.dropout(layer_out, self.dropout_prob)
            else:
                # A falsy dropout_prob (0.) skips the dropout op entirely.
                hidden = layer_out
            updated_states.append(layer_state)
        return hidden, updated_states
def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size,
                      trg_vocab_size):
    """Build a vanilla seq2seq training graph and return its loss variable.

    The graph declares five int64 feed variables named ``"src"``,
    ``"src_sequence_length"``, ``"trg"``, ``"trg_sequence_length"`` and
    ``"label"``, embeds source and target ids, runs an ``EncoderCell`` and a
    ``DecoderCell`` through ``dynamic_rnn`` (the decoder starts from the
    encoder's final state), projects decoder outputs to ``trg_vocab_size``
    logits, and computes a masked softmax cross-entropy loss. An Adam
    optimizer with learning rate 0.001 is asked to minimize that loss.

    Args:
        num_layers: number of stacked LSTM layers in encoder and decoder.
        hidden_size: LSTM width; also the embedding width.
        dropout_prob: dropout probability forwarded to both cells.
        src_vocab_size: number of rows in the source embedding table.
        trg_vocab_size: number of rows in the target embedding table and
            the width of the output logits.

    Returns:
        The loss variable: ``reduce_mean`` over dim 0 of the masked loss,
        followed by ``reduce_sum``.
    """
    # data
    source = fluid.data(name="src", shape=[None, None], dtype="int64")
    source_length = fluid.data(
        name="src_sequence_length", shape=[None], dtype="int64")
    target = fluid.data(name="trg", shape=[None, None], dtype="int64")
    target_length = fluid.data(
        name="trg_sequence_length", shape=[None], dtype="int64")
    label = fluid.data(name="label", shape=[None, None, 1], dtype="int64")

    # embedding
    src_emb = fluid.embedding(source, (src_vocab_size, hidden_size))
    # Target ids index the target vocabulary, so the table must have
    # trg_vocab_size rows (previously sized with src_vocab_size).
    tar_emb = fluid.embedding(target, (trg_vocab_size, hidden_size))

    # encoder
    enc_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
    enc_output, enc_final_state = dynamic_rnn(
        cell=enc_cell, inputs=src_emb, sequence_length=source_length)

    # decoder: initialised from the encoder's final state
    dec_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
    dec_output, dec_final_state = dynamic_rnn(
        cell=dec_cell, inputs=tar_emb, initial_states=enc_final_state)
    # Project only the last dimension of the decoder output to the target
    # vocabulary; all leading dimensions are kept.
    logits = layers.fc(dec_output,
                       size=trg_vocab_size,
                       num_flatten_dims=len(dec_output.shape) - 1,
                       bias_attr=False)

    # loss
    loss = layers.softmax_with_cross_entropy(
        logits=logits, label=label, soft_label=False)
    # NOTE(review): the loss is unsqueezed on axis 2 before being multiplied
    # by the sequence mask; presumably a squeeze was intended so the ranks
    # match - confirm the resulting broadcast is what is wanted.
    loss = layers.unsqueeze(loss, axes=[2])
    max_tar_seq_len = layers.shape(target)[1]
    tar_mask = layers.sequence_mask(
        target_length, maxlen=max_tar_seq_len, dtype="float")
    loss = loss * tar_mask
    loss = layers.reduce_mean(loss, dim=[0])
    loss = layers.reduce_sum(loss)

    # optimizer
    optimizer = fluid.optimizer.Adam(0.001)
    optimizer.minimize(loss)
    return loss
class TestSeq2SeqModel(unittest.TestCase):
    """
    Test cases to confirm seq2seq api training correctly.
    """

    def setUp(self):
        """Seed NumPy, create random feed data and build the executor."""
        np.random.seed(123)
        self.model_hparams = {
            "num_layers": 2,
            "hidden_size": 128,
            "dropout_prob": 0.1,
            "src_vocab_size": 100,
            "trg_vocab_size": 100
        }
        self.iter_num = iter_num = 2
        self.batch_size = batch_size = 4
        src_seq_len = 10
        trg_seq_len = 12
        total = iter_num * batch_size
        src_vocab_size = self.model_hparams["src_vocab_size"]
        # Target tokens and labels belong to the target vocabulary; labels
        # index logits of width trg_vocab_size, so they must be sampled
        # below it (previously sampled against src_vocab_size).
        trg_vocab_size = self.model_hparams["trg_vocab_size"]
        # The sampling order below is kept fixed so the seeded random
        # stream yields the same fixtures from run to run.
        # randint's upper bound is exclusive, so sampled lengths lie in
        # [1, seq_len - 1].
        self.data = {
            "src": np.random.randint(
                2, src_vocab_size, (total, src_seq_len)).astype("int64"),
            "src_sequence_length": np.random.randint(
                1, src_seq_len, (total, )).astype("int64"),
            "trg": np.random.randint(
                2, trg_vocab_size, (total, trg_seq_len)).astype("int64"),
            "trg_sequence_length": np.random.randint(
                1, trg_seq_len, (total, )).astype("int64"),
            "label": np.random.randint(
                2, trg_vocab_size, (total, trg_seq_len, 1)).astype("int64"),
        }

        place = core.CUDAPlace(0) if core.is_compiled_with_cuda(
        ) else core.CPUPlace()
        self.exe = Executor(place)

    def test_seq2seq_model(self):
        """Build the model and run ``iter_num`` training iterations."""
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            cost = def_seq2seq_model(**self.model_hparams)
            self.exe.run(startup_program)
            # exe.run is given no program below, so these calls stay inside
            # the program_guard block.
            for iter_idx in range(self.iter_num):
                start = iter_idx * self.batch_size
                stop = (iter_idx + 1) * self.batch_size
                # The keys of self.data are exactly the feed variable
                # names; every array is sliced along its first axis.
                feed = {
                    name: values[start:stop]
                    for name, values in self.data.items()
                }
                cost_val = self.exe.run(feed=feed, fetch_list=[cost])[0]
                print("iter_idx: %d, cost: %f" % (iter_idx, cost_val))
# Run the unittest runner only when this file is executed as a script.
if "__main__" == __name__:
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录