Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
acee3dd3
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
acee3dd3
编写于
12月 13, 2022
作者:
L
lugimzzz
提交者:
GitHub
12月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[fluid clean] remove 4 fluid.layers api and imigrate 2 fluid.layer api (#48972)
* fluid clean layer * docs
上级
b06a5946
变更
6
展开全部
隐藏空白更改
内联
并排
Showing
6 changed file
with
431 addition
and
1910 deletion
+431
-1910
python/paddle/fluid/layers/rnn.py
python/paddle/fluid/layers/rnn.py
+0
-903
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+0
-20
python/paddle/fluid/tests/unittests/test_lstm_unit_op.py
python/paddle/fluid/tests/unittests/test_lstm_unit_op.py
+0
-77
python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
+62
-566
python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
+0
-338
python/paddle/nn/layer/rnn.py
python/paddle/nn/layer/rnn.py
+369
-6
未找到文件。
python/paddle/fluid/layers/rnn.py
浏览文件 @
acee3dd3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
acee3dd3
...
...
@@ -2179,26 +2179,6 @@ class TestBook(LayerTest):
x
,
kernel_size
=
[
5
,
3
],
stride
=
[
1
,
2
],
padding
=
(
2
,
1
)
)
def make_lstm_unit(self):
    """Build an ``lstm_unit`` layer in the default programs and return it.

    Three data variables (step input, previous hidden state, previous cell
    state) are each passed through an ``fc`` layer, and the three
    projections are handed to ``layers.lstm_unit``.
    """

    def _projected(name, width):
        # Declare the raw data variable first, then its fc projection, so
        # the creation order matches one input after another.
        raw = self._get_data(name=name, shape=[10, width], dtype='float32')
        return layers.fc(input=raw, size=width)

    with program_guard(
        fluid.default_main_program(), fluid.default_startup_program()
    ):
        x_t = _projected('x_t_data', 10)
        prev_hidden = _projected('prev_hidden_data', 30)
        prev_cell = _projected('prev_cell', 30)
        return layers.lstm_unit(
            x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell
        )
def
make_softmax
(
self
):
with
program_guard
(
fluid
.
default_main_program
(),
fluid
.
default_startup_program
()
...
...
python/paddle/fluid/tests/unittests/test_lstm_unit_op.py
浏览文件 @
acee3dd3
...
...
@@ -17,10 +17,6 @@ import unittest
import
numpy
as
np
from
op_test
import
OpTest
from
paddle
import
fluid
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.layers
import
lstm_unit
def sigmoid_np(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` computed with numpy."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator
...
...
@@ -30,79 +26,6 @@ def tanh_np(x):
return
2
*
sigmoid_np
(
2.0
*
x
)
-
1.0
class LstmUnitTestError(unittest.TestCase):
    """Checks that ``lstm_unit`` raises ``TypeError`` for invalid arguments."""

    def test_errors(self):
        """Feed numpy arrays and int32 variables into each argument slot."""
        with program_guard(Program(), Program()):
            batch_size, dict_dim, emb_dim, hidden_dim = 32, 128, 64, 512

            # Valid static-graph operands used as the "good" arguments.
            data = fluid.data(
                name='step_data', shape=[batch_size], dtype='int64'
            )
            inputs = fluid.embedding(input=data, size=[dict_dim, emb_dim])
            pre_hidden = fluid.data(
                name='pre_hidden',
                shape=[batch_size, hidden_dim],
                dtype='float32',
            )
            pre_cell = fluid.data(
                name='pre_cell',
                shape=[batch_size, hidden_dim],
                dtype='float32',
            )

            def _uniform(shape):
                return np.random.uniform(-0.1, 0.1, shape).astype('float64')

            # Plain numpy arrays: not variables of the static graph.
            np_input = _uniform((batch_size, emb_dim))
            np_pre_hidden = _uniform((batch_size, hidden_dim))
            np_pre_cell = _uniform((batch_size, hidden_dim))

            # A numpy array in any of the three positions must be rejected.
            with self.assertRaises(TypeError):
                lstm_unit(np_input, pre_hidden, pre_cell)

            with self.assertRaises(TypeError):
                lstm_unit(inputs, np_pre_hidden, pre_cell)

            with self.assertRaises(TypeError):
                lstm_unit(inputs, pre_hidden, np_pre_cell)

            # An int32 variable in any of the three positions must be
            # rejected as well.
            with self.assertRaises(TypeError):
                error_input = fluid.data(
                    name='error_input',
                    shape=[batch_size, emb_dim],
                    dtype='int32',
                )
                lstm_unit(error_input, pre_hidden, pre_cell)

            with self.assertRaises(TypeError):
                error_pre_hidden = fluid.data(
                    name='error_pre_hidden',
                    shape=[batch_size, hidden_dim],
                    dtype='int32',
                )
                lstm_unit(inputs, error_pre_hidden, pre_cell)

            with self.assertRaises(TypeError):
                error_pre_cell = fluid.data(
                    name='error_pre_cell',
                    shape=[batch_size, hidden_dim],
                    dtype='int32',
                )
                lstm_unit(inputs, pre_hidden, error_pre_cell)
class
LstmUnitTest
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"lstm_unit"
...
...
python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
浏览文件 @
acee3dd3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
浏览文件 @
acee3dd3
...
...
@@ -19,12 +19,10 @@ import numpy as np
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.layers
as
layers
import
paddle.nn
as
nn
from
paddle
import
Model
,
set_device
from
paddle.fluid.dygraph
import
Layer
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.framework
import
_test_eager_guard
from
paddle.nn
import
BeamSearchDecoder
,
dynamic_decode
from
paddle.static
import
InputSpec
as
Input
...
...
@@ -32,257 +30,6 @@ from paddle.static import InputSpec as Input
paddle
.
enable_static
()
class EncoderCell(layers.RNNCell):
    """Stack of ``layers.LSTMCell`` objects with optional dropout between layers."""

    def __init__(self, num_layers, hidden_size, dropout_prob=0.0):
        """Store the hyper-parameters and create one LSTM cell per layer."""
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_prob = dropout_prob
        self.lstm_cells = [
            layers.LSTMCell(hidden_size) for _ in range(num_layers)
        ]

    def call(self, step_input, states):
        """Run one step through every layer.

        Args:
            step_input: input of the first layer for this step.
            states: indexable collection holding one state per layer.

        Returns:
            ``(output_of_last_layer, list_of_new_states)``.
        """
        updated_states = []
        for layer_idx in range(self.num_layers):
            cell = self.lstm_cells[layer_idx]
            layer_out, layer_state = cell(step_input, states[layer_idx])
            # Dropout is only inserted when a positive probability is set.
            if self.dropout_prob > 0:
                step_input = layers.dropout(layer_out, self.dropout_prob)
            else:
                step_input = layer_out
            updated_states.append(layer_state)
        return step_input, updated_states

    @property
    def state_shape(self):
        """List with the ``state_shape`` of each wrapped LSTM cell."""
        return [lstm_cell.state_shape for lstm_cell in self.lstm_cells]
class DecoderCell(layers.RNNCell):
    """Stacked LSTM cells followed by an attention step over the encoder output."""

    def __init__(self, num_layers, hidden_size, dropout_prob=0.0):
        """Store the hyper-parameters and create one LSTM cell per layer."""
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_prob = dropout_prob
        self.lstm_cells = [
            layers.LSTMCell(hidden_size) for _ in range(num_layers)
        ]

    def attention(self, hidden, encoder_output, encoder_padding_mask):
        """Attend over ``encoder_output`` using ``hidden`` as the query.

        The query is an fc projection of ``hidden``; scores are the matmul
        of the query with the transposed encoder output, optionally shifted
        by ``encoder_padding_mask`` and normalised with softmax. The context
        is concatenated with ``hidden`` and projected to ``hidden_size``.
        """
        query = layers.fc(
            hidden, size=encoder_output.shape[-1], bias_attr=False
        )
        expanded_query = layers.unsqueeze(query, [1])
        scores = paddle.matmul(
            expanded_query, encoder_output, transpose_y=True
        )
        if encoder_padding_mask is not None:
            scores = paddle.add(scores, encoder_padding_mask)
        weights = paddle.nn.functional.softmax(scores)
        context = paddle.squeeze(paddle.matmul(weights, encoder_output), [1])
        merged = layers.concat([context, hidden], 1)
        return layers.fc(merged, size=self.hidden_size, bias_attr=False)

    def call(
        self, step_input, states, encoder_output, encoder_padding_mask=None
    ):
        """Run one decoding step.

        Args:
            step_input: input for this step.
            states: pair ``(lstm_states, input_feed)``; ``input_feed`` is
                concatenated to ``step_input`` before the first layer.
            encoder_output: tensor attended over by :meth:`attention`.
            encoder_padding_mask: optional additive mask for the scores.

        Returns:
            ``(attention_output, [new_lstm_states, attention_output])``.
        """
        lstm_states, input_feed = states
        updated_states = []
        step_input = layers.concat([step_input, input_feed], 1)
        for layer_idx in range(self.num_layers):
            cell = self.lstm_cells[layer_idx]
            layer_out, layer_state = cell(step_input, lstm_states[layer_idx])
            # Dropout is only inserted when a positive probability is set.
            if self.dropout_prob > 0:
                step_input = layers.dropout(layer_out, self.dropout_prob)
            else:
                step_input = layer_out
            updated_states.append(layer_state)
        attended = self.attention(
            step_input, encoder_output, encoder_padding_mask
        )
        return attended, [updated_states, attended]
class Encoder:
    """Runs an :class:`EncoderCell` over a source sequence via ``layers.rnn``."""

    def __init__(self, num_layers, hidden_size, dropout_prob=0.0):
        """Create the underlying encoder cell."""
        self.encoder_cell = EncoderCell(num_layers, hidden_size, dropout_prob)

    def __call__(self, src_emb, src_sequence_length):
        """Encode ``src_emb`` in forward order.

        Returns:
            ``(encoder_output, encoder_final_state)`` as produced by
            ``layers.rnn``.
        """
        outputs, final_state = layers.rnn(
            cell=self.encoder_cell,
            inputs=src_emb,
            sequence_length=src_sequence_length,
            is_reverse=False,
        )
        return outputs, final_state
class Decoder:
    """Decodes with a ``BeamSearchDecoder`` wrapped around a :class:`DecoderCell`."""

    def __init__(
        self,
        num_layers,
        hidden_size,
        dropout_prob,
        decoding_strategy="infer_sample",
        max_decoding_length=20,
    ):
        """Create the decoder cell and record the decoding configuration.

        ``max_decoding_length`` is stored as ``None`` when the strategy is
        ``"train_greedy"``; otherwise the given value is kept.
        """
        self.decoder_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
        self.decoding_strategy = decoding_strategy
        if self.decoding_strategy == "train_greedy":
            self.max_decoding_length = None
        else:
            self.max_decoding_length = max_decoding_length

    def __call__(
        self,
        decoder_initial_states,
        encoder_output,
        encoder_padding_mask,
        **kwargs
    ):
        """Tile the encoder tensors per beam and run ``layers.dynamic_decode``.

        ``output_layer`` is popped from ``kwargs`` and used as the decoder's
        ``output_fn``; ``beam_size`` is read (default 4) but left in
        ``kwargs``, which are forwarded to ``BeamSearchDecoder``.

        Returns:
            ``(decoder_output, decoder_final_state, dec_seq_lengths)``.
        """
        output_layer = kwargs.pop("output_layer", None)
        beam_size = kwargs.get("beam_size", 4)

        tile = BeamSearchDecoder.tile_beam_merge_with_batch
        encoder_output = tile(encoder_output, beam_size)
        encoder_padding_mask = tile(encoder_padding_mask, beam_size)

        decoder = BeamSearchDecoder(
            cell=self.decoder_cell, output_fn=output_layer, **kwargs
        )
        use_beam_search = self.decoding_strategy == "beam_search"
        outputs, final_state, seq_lengths = layers.dynamic_decode(
            decoder,
            inits=decoder_initial_states,
            max_step_num=self.max_decoding_length,
            encoder_output=encoder_output,
            encoder_padding_mask=encoder_padding_mask,
            # Turned off for beam search for test coverage.
            impute_finished=not use_beam_search,
            is_test=use_beam_search,
            return_length=True,
        )
        return outputs, final_state, seq_lengths
class Seq2SeqModel:
    """Seq2Seq model: RNN encoder-decoder with attention"""

    def __init__(
        self,
        num_layers,
        hidden_size,
        dropout_prob,
        src_vocab_size,
        trg_vocab_size,
        start_token,
        end_token,
        decoding_strategy="infer_sample",
        max_decoding_length=20,
        beam_size=4,
    ):
        """Create the embeddings, encoder, decoder and output projection."""
        self.start_token = start_token
        self.end_token = end_token
        self.max_decoding_length = max_decoding_length
        self.beam_size = beam_size

        self.src_embeder = paddle.nn.Embedding(
            src_vocab_size,
            hidden_size,
            weight_attr=fluid.ParamAttr(name="source_embedding"),
        )
        self.trg_embeder = paddle.nn.Embedding(
            trg_vocab_size,
            hidden_size,
            weight_attr=fluid.ParamAttr(name="target_embedding"),
        )
        self.encoder = Encoder(num_layers, hidden_size, dropout_prob)
        self.decoder = Decoder(
            num_layers,
            hidden_size,
            dropout_prob,
            decoding_strategy,
            max_decoding_length,
        )

        def _project_to_vocab(x):
            # Bias-free fc that flattens all but the last dimension of x
            # and outputs trg_vocab_size values.
            return layers.fc(
                x,
                size=trg_vocab_size,
                num_flatten_dims=len(x.shape) - 1,
                param_attr=fluid.ParamAttr(),
                bias_attr=False,
            )

        self.output_layer = _project_to_vocab

    def __call__(self, src, src_length, trg=None, trg_length=None):
        """Encode ``src`` and decode according to the decoder's strategy.

        Returns:
            For ``"beam_search"``: the decoder output as is.
            Otherwise: ``(probs, samples, sample_length)`` where ``probs``
            is the softmax of ``decoder_output.cell_outputs``, ``samples``
            is ``decoder_output.sample_ids`` and ``sample_length`` is the
            sequence lengths returned by the decoder.
        """
        # encoder
        encoder_output, encoder_final_state = self.encoder(
            self.src_embeder(src), src_length
        )

        decoder_initial_states = [
            encoder_final_state,
            self.decoder.decoder_cell.get_initial_states(
                batch_ref=encoder_output, shape=[encoder_output.shape[-1]]
            ),
        ]
        src_mask = layers.sequence_mask(
            src_length, maxlen=paddle.shape(src)[1], dtype="float32"
        )
        # 0 where the mask is 1, -1e9 where the mask is 0; added to the
        # attention scores by the decoder cell.
        encoder_padding_mask = (src_mask - 1.0) * 1e9
        encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])

        # decoder
        strategy = self.decoder.decoding_strategy
        if strategy == "train_greedy":
            decoder_kwargs = {
                "inputs": self.trg_embeder(trg),
                "sequence_length": trg_length,
            }
        elif strategy == "beam_search":
            decoder_kwargs = {
                "embedding_fn": self.trg_embeder,
                "beam_size": self.beam_size,
                "start_token": self.start_token,
                "end_token": self.end_token,
            }
        else:
            decoder_kwargs = {
                "embedding_fn": self.trg_embeder,
                "start_tokens": layers.fill_constant_batch_size_like(
                    input=encoder_output,
                    shape=[-1],
                    dtype=src.dtype,
                    value=self.start_token,
                ),
                "end_token": self.end_token,
            }
        decoder_kwargs["output_layer"] = self.output_layer

        decoder_output, decoder_final_state, dec_seq_lengths = self.decoder(
            decoder_initial_states,
            encoder_output,
            encoder_padding_mask,
            **decoder_kwargs
        )
        if self.decoder.decoding_strategy == "beam_search":
            # for inference
            return decoder_output

        logits = decoder_output.cell_outputs
        samples = decoder_output.sample_ids
        sample_length = dec_seq_lengths
        probs = paddle.nn.functional.softmax(logits)
        return probs, samples, sample_length
class
PolicyGradient
:
"""policy gradient"""
...
...
@@ -477,91 +224,6 @@ class SeqPGAgent:
return
results
class TestDynamicDecode(unittest.TestCase):
    """Builds a :class:`Seq2SeqModel` in static mode and runs beam search."""

    def setUp(self):
        """Seed numpy, create hyper-parameters, random data and an executor."""
        np.random.seed(123)
        self.model_hparams = {
            "num_layers": 2,
            "hidden_size": 32,
            "dropout_prob": 0.1,
            "src_vocab_size": 100,
            "trg_vocab_size": 100,
            "start_token": 0,
            "end_token": 1,
            "decoding_strategy": "infer_greedy",
            "max_decoding_length": 10,
        }

        self.iter_num = iter_num = 2
        self.batch_size = batch_size = 4
        src_seq_len = 10
        trg_seq_len = 12
        num_samples = iter_num * batch_size

        def _token_ids(shape):
            # Ids start at 2; the upper bound is the source vocabulary size
            # for every array.
            return np.random.randint(
                2, self.model_hparams["src_vocab_size"], shape
            ).astype("int64")

        def _lengths(max_len):
            return np.random.randint(1, max_len, (num_samples,)).astype(
                "int64"
            )

        # The dict literal is evaluated top to bottom, which fixes the
        # order of the random draws after seeding.
        self.data = {
            "src": _token_ids((num_samples, src_seq_len)),
            "src_sequence_length": _lengths(src_seq_len),
            "trg": _token_ids((num_samples, trg_seq_len)),
            "trg_sequence_length": _lengths(trg_seq_len),
            "label": _token_ids((num_samples, trg_seq_len, 1)),
        }

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        self.exe = Executor(place)

    def test_beam_search_infer(self):
        """Run the beam-search program once per batch; no values are asserted."""
        paddle.set_default_dtype("float32")
        paddle.enable_static()
        self.model_hparams["decoding_strategy"] = "beam_search"
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            source = fluid.data(name="src", shape=[None, None], dtype="int64")
            source_length = fluid.data(
                name="src_sequence_length", shape=[None], dtype="int64"
            )
            model = Seq2SeqModel(**self.model_hparams)
            output = model(source, source_length)

        self.exe.run(startup_program)
        for iter_idx in range(self.iter_num):
            begin = iter_idx * self.batch_size
            end = (iter_idx + 1) * self.batch_size
            trans_ids = self.exe.run(
                program=main_program,
                feed={
                    "src": self.data["src"][begin:end, :],
                    "src_sequence_length": self.data["src_sequence_length"][
                        begin:end
                    ],
                },
                fetch_list=[output],
            )[0]
class
ModuleApiTest
(
unittest
.
TestCase
):
@
classmethod
def
setUpClass
(
cls
):
...
...
python/paddle/nn/layer/rnn.py
浏览文件 @
acee3dd3
...
...
@@ -14,26 +14,389 @@
import
math
from
collections.abc
import
Sequence
from
functools
import
reduce
from
functools
import
partial
,
reduce
import
numpy
as
np
import
paddle
from
paddle
import
_C_ops
,
_legacy_C_ops
,
framework
,
in_dynamic_mode
from
paddle.fluid.framework
import
in_dygraph_mode
from
paddle.fluid.layers
import
utils
from
paddle.fluid.data_feeder
import
check_type
,
check_variable_and_dtype
from
paddle.fluid.framework
import
_non_static_mode
,
in_dygraph_mode
from
paddle.fluid.layers
import
control_flow
,
sequence_lod
,
utils
from
paddle.fluid.layers.utils
import
flatten
,
map_structure
from
paddle.framework
import
core
from
paddle.nn
import
Layer
from
paddle.nn
import
functional
as
F
from
paddle.nn
import
initializer
as
I
from
paddle.static
import
default_startup_program
,
program_guard
from
paddle.static
import
Variable
,
default_startup_program
,
program_guard
from
.container
import
LayerList
__all__
=
[]
def rnn(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    r"""
    Create a recurrent neural network driven by the RNN cell `cell`.

    :code:`cell.call()` (for dygraph mode :code:`cell.forward`) is applied
    repeatedly until the maximum length of `inputs` is reached. The work is
    delegated to the dynamic-graph implementation when running in
    non-static mode and to the static-graph implementation otherwise.

    Parameters:
        cell(RNNCellBase): An instance of `RNNCellBase`.
        inputs(Tensor): The input sequences.
            If `time_major` is True, the shape is
            `[time_steps, batch_size, input_size]`,
            else the shape is `[batch_size, time_steps, input_size]`.
        initial_states(Tensor|tuple|list, optional): The initial state of
            the rnn cell. A tensor or a possibly nested structure of
            tensors. If not provided, `cell.get_initial_states` is called
            to produce the initial state. Defaults to None.
        sequence_length (Tensor, optional): Shape `[batch_size]`, dtype
            int64 or int32. The valid lengths of the input sequences.
            Defaults to None. If `sequence_length` is not None, the inputs
            are treated as padded sequences: in each sequence, elements
            whose time step index is not less than the valid length are
            treated as paddings.
        time_major (bool, optional): Whether the first dimension of the
            input is the time-step dimension. Defaults to False.
        is_reverse (bool, optional): Whether to process the input sequences
            in reverse order. Defaults to False.
        **kwargs: Additional keyword arguments passed to `forward` of the
            cell.

    Returns:
        outputs (Tensor|list|tuple): The output sequence. A tensor or a
            nested structure of tensors.
            If `time_major` is True, the shape of each tensor in outputs is
            `[time_steps, batch_size, hidden_size]`, else
            `[batch_size, time_steps, hidden_size]`.
        final_states (Tensor|list|tuple): The final states. A (possibly
            nested structure of) tensor[s] with the same structure as the
            initial state. Each tensor has the same shape and dtype as the
            corresponding tensor in the initial states.

    Examples:

        .. code-block:: python

            import paddle
            paddle.disable_static()

            cell = paddle.nn.SimpleRNNCell(16, 32)

            inputs = paddle.rand((4, 23, 16))
            prev_h = paddle.randn((4, 32))
            outputs, final_states = paddle.nn.layer.rnn(cell, inputs, prev_h)

    """
    # Choose the implementation that matches the current execution mode.
    if _non_static_mode():
        run_rnn = _rnn_dynamic_graph
    else:
        run_rnn = _rnn_static_graph

    return run_rnn(
        cell,
        inputs,
        initial_states,
        sequence_length,
        time_major,
        is_reverse,
        **kwargs
    )
class ArrayWrapper:
    """Thin wrapper around a Python list that starts with one element.

    ``append`` returns the wrapper itself, so it can be used as the mapped
    function when accumulating per-step values.
    """

    def __init__(self, x):
        """Start the wrapped list with ``x`` as its only element."""
        self.array = [x]

    def append(self, x):
        """Append ``x`` to the wrapped list and return ``self``."""
        self.array.append(x)
        return self

    def __getitem__(self, item):
        """Index or slice the wrapped list."""
        return self.array[item]
def _maybe_copy(state, new_state, step_mask):
    """update rnn state or just pass the old state through

    Computes ``new_state * step_mask + state * (1 - step_mask)``, with both
    products taken by ``_multiply_with_axis`` along axis 0.
    """
    multiply = paddle.tensor.math._multiply_with_axis
    # presumably step_mask is 1 for valid steps and 0 for padding -- confirm
    # against the caller that builds the mask.
    updated_part = multiply(new_state, step_mask, axis=0)
    retained_part = multiply(state, (1 - step_mask), axis=0)
    return updated_part + retained_part
def _transpose_batch_time(x):
    """Swap the first two axes of ``x``, leaving all later axes in place."""
    trailing_axes = list(range(2, len(x.shape)))
    return paddle.transpose(x, [1, 0] + trailing_axes)
def _rnn_dynamic_graph(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    """Unroll ``cell`` over ``inputs`` step by step with a Python loop.

    Args:
        cell: callable invoked as ``cell(step_inputs, states, **kwargs)``
            and returning ``(step_outputs, new_states)``; must provide
            ``get_initial_states`` when ``initial_states`` is None.
        inputs: a tensor or nested structure of tensors. The number of time
            steps is read from axis 0 of the first flattened tensor when
            ``time_major`` is true and from axis 1 otherwise.
        initial_states: initial state structure, or None to ask the cell.
        sequence_length: optional lengths used to build a step mask; where
            the mask is zero the previous state is carried over.
        time_major: whether axis 0 of the inputs is the time axis.
        is_reverse: whether to iterate the time axis in reverse.
        **kwargs: forwarded to every cell call.

    Returns:
        ``(final_outputs, final_states)``: per-step outputs stacked along
        the time axis, and the states after the last step. When there are
        no time steps, the initial states are returned as the final states.
    """
    time_step_index = 0 if time_major else 1
    flat_inputs = flatten(inputs)
    time_steps = flat_inputs[0].shape[time_step_index]

    if initial_states is None:
        initial_states = cell.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )

    # From here on the time axis is always axis 0.
    if not time_major:
        inputs = map_structure(_transpose_batch_time, inputs)

    mask = None
    if sequence_length is not None:
        # Take the dtype from the first flattened tensor so that nested
        # input structures (lists/tuples), which have no ``dtype``
        # attribute of their own, are supported too.
        mask = sequence_lod.sequence_mask(
            sequence_length, maxlen=time_steps, dtype=flat_inputs[0].dtype
        )
        mask = paddle.transpose(mask, [1, 0])

    if is_reverse:
        inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
        if mask is not None:
            mask = paddle.reverse(mask, axis=[0])

    states = initial_states
    outputs = []
    for i in range(time_steps):
        step_inputs = map_structure(lambda x: x[i], inputs)
        step_outputs, new_states = cell(step_inputs, states, **kwargs)
        if mask is not None:
            # Blend old and new states according to this step's mask.
            new_states = map_structure(
                partial(_maybe_copy, step_mask=mask[i]), states, new_states
            )
        states = new_states
        # The first step creates one accumulator per output tensor; later
        # steps append to it (ArrayWrapper.append returns the wrapper).
        if i == 0:
            outputs = map_structure(lambda x: ArrayWrapper(x), step_outputs)
        else:
            outputs = map_structure(
                lambda x, x_array: x_array.append(x), step_outputs, outputs
            )

    final_outputs = map_structure(
        lambda x: paddle.stack(x.array, axis=time_step_index), outputs
    )

    if is_reverse:
        final_outputs = map_structure(
            lambda x: paddle.reverse(x, axis=time_step_index), final_outputs
        )

    # ``states`` equals the last ``new_states`` after any iteration and is
    # still bound (to the initial states) when the loop body never runs.
    final_states = states
    return final_outputs, final_states
def _rnn_static_graph(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    """Build the recurrent computation with ``control_flow.StaticRNN``.

    Args:
        cell: callable invoked as ``cell(step_inputs, states, **kwargs)``
            and returning ``(step_outputs, new_states)``; must provide
            ``get_initial_states`` when ``initial_states`` is None.
        inputs: a ``Variable`` or a list/tuple of float32/float64
            variables.
        initial_states: ``Variable``, list, tuple or None.
        sequence_length: ``Variable`` or None. When given, a step mask is
            built and states are blended with ``_maybe_copy``.
        time_major: whether axis 0 of the inputs is the time axis.
        is_reverse: whether to iterate the time axis in reverse.
        **kwargs: forwarded to the cell call.

    Returns:
        ``(final_outputs, final_states)``.
    """
    check_type(inputs, 'inputs', (Variable, list, tuple), 'rnn')
    if isinstance(inputs, (list, tuple)):
        for i, input_x in enumerate(inputs):
            check_variable_and_dtype(
                input_x, 'inputs[' + str(i) + ']', ['float32', 'float64'], 'rnn'
            )
    check_type(
        initial_states,
        'initial_states',
        (Variable, list, tuple, type(None)),
        'rnn',
    )
    check_type(
        sequence_length, 'sequence_length', (Variable, type(None)), 'rnn'
    )

    def _switch_grad(x, stop=False):
        x.stop_gradient = stop
        return x

    if initial_states is None:
        initial_states = cell.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )
    # Applied with the default ``stop=False`` to every initial state.
    initial_states = map_structure(_switch_grad, initial_states)

    # From here on the time axis is always axis 0.
    if not time_major:
        inputs = map_structure(_transpose_batch_time, inputs)

    # Test against None explicitly: ``sequence_length`` is a Variable, and
    # its truth value is not a statement about whether it was supplied.
    has_lengths = sequence_length is not None

    mask = None
    if has_lengths:
        max_seq_len = paddle.shape(flatten(inputs)[0])[0]
        mask = sequence_lod.sequence_mask(
            sequence_length,
            maxlen=max_seq_len,
            dtype=flatten(initial_states)[0].dtype,
        )
        mask = paddle.transpose(mask, [1, 0])
    if is_reverse:
        inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
        if has_lengths:
            mask = paddle.reverse(mask, axis=[0])

    # StaticRNN
    static_rnn = control_flow.StaticRNN()
    with static_rnn.step():
        inputs = map_structure(static_rnn.step_input, inputs)
        states = map_structure(static_rnn.memory, initial_states)
        # Hand the cell a rebuilt structure so the ``states`` structure
        # itself is not the object passed to the cell.
        copy_states = map_structure(lambda x: x, states)
        outputs, new_states = cell(inputs, copy_states, **kwargs)
        utils.assert_same_structure(states, new_states)
        if has_lengths:
            step_mask = static_rnn.step_input(mask)
            new_states = map_structure(
                partial(_maybe_copy, step_mask=step_mask), states, new_states
            )

        map_structure(static_rnn.update_memory, states, new_states)
        flat_outputs = flatten(outputs)
        # Step outputs are registered first, then the states, which fixes
        # the layout of the list returned by ``static_rnn()`` below.
        map_structure(static_rnn.step_output, outputs)
        map_structure(static_rnn.step_output, new_states)

    rnn_out = static_rnn()
    num_outputs = len(flat_outputs)
    final_outputs = rnn_out[:num_outputs]
    final_outputs = utils.pack_sequence_as(outputs, final_outputs)
    # States were emitted for every step; keep only the last entry.
    final_states = map_structure(lambda x: x[-1], rnn_out[num_outputs:])
    final_states = utils.pack_sequence_as(new_states, final_states)

    if is_reverse:
        final_outputs = map_structure(
            lambda x: paddle.reverse(x, axis=[0]), final_outputs
        )

    if not time_major:
        final_outputs = map_structure(_transpose_batch_time, final_outputs)

    return (final_outputs, final_states)
def birnn(
    cell_fw,
    cell_bw,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    **kwargs
):
    r"""
    Create a bidirectional recurrent neural network from the RNN cells
    `cell_fw` and `cell_bw`.

    Each cell is run over `inputs` with :code:`rnn` (the backward cell with
    `is_reverse=True`), and the outputs of both directions are concatenated
    along the last axis.

    Parameters:
        cell_fw(RNNCellBase): An instance of `RNNCellBase`, used for the
            forward direction.
        cell_bw(RNNCellBase): An instance of `RNNCellBase`, used for the
            backward direction.
        inputs(Tensor): The input sequences.
            If `time_major` is True, the shape is
            `[time_steps, batch_size, input_size]`,
            else the shape is `[batch_size, time_steps, input_size]`.
        initial_states(tuple, optional): A tuple of the initial states of
            `cell_fw` and `cell_bw`.
            If not provided, `get_initial_states` of each cell is called to
            produce the initial state for that cell. Defaults to None.
        sequence_length (Tensor, optional): Shape `[batch_size]`, dtype
            int64 or int32. The valid lengths of the input sequences.
            Defaults to None. If `sequence_length` is not None, the inputs
            are treated as padded sequences: in each sequence, elements
            whose time step index is not less than the valid length are
            treated as paddings.
        time_major (bool): Whether the first dimension of the input is the
            time-step dimension. Defaults to False.
        **kwargs: Additional keyword arguments passed to `forward` of each
            cell.

    Returns:
        outputs (Tensor): The outputs of the bidirectional RNN: the
            concatenation of the forward and backward outputs along the
            last axis.
            If `time_major` is True, the shape is
            `[time_steps, batch_size, size]`, else
            `[batch_size, time_steps, size]`, where size is
            `cell_fw.hidden_size + cell_bw.hidden_size`.
        final_states (tuple): A tuple of the final states of the forward
            cell and the backward cell.

    Examples:

        .. code-block:: python

            import paddle
            paddle.disable_static()

            cell_fw = paddle.nn.LSTMCell(16, 32)
            cell_bw = paddle.nn.LSTMCell(16, 32)

            inputs = paddle.rand((4, 23, 16))
            hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32))
            hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32))
            initial_states = ((hf, cf), (hb, cb))
            outputs, final_states = paddle.nn.layer.birnn(
                cell_fw, cell_bw, inputs, initial_states)

    """
    if initial_states is None:
        batch_dim_idx = 1 if time_major else 0
        states_fw = cell_fw.get_initial_states(
            batch_ref=inputs, batch_dim_idx=batch_dim_idx
        )
        # Each direction asks its own cell: the two cells may have
        # different state shapes (e.g. different hidden sizes).
        states_bw = cell_bw.get_initial_states(
            batch_ref=inputs, batch_dim_idx=batch_dim_idx
        )
    else:
        states_fw, states_bw = initial_states

    outputs_fw, states_fw = rnn(
        cell_fw,
        inputs,
        states_fw,
        sequence_length,
        time_major=time_major,
        **kwargs
    )

    outputs_bw, states_bw = rnn(
        cell_bw,
        inputs,
        states_bw,
        sequence_length,
        time_major=time_major,
        is_reverse=True,
        **kwargs
    )

    # Join the two directions feature-wise (last axis).
    outputs = map_structure(
        lambda x, y: paddle.concat([x, y], -1), outputs_fw, outputs_bw
    )

    final_states = (states_fw, states_bw)
    return outputs, final_states
def
split_states
(
states
,
bidirectional
=
False
,
state_components
=
1
):
r
"""
Split states of RNN network into possibly nested list or tuple of
...
...
@@ -779,7 +1142,7 @@ class RNN(Layer):
def
forward
(
self
,
inputs
,
initial_states
=
None
,
sequence_length
=
None
,
**
kwargs
):
final_outputs
,
final_states
=
paddle
.
fluid
.
layers
.
rnn
(
final_outputs
,
final_states
=
rnn
(
self
.
cell
,
inputs
,
initial_states
=
initial_states
,
...
...
@@ -866,7 +1229,7 @@ class BiRNN(Layer):
len
(
initial_states
)
==
2
),
"length of initial_states should be 2 when it is a list/tuple"
outputs
,
final_states
=
paddle
.
fluid
.
layers
.
birnn
(
outputs
,
final_states
=
birnn
(
self
.
cell_fw
,
self
.
cell_bw
,
inputs
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录