Unverified commit acee3dd3, authored Dec 13, 2022 by lugimzzz and committed by GitHub on Dec 13, 2022, in BaiXuePrincess/Paddle (a fork of PaddlePaddle/Paddle).
[fluid clean] remove 4 fluid.layers api and imigrate 2 fluid.layer api (#48972)
* fluid clean layer
* docs
Parent commit: b06a5946

Showing 6 changed files with 431 additions and 1910 deletions (+431 / −1910):
- python/paddle/fluid/layers/rnn.py (+0 / −903)
- python/paddle/fluid/tests/unittests/test_layers.py (+0 / −20)
- python/paddle/fluid/tests/unittests/test_lstm_unit_op.py (+0 / −77)
- python/paddle/fluid/tests/unittests/test_rnn_cell_api.py (+62 / −566)
- python/paddle/fluid/tests/unittests/test_rnn_decode_api.py (+0 / −338)
- python/paddle/nn/layer/rnn.py (+369 / −6)
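For readers following the migration, here is a minimal sketch of how code built on the removed static-graph cells might be ported to the `paddle.nn` equivalents that the updated tests exercise. The shapes and sizes below are illustrative, not taken from this diff:

```python
import paddle

# Removed by this commit (static graph):
#   cell = fluid.layers.LSTMCell(hidden_size=32)
#   outputs, final_states = fluid.layers.rnn(cell, inputs, ...)

# Migrated equivalent (dygraph paddle.nn API):
cell = paddle.nn.LSTMCell(input_size=16, hidden_size=32)
rnn = paddle.nn.RNN(cell)
inputs = paddle.rand((4, 23, 16))      # [batch, time, input_size]
outputs, final_states = rnn(inputs)    # outputs: [4, 23, 32]
```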
python/paddle/fluid/layers/rnn.py

@@ -36,503 +36,15 @@ from ..data_feeder import check_variable_and_dtype, check_type, check_dtype

```python
from collections.abc import Sequence

__all__ = [
    'RNNCell',
    'GRUCell',
    'LSTMCell',
    'rnn',
    'birnn',
    'dynamic_decode',
    'dynamic_lstm',
    'dynamic_lstmp',
    'dynamic_gru',
    'gru_unit',
    'lstm_unit',
    'lstm',
]


class RNNCell:
    """
    :api_attr: Static Graph

    RNNCell is the base class for abstraction representing the calculations
    mapping the input and state to the output and new state. It is suitable to
    and mostly used in RNN.
    """

    def call(self, inputs, states, **kwargs):
        r"""
        Every cell must implement this method to do the calculations mapping the
        inputs and states to the output and new states.

        To be more flexible, both inputs and states can be a tensor variable or
        a nested structure (list|tuple|namedtuple|dict) of tensor variable, that
        is, a (possibly nested structure of) tensor variable[s].

        Parameters:
            inputs: A (possibly nested structure of) tensor variable[s].
            states: A (possibly nested structure of) tensor variable[s].
            **kwargs: Additional keyword arguments, provided by the caller.

        Returns:
            tuple: outputs and new_states pair. outputs and new_states both \
                can be nested structure of tensor variables. new_states must \
                have the same structure with states.
        """
        raise NotImplementedError("RNNCell must implent the call function.")

    def __call__(self, inputs, states, **kwargs):
        return self.call(inputs, states, **kwargs)

    def get_initial_states(
        self,
        batch_ref,
        shape=None,
        dtype='float32',
        init_value=0,
        batch_dim_idx=0,
    ):
        r"""
        Generate initialized states according to provided shape, data type and
        value.

        Parameters:
            batch_ref: A (possibly nested structure of) tensor variable[s].
                The first dimension of the tensor will be used as batch size to
                initialize states.
            shape: A (possibly nested structure of) shape[s], where a shape is
                represented as a list/tuple of integer). -1 (for batch size) will
                be automatically inserted if shape is not started with it. If None,
                property `state_shape` will be used. The default value is None.
            dtype: A (possibly nested structure of) data type[s]. The structure
                must be same as that of `shape`, except when all tensors' in states
                has the same data type, a single data type can be used. If
                property `cell.state_shape` is not available, float32 will be used
                as the data type. The default value is float32.
            init_value: A float value used to initialize states.
            batch_dim_idx: An integer indicating which dimension of the tensor in
                inputs represents batch size. The default value is 0.

        Returns:
            Variable: tensor variable[s] packed in the same structure provided \
                by shape, representing the initialized states.
        """
        check_variable_and_dtype(
            batch_ref,
            'batch_ref',
            ['float32', 'float64', 'int32', 'int64'],
            'RNNCell',
        )
        check_type(shape, 'shape', (list, tuple, type(None), int), 'RNNCell')
        if isinstance(shape, (list, tuple)):
            shapes = map_structure(lambda x: x, shape)
            if isinstance(shape, list):
                for i, _shape in enumerate(shapes):
                    check_type(_shape, 'shapes[' + str(i) + ']', int, 'RNNCell')
            else:
                check_type(shapes, 'shapes', int, 'RNNCell')
        check_dtype(dtype, 'dtype', ['float32', 'float64'], 'RNNCell')

        # TODO: use inputs and batch_size
        batch_ref = flatten(batch_ref)[0]

        def _is_shape_sequence(seq):
            """For shape, list/tuple of integer is the finest-grained objection"""
            if isinstance(seq, list) or isinstance(seq, tuple):
                if reduce(
                    lambda flag, x: isinstance(x, int) and flag, seq, True
                ):
                    return False
            # TODO: Add check for the illegal
            if isinstance(seq, dict):
                return True
            return isinstance(seq, Sequence) and not isinstance(seq, str)

        class Shape:
            def __init__(self, shape):
                self.shape = shape if shape[0] == -1 else ([-1] + list(shape))

        # nested structure of shapes
        states_shapes = self.state_shape if shape is None else shape
        is_sequence_ori = utils.is_sequence
        utils.is_sequence = _is_shape_sequence
        states_shapes = map_structure(lambda shape: Shape(shape), states_shapes)
        utils.is_sequence = is_sequence_ori

        # nested structure of dtypes
        try:
            states_dtypes = self.state_dtype if dtype is None else dtype
        except NotImplementedError:
            # use fp32 as default
            states_dtypes = "float32"
        if len(flatten(states_dtypes)) == 1:
            dtype = flatten(states_dtypes)[0]
            states_dtypes = map_structure(lambda shape: dtype, states_shapes)

        init_states = map_structure(
            lambda shape, dtype: tensor.fill_constant_batch_size_like(
                input=batch_ref,
                shape=shape.shape,
                dtype=dtype,
                value=init_value,
                input_dim_idx=batch_dim_idx,
            ),
            states_shapes,
            states_dtypes,
        )
        return init_states

    @property
    def state_shape(self):
        """
        Abstract method (property).
        Used to initialize states.
        A (possibly nested structure of) shape[s], where a shape is represented
        as a list/tuple of integers (-1 for batch size would be automatically
        inserted into a shape if shape is not started with it).
        Not necessary to be implemented if states are not initialized by
        `get_initial_states` or the `shape` argument is provided when using
        `get_initial_states`.
        """
        raise NotImplementedError(
            "Please add implementaion for `state_shape` in the used cell."
        )

    @property
    def state_dtype(self):
        """
        Abstract method (property).
        Used to initialize states.
        A (possibly nested structure of) data types[s]. The structure must be
        same as that of `shape`, except when all tensors' in states has the same
        data type, a single data type can be used.
        Not necessary to be implemented if states are not initialized
        by `get_initial_states` or the `dtype` argument is provided when using
        `get_initial_states`.
        """
        raise NotImplementedError(
            "Please add implementaion for `state_dtype` in the used cell."
        )


class GRUCell(RNNCell):
    r"""
    :api_attr: Static Graph

    Gated Recurrent Unit cell. It is a wrapper for
    `fluid.contrib.layers.rnn_impl.BasicGRUUnit` to make it adapt to RNNCell.

    The formula used is as follow:

    .. math::

        u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)

        r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)

        \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)

        h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \\tilde{h_t}

    For more details, please refer to `Learning Phrase Representations using
    RNN Encoder Decoder for Statistical Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_

    Examples:
        .. code-block:: python

            import paddle.fluid.layers as layers
            cell = layers.GRUCell(hidden_size=256)
    """

    def __init__(
        self,
        hidden_size,
        param_attr=None,
        bias_attr=None,
        gate_activation=None,
        activation=None,
        dtype="float32",
        name="GRUCell",
    ):
        """
        Constructor of GRUCell.

        Parameters:
            hidden_size (int): The hidden size in the GRU cell.
            param_attr(ParamAttr, optional): The parameter attribute for the learnable
                weight matrix. Default: None.
            bias_attr (ParamAttr, optional): The parameter attribute for the bias
                of GRU. Default: None.
            gate_activation (function, optional): The activation function for :math:`act_g`.
                Default: `fluid.layers.sigmoid`.
            activation (function, optional): The activation function for :math:`act_c`.
                Default: `fluid.layers.tanh`.
            dtype(string, optional): The data type used in this cell. Default float32.
            name(string, optional) : The name scope used to identify parameters and biases.
        """
        check_type(hidden_size, 'hidden_size', (int), 'GRUCell')
        check_dtype(dtype, 'dtype', ['float32', 'float64'], 'GRUCell')
        self.hidden_size = hidden_size
        from .. import contrib  # TODO: resolve recurrent import

        self.gru_unit = contrib.layers.rnn_impl.BasicGRUUnit(
            name,
            hidden_size,
            param_attr,
            bias_attr,
            gate_activation,
            activation,
            dtype,
        )

    def call(self, inputs, states):
        r"""
        Perform calculations of GRU.

        Parameters:
            inputs(Variable): A tensor with shape `[batch_size, input_size]`,
                corresponding to :math:`x_t` in the formula. The data type
                should be float32 or float64.
            states(Variable): A tensor with shape `[batch_size, hidden_size]`,
                corresponding to :math:`h_{t-1}` in the formula. The data type
                should be float32 or float64.

        Returns:
            tuple: A tuple( :code:`(outputs, new_states)` ), where `outputs` and \
                `new_states` is the same tensor shaped `[batch_size, hidden_size]`, \
                corresponding to :math:`h_t` in the formula. The data type of the \
                tensor is same as that of `states`.
        """
        check_variable_and_dtype(
            inputs, 'inputs', ['float32', 'float64'], 'GRUCell'
        )
        check_variable_and_dtype(
            states, 'states', ['float32', 'float64'], 'GRUCell'
        )
        new_hidden = self.gru_unit(inputs, states)
        return new_hidden, new_hidden

    @property
    def state_shape(self):
        """
        The `state_shape` of GRUCell is a shape `[hidden_size]` (-1 for batch
        size would be automatically inserted into shape). The shape corresponds
        to :math:`h_{t-1}`.
        """
        return [self.hidden_size]


class LSTMCell(RNNCell):
    r"""
    :api_attr: Static Graph

    Long-Short Term Memory cell. It is a wrapper for
    `fluid.contrib.layers.rnn_impl.BasicLSTMUnit` to make it adapt to RNNCell.

    The formula used is as follow:

    .. math::

        i_{t} & = act_g(W_{x_{i}}x_{t} + W_{h_{i}}h_{t-1} + b_{i})

        f_{t} & = act_g(W_{x_{f}}x_{t} + W_{h_{f}}h_{t-1} + b_{f} + forget\\_bias)

        c_{t} & = f_{t}c_{t-1} + i_{t} act_c (W_{x_{c}}x_{t} + W_{h_{c}}h_{t-1} + b_{c})

        o_{t} & = act_g(W_{x_{o}}x_{t} + W_{h_{o}}h_{t-1} + b_{o})

        h_{t} & = o_{t} act_c (c_{t})

    For more details, please refer to `RECURRENT NEURAL NETWORK REGULARIZATION <http://arxiv.org/abs/1409.2329>`_

    Examples:
        .. code-block:: python

            import paddle.fluid.layers as layers
            cell = layers.LSTMCell(hidden_size=256)
    """

    def __init__(
        self,
        hidden_size,
        param_attr=None,
        bias_attr=None,
        gate_activation=None,
        activation=None,
        forget_bias=1.0,
        dtype="float32",
        name="LSTMCell",
    ):
        """
        Constructor of LSTMCell.

        Parameters:
            hidden_size (int): The hidden size in the LSTM cell.
            param_attr(ParamAttr, optional): The parameter attribute for the learnable
                weight matrix. Default: None.
            bias_attr (ParamAttr, optional): The parameter attribute for the bias
                of LSTM. Default: None.
            gate_activation (function, optional): The activation function for :math:`act_g`.
                Default: 'fluid.layers.sigmoid'.
            activation (function, optional): The activation function for :math:`act_h`.
                Default: 'fluid.layers.tanh'.
            forget_bias(float, optional): forget bias used when computing forget gate.
                Default 1.0
            dtype(string, optional): The data type used in this cell. Default float32.
            name(string, optional) : The name scope used to identify parameters and biases.
        """
        check_type(hidden_size, 'hidden_size', (int), 'LSTMCell')
        check_dtype(dtype, 'dtype', ['float32', 'float64'], 'LSTMCell')
        self.hidden_size = hidden_size
        from .. import contrib  # TODO: resolve recurrent import

        self.lstm_unit = contrib.layers.rnn_impl.BasicLSTMUnit(
            name,
            hidden_size,
            param_attr,
            bias_attr,
            gate_activation,
            activation,
            forget_bias,
            dtype,
        )

    def call(self, inputs, states):
        r"""
        Perform calculations of LSTM.

        Parameters:
            inputs(Variable): A tensor with shape `[batch_size, input_size]`,
                corresponding to :math:`x_t` in the formula. The data type
                should be float32 or float64.
            states(Variable): A list of containing two tensors, each shaped
                `[batch_size, hidden_size]`, corresponding to :math:`h_{t-1}, c_{t-1}`
                in the formula. The data type should be float32 or float64.

        Returns:
            tuple: A tuple( :code:`(outputs, new_states)` ), where `outputs` is \
                a tensor with shape `[batch_size, hidden_size]`, corresponding \
                to :math:`h_{t}` in the formula; `new_states` is a list containing \
                two tenser variables shaped `[batch_size, hidden_size]`, corresponding \
                to :math:`h_{t}, c_{t}` in the formula. The data type of these \
                tensors all is same as that of `states`.
        """
        check_variable_and_dtype(
            inputs, 'inputs', ['float32', 'float64'], 'LSTMCell'
        )
        check_type(states, 'states', list, 'LSTMCell')
        if isinstance(states, list):
            for i, state in enumerate(states):
                check_variable_and_dtype(
                    state,
                    'state[' + str(i) + ']',
                    ['float32', 'float64'],
                    'LSTMCell',
                )

        pre_hidden, pre_cell = states
        new_hidden, new_cell = self.lstm_unit(inputs, pre_hidden, pre_cell)
        return new_hidden, [new_hidden, new_cell]

    @property
    def state_shape(self):
        """
        The `state_shape` of LSTMCell is a list with two shapes: `[[hidden_size], [hidden_size]]`
        (-1 for batch size would be automatically inserted into shape). These two
        shapes correspond to :math:`h_{t-1}` and :math:`c_{t-1}` separately.
        """
        return [[self.hidden_size], [self.hidden_size]]


def rnn(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    """
    rnn creates a recurrent neural network specified by RNNCell `cell`,
    which performs :code:`cell.call()` (for dygraph mode :code:`cell.forward`)
    repeatedly until reaches to the maximum length of `inputs`.

    Arguments:
        cell(RNNCellBase): An instance of `RNNCellBase`.
        inputs(Tensor): the input sequences.
            If time_major is True, the shape is
            `[time_steps, batch_size, input_size]`
            else the shape is `[batch_size, time_steps, input_size]`.
        initial_states(Tensor|tuple|list, optional): the initial state of the
            rnn cell. Tensor or a possibly nested structure of tensors. If not
            provided, `cell.get_initial_states` would be called to produce
            the initial state. Defaults to None.
        sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
            or int32. The valid lengths of input sequences. Defaults to None.
            If `sequence_length` is not None, the inputs are treated as
            padded sequences. In each input sequence, elements whose time step
            index are not less than the valid length are treated as paddings.
        time_major (bool): Whether the first dimension of the input means the
            time steps. Defaults to False.
        is_reverse (bool, optional): Indicate whether to calculate in the reverse
            order of input sequences. Defaults to False.
        **kwargs: Additional keyword arguments to pass to `forward` of the cell.

    Returns:
        (outputs, final_states)
        outputs (Tensor|list|tuple): the output sequence. Tensor or nested
            structure of Tensors.
            If `time_major` is True, the shape of each tensor in outpus is
            `[time_steps, batch_size, hidden_size]`, else
            `[batch_size, time_steps, hidden_size]`.
        final_states (Tensor|list|tuple): final states. A (possibly nested structure of)
            tensor[s], representing the final state for RNN. It has the same
            structure of intial state. Each tensor in final states has the same
            shape and dtype as the corresponding tensor in initial states.

    Examples:
        .. code-block:: python

            import paddle
            paddle.disable_static()

            cell = paddle.nn.SimpleRNNCell(16, 32)

            inputs = paddle.rand((4, 23, 16))
            prev_h = paddle.randn((4, 32))
            outputs, final_states = paddle.fluid.layers.rnn(cell, inputs, prev_h)
    """
    if _non_static_mode():
        return _rnn_dynamic_graph(
            cell,
            inputs,
            initial_states,
            sequence_length,
            time_major,
            is_reverse,
            **kwargs
        )
    else:
        return _rnn_static_graph(
            cell,
            inputs,
            initial_states,
            sequence_length,
            time_major,
            is_reverse,
            **kwargs
        )


class ArrayWrapper:
    def __init__(self, x):
        self.array = [x]
```
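The GRUCell docstring in the hunk above states the gate equations; as a plain-NumPy illustration of a single step, the following sketch applies them directly. The weight names and initialization here are hypothetical stand-ins, not `BasicGRUUnit`'s actual parameters:

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x_t, h_prev, W_u, U_u, b_u, W_r, U_r, b_r, W_c, U_c, b_c):
    # u_t = act_g(W_ux x_t + W_uh h_{t-1} + b_u); r_t likewise;
    # h~_t = act_c(W_cx x_t + W_ch (r_t * h_{t-1}) + b_c);
    # h_t = u_t * h_{t-1} + (1 - u_t) * h~_t
    u = sigmoid(x_t @ W_u + h_prev @ U_u + b_u)              # update gate
    r = sigmoid(x_t @ W_r + h_prev @ U_r + b_r)              # reset gate
    h_tilde = np.tanh(x_t @ W_c + (r * h_prev) @ U_c + b_c)  # candidate state
    return u * h_prev + (1.0 - u) * h_tilde

rng = np.random.default_rng(0)
x, h = rng.standard_normal((2, 3)), np.zeros((2, 4))  # batch 2, input 3, hidden 4
params = [rng.standard_normal(s) * 0.1 for s in [(3, 4), (4, 4), (4,)] * 3]
print(gru_step(x, h, *params).shape)                  # (2, 4)
```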
@@ -545,273 +57,6 @@ class ArrayWrapper:

```python
        return self.array.__getitem__(item)


def _maybe_copy(state, new_state, step_mask):
    """update rnn state or just pass the old state through"""
    new_state = paddle.tensor.math._multiply_with_axis(
        new_state, step_mask, axis=0
    ) + paddle.tensor.math._multiply_with_axis(state, (1 - step_mask), axis=0)
    return new_state


def _transpose_batch_time(x):
    perm = [1, 0] + list(range(2, len(x.shape)))
    return paddle.transpose(x, perm)


def _rnn_dynamic_graph(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    time_step_index = 0 if time_major else 1
    flat_inputs = flatten(inputs)
    time_steps = flat_inputs[0].shape[time_step_index]

    if initial_states is None:
        initial_states = cell.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )

    if not time_major:
        inputs = map_structure(_transpose_batch_time, inputs)

    if sequence_length is not None:
        mask = sequence_lod.sequence_mask(
            sequence_length, maxlen=time_steps, dtype=inputs.dtype
        )
        mask = paddle.transpose(mask, [1, 0])

    if is_reverse:
        inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
        mask = (
            paddle.reverse(mask, axis=[0])
            if sequence_length is not None
            else None
        )

    states = initial_states
    outputs = []
    for i in range(time_steps):
        step_inputs = map_structure(lambda x: x[i], inputs)
        step_outputs, new_states = cell(step_inputs, states, **kwargs)
        if sequence_length is not None:
            new_states = map_structure(
                partial(_maybe_copy, step_mask=mask[i]),
                states,
                new_states,
            )
        states = new_states
        outputs = (
            map_structure(lambda x: ArrayWrapper(x), step_outputs)
            if i == 0
            else map_structure(
                lambda x, x_array: x_array.append(x), step_outputs, outputs
            )
        )

    final_outputs = map_structure(
        lambda x: paddle.stack(x.array, axis=time_step_index),
        outputs,
    )

    if is_reverse:
        final_outputs = map_structure(
            lambda x: paddle.reverse(x, axis=time_step_index),
            final_outputs,
        )

    final_states = new_states
    return final_outputs, final_states


def _rnn_static_graph(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    check_type(inputs, 'inputs', (Variable, list, tuple), 'rnn')
    if isinstance(inputs, (list, tuple)):
        for i, input_x in enumerate(inputs):
            check_variable_and_dtype(
                input_x, 'inputs[' + str(i) + ']', ['float32', 'float64'], 'rnn'
            )
    check_type(
        initial_states,
        'initial_states',
        (Variable, list, tuple, type(None)),
        'rnn',
    )
    check_type(
        sequence_length, 'sequence_length', (Variable, type(None)), 'rnn'
    )

    def _switch_grad(x, stop=False):
        x.stop_gradient = stop
        return x

    if initial_states is None:
        initial_states = cell.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )
    initial_states = map_structure(_switch_grad, initial_states)

    if not time_major:
        inputs = map_structure(_transpose_batch_time, inputs)

    if sequence_length:
        max_seq_len = paddle.shape(flatten(inputs)[0])[0]
        mask = sequence_lod.sequence_mask(
            sequence_length,
            maxlen=max_seq_len,
            dtype=flatten(initial_states)[0].dtype,
        )
        mask = paddle.transpose(mask, [1, 0])
    if is_reverse:
        inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
        mask = paddle.reverse(mask, axis=[0]) if sequence_length else None

    # StaticRNN
    rnn = control_flow.StaticRNN()
    with rnn.step():
        inputs = map_structure(rnn.step_input, inputs)
        states = map_structure(rnn.memory, initial_states)
        copy_states = map_structure(lambda x: x, states)
        outputs, new_states = cell(inputs, copy_states, **kwargs)
        assert_same_structure(states, new_states)
        if sequence_length:
            step_mask = rnn.step_input(mask)
            new_states = map_structure(
                partial(_maybe_copy, step_mask=step_mask),
                states,
                new_states,
            )

        map_structure(rnn.update_memory, states, new_states)
        flat_outputs = flatten(outputs)
        map_structure(rnn.step_output, outputs)
        map_structure(rnn.step_output, new_states)

    rnn_out = rnn()
    final_outputs = rnn_out[: len(flat_outputs)]
    final_outputs = pack_sequence_as(outputs, final_outputs)
    final_states = map_structure(lambda x: x[-1], rnn_out[len(flat_outputs) :])
    final_states = pack_sequence_as(new_states, final_states)

    if is_reverse:
        final_outputs = map_structure(
            lambda x: paddle.reverse(x, axis=[0]), final_outputs
        )

    if not time_major:
        final_outputs = map_structure(_transpose_batch_time, final_outputs)

    return (final_outputs, final_states)


def birnn(
    cell_fw,
    cell_bw,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    **kwargs
):
    """
    birnn creates a bidirectional recurrent neural network specified by
    RNNCell `cell_fw` and `cell_bw`, which performs :code:`cell.call()`
    (for dygraph mode :code:`cell.forward`) repeatedly until reaches to
    the maximum length of `inputs` and then concat the outputs for both RNNs
    along the last axis.

    Arguments:
        cell_fw(RNNCellBase): An instance of `RNNCellBase`.
        cell_bw(RNNCellBase): An instance of `RNNCellBase`.
        inputs(Tensor): the input sequences.
            If time_major is True, the shape is
            `[time_steps, batch_size, input_size]`
            else the shape is `[batch_size, time_steps, input_size]`.
        initial_states(tuple, optional): A tuple of initial states of
            `cell_fw` and `cell_bw`.
            If not provided, `cell.get_initial_states` would be called to
            produce initial state for each cell. Defaults to None.
        sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
            or int32. The valid lengths of input sequences. Defaults to None.
            If `sequence_length` is not None, the inputs are treated as
            padded sequences. In each input sequence, elements whose time step
            index are not less than the valid length are treated as paddings.
        time_major (bool): Whether the first dimension of the input means the
            time steps. Defaults to False.
        **kwargs: Additional keyword arguments to pass to `forward` of each cell.

    Returns:
        (outputs, final_states)
        outputs (Tensor): the outputs of the bidirectional RNN. It is the
            concatenation of the outputs from the forward RNN and backward
            RNN along the last axis.
            If time major is True, the shape is `[time_steps, batch_size, size]`,
            else the shape is `[batch_size, time_steps, size]`, where size is
            `cell_fw.hidden_size + cell_bw.hidden_size`.
        final_states (tuple): A tuple of the final states of the forward
            cell and backward cell.

    Examples:
        .. code-block:: python

            import paddle
            paddle.disable_static()

            cell_fw = paddle.nn.LSTMCell(16, 32)
            cell_bw = paddle.nn.LSTMCell(16, 32)

            inputs = paddle.rand((4, 23, 16))
            hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32))
            hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32))
            initial_states = ((hf, cf), (hb, cb))
            outputs, final_states = paddle.fluid.layers.birnn(
                cell_fw, cell_bw, inputs, initial_states)
    """
    if initial_states is None:
        states_fw = cell_fw.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )
        states_bw = cell_fw.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )
    else:
        states_fw, states_bw = initial_states
    outputs_fw, states_fw = rnn(
        cell_fw,
        inputs,
        states_fw,
        sequence_length,
        time_major=time_major,
        **kwargs
    )

    outputs_bw, states_bw = rnn(
        cell_bw,
        inputs,
        states_bw,
        sequence_length,
        time_major=time_major,
        is_reverse=True,
        **kwargs
    )

    outputs = map_structure(
        lambda x, y: tensor.concat([x, y], -1), outputs_fw, outputs_bw
    )

    final_states = (states_fw, states_bw)
    return outputs, final_states


def _dynamic_decode_imperative(
    decoder,
    inits=None,
```
@@ -2175,151 +1420,3 @@ def gru_unit(

```python
    )
    return updated_hidden, reset_hidden_pre, gate


def lstm_unit(
    x_t,
    hidden_t_prev,
    cell_t_prev,
    forget_bias=0.0,
    param_attr=None,
    bias_attr=None,
    name=None,
):
    r"""
    :api_attr: Static Graph

    Long-Short Term Memory (LSTM) RNN cell. This operator performs LSTM calculations for
    one time step, whose implementation is based on calculations described in `RECURRENT
    NEURAL NETWORK REGULARIZATION <http://arxiv.org/abs/1409.2329>`_ .

    We add forget_bias to the biases of the forget gate in order to
    reduce the scale of forgetting. The formula is as follows:

    .. math::

        i_{t} & = \sigma(W_{x_{i}}x_{t} + W_{h_{i}}h_{t-1} + b_{i})

        f_{t} & = \sigma(W_{x_{f}}x_{t} + W_{h_{f}}h_{t-1} + b_{f} + forget\\_bias)

        c_{t} & = f_{t}c_{t-1} + i_{t} tanh (W_{x_{c}}x_{t} + W_{h_{c}}h_{t-1} + b_{c})

        o_{t} & = \sigma(W_{x_{o}}x_{t} + W_{h_{o}}h_{t-1} + b_{o})

        h_{t} & = o_{t} tanh (c_{t})

    :math:`x_{t}` stands for ``x_t`` , corresponding to the input of current time step;
    :math:`h_{t-1}` and :math:`c_{t-1}` correspond to ``hidden_t_prev`` and ``cell_t_prev`` ,
    representing the output of from previous time step.
    :math:`i_{t}, f_{t}, c_{t}, o_{t}, h_{t}` are input gate, forget gate, cell, output gate
    and hidden calculation.

    Args:
        x_t(Variable): A 2D Tensor representing the input of current time step.
            Its shape should be :math:`[N, M]` , where :math:`N` stands for batch
            size, :math:`M` for the feature size of input. The data type should
            be float32 or float64.
        hidden_t_prev(Variable): A 2D Tensor representing the hidden value from
            previous step. Its shape should be :math:`[N, D]` , where :math:`N`
            stands for batch size, :math:`D` for the hidden size. The data type
            should be same as ``x_t`` .
        cell_t_prev(Variable): A 2D Tensor representing the cell value from
            previous step. It has the same shape and data type with ``hidden_t_prev`` .
        forget_bias (float, optional): :math:`forget\\_bias` added to the biases
            of the forget gate. Default 0.
        param_attr(ParamAttr, optional): To specify the weight parameter property.
            Default: None, which means the default weight parameter property is used.
            See usage for details in :ref:`api_fluid_ParamAttr` .
        bias_attr (ParamAttr, optional): To specify the bias parameter property.
            Default: None, which means the default bias parameter property is used.
            See usage for details in :ref:`api_fluid_ParamAttr` .
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        tuple: The tuple contains two Tensor variables with the same shape and \
            data type with ``hidden_t_prev`` , representing the hidden value and \
            cell value which correspond to :math:`h_{t}` and :math:`c_{t}` in \
            the formula.

    Raises:
        ValueError: Rank of x_t must be 2.
        ValueError: Rank of hidden_t_prev must be 2.
        ValueError: Rank of cell_t_prev must be 2.
        ValueError: The 1st dimensions of x_t, hidden_t_prev and cell_t_prev must be the same.
        ValueError: The 2nd dimensions of hidden_t_prev and cell_t_prev must be the same.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            dict_dim, emb_dim, hidden_dim = 128, 64, 512
            data = fluid.data(name='step_data', shape=[None], dtype='int64')
            x = fluid.embedding(input=data, size=[dict_dim, emb_dim])
            pre_hidden = fluid.data(
                name='pre_hidden', shape=[None, hidden_dim], dtype='float32')
            pre_cell = fluid.data(
                name='pre_cell', shape=[None, hidden_dim], dtype='float32')
            hidden = fluid.layers.lstm_unit(
                x_t=x,
                hidden_t_prev=pre_hidden,
                cell_t_prev=pre_cell)
    """
    helper = LayerHelper('lstm_unit', **locals())
    check_variable_and_dtype(x_t, 'x_t', ['float32', 'float64'], 'lstm_unit')
    check_variable_and_dtype(
        hidden_t_prev, 'hidden_t_prev', ['float32', 'float64'], 'lstm_unit'
    )
    check_variable_and_dtype(
        cell_t_prev, 'cell_t_prev', ['float32', 'float64'], 'lstm_unit'
    )
    if len(x_t.shape) != 2:
        raise ValueError("Rank of x_t must be 2.")

    if len(hidden_t_prev.shape) != 2:
        raise ValueError("Rank of hidden_t_prev must be 2.")

    if len(cell_t_prev.shape) != 2:
        raise ValueError("Rank of cell_t_prev must be 2.")

    if (
        x_t.shape[0] != hidden_t_prev.shape[0]
        or x_t.shape[0] != cell_t_prev.shape[0]
    ):
        raise ValueError(
            "The 1st dimensions of x_t, hidden_t_prev and "
            "cell_t_prev must be the same."
        )

    if hidden_t_prev.shape[1] != cell_t_prev.shape[1]:
        raise ValueError(
            "The 2nd dimensions of hidden_t_prev and "
            "cell_t_prev must be the same."
        )

    if bias_attr is None:
        bias_attr = ParamAttr()

    size = cell_t_prev.shape[1]
    concat_out = nn.concat(input=[x_t, hidden_t_prev], axis=1)
    fc_out = nn.fc(
        input=concat_out,
        size=4 * size,
        param_attr=param_attr,
        bias_attr=bias_attr,
    )
    dtype = x_t.dtype
    c = helper.create_variable_for_type_inference(dtype)
    h = helper.create_variable_for_type_inference(dtype)

    helper.append_op(
        type='lstm_unit',
        inputs={"X": fc_out, "C_prev": cell_t_prev},
        outputs={"C": c, "H": h},
        attrs={"forget_bias": forget_bias},
    )

    return h, c
```
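The removed `rnn()`/`_maybe_copy` implementation above handles padded sequences by blending each step's new state with the old one under a 0/1 mask. A small NumPy sketch of that blending rule (hypothetical values, same arithmetic as `_maybe_copy`):

```python
import numpy as np

def maybe_copy(state, new_state, step_mask):
    """Per-example state update: rows whose mask is 1 take the new state;
    rows whose mask is 0 (padding at this time step) keep the old state."""
    m = step_mask[:, None]                 # broadcast mask over the hidden dim
    return new_state * m + state * (1.0 - m)

state = np.zeros((3, 2))
new_state = np.ones((3, 2))
mask = np.array([1.0, 1.0, 0.0])           # example 2 is padding at this step
print(maybe_copy(state, new_state, mask))
# rows 0-1 advance to the new state; row 2 stays frozen at the old state
```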
python/paddle/fluid/tests/unittests/test_layers.py

@@ -2179,26 +2179,6 @@ class TestBook(LayerTest):

```python
                x, kernel_size=[5, 3], stride=[1, 2], padding=(2, 1)
            )

    def make_lstm_unit(self):
        with program_guard(
            fluid.default_main_program(), fluid.default_startup_program()
        ):
            x_t_data = self._get_data(
                name='x_t_data', shape=[10, 10], dtype='float32'
            )
            x_t = layers.fc(input=x_t_data, size=10)
            prev_hidden_data = self._get_data(
                name='prev_hidden_data', shape=[10, 30], dtype='float32'
            )
            prev_hidden = layers.fc(input=prev_hidden_data, size=30)
            prev_cell_data = self._get_data(
                name='prev_cell', shape=[10, 30], dtype='float32'
            )
            prev_cell = layers.fc(input=prev_cell_data, size=30)
            return layers.lstm_unit(
                x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell
            )

    def make_softmax(self):
        with program_guard(
            fluid.default_main_program(), fluid.default_startup_program()
```
python/paddle/fluid/tests/unittests/test_lstm_unit_op.py

@@ -17,10 +17,6 @@ import unittest

```python
import numpy as np
from op_test import OpTest

from paddle import fluid
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.layers import lstm_unit


def sigmoid_np(x):
    return 1.0 / (1.0 + np.exp(-x))
```

@@ -30,79 +26,6 @@ def tanh_np(x):

```python
    return 2 * sigmoid_np(2.0 * x) - 1.0


class LstmUnitTestError(unittest.TestCase):
    def test_errors(self):
        with program_guard(Program(), Program()):
            batch_size, dict_dim, emb_dim, hidden_dim = 32, 128, 64, 512
            data = fluid.data(
                name='step_data', shape=[batch_size], dtype='int64'
            )
            inputs = fluid.embedding(input=data, size=[dict_dim, emb_dim])
            pre_hidden = fluid.data(
                name='pre_hidden',
                shape=[batch_size, hidden_dim],
                dtype='float32',
            )
            pre_cell = fluid.data(
                name='pre_cell', shape=[batch_size, hidden_dim], dtype='float32'
            )
            np_input = np.random.uniform(
                -0.1, 0.1, (batch_size, emb_dim)
            ).astype('float64')
            np_pre_hidden = np.random.uniform(
                -0.1, 0.1, (batch_size, hidden_dim)
            ).astype('float64')
            np_pre_cell = np.random.uniform(
                -0.1, 0.1, (batch_size, hidden_dim)
            ).astype('float64')

            def test_input_Variable():
                lstm_unit(np_input, pre_hidden, pre_cell)

            self.assertRaises(TypeError, test_input_Variable)

            def test_pre_hidden_Variable():
                lstm_unit(inputs, np_pre_hidden, pre_cell)

            self.assertRaises(TypeError, test_pre_hidden_Variable)

            def test_pre_cell_Variable():
                lstm_unit(inputs, pre_hidden, np_pre_cell)

            self.assertRaises(TypeError, test_pre_cell_Variable)

            def test_input_type():
                error_input = fluid.data(
                    name='error_input',
                    shape=[batch_size, emb_dim],
                    dtype='int32',
                )
                lstm_unit(error_input, pre_hidden, pre_cell)

            self.assertRaises(TypeError, test_input_type)

            def test_pre_hidden_type():
                error_pre_hidden = fluid.data(
                    name='error_pre_hidden',
                    shape=[batch_size, hidden_dim],
                    dtype='int32',
                )
                lstm_unit(inputs, error_pre_hidden, pre_cell)

            self.assertRaises(TypeError, test_pre_hidden_type)

            def test_pre_cell_type():
                error_pre_cell = fluid.data(
                    name='error_pre_cell',
                    shape=[batch_size, hidden_dim],
                    dtype='int32',
                )
                lstm_unit(inputs, pre_hidden, error_pre_cell)

            self.assertRaises(TypeError, test_pre_cell_type)


class LstmUnitTest(OpTest):
    def setUp(self):
        self.op_type = "lstm_unit"
```
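The removed `LstmUnitTest` checked the op against NumPy helpers like `sigmoid_np`/`tanh_np` above. A reference step for the `lstm_unit` formula (with `forget_bias`) might look like the sketch below; the fused weight layout and gate ordering here are assumptions for illustration, not the op's exact internal layout:

```python
import numpy as np

def sigmoid_np(x):
    return 1.0 / (1.0 + np.exp(-x))

def tanh_np(x):
    return 2 * sigmoid_np(2.0 * x) - 1.0

def lstm_unit_np(x_t, h_prev, c_prev, W, b, forget_bias=0.0):
    """One LSTM step: fc over [x_t, h_prev] to 4*D pre-activations, then
    split into input/forget/cell/output gates (gate order is assumed)."""
    d = c_prev.shape[1]
    z = np.concatenate([x_t, h_prev], axis=1) @ W + b      # [N, 4D]
    i, f, c_hat, o = (z[:, k * d:(k + 1) * d] for k in range(4))
    c = sigmoid_np(f + forget_bias) * c_prev + sigmoid_np(i) * tanh_np(c_hat)
    h = sigmoid_np(o) * tanh_np(c)
    return h, c

rng = np.random.default_rng(0)
N, M, D = 4, 6, 5
h, c = lstm_unit_np(
    rng.standard_normal((N, M)),
    rng.standard_normal((N, D)),
    rng.standard_normal((N, D)),
    rng.standard_normal((M + D, 4 * D)),
    np.zeros(4 * D),
)
print(h.shape, c.shape)  # (4, 5) (4, 5)
```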
python/paddle/fluid/tests/unittests/test_rnn_cell_api.py

@@ -16,296 +16,20 @@ import unittest

```diff
 import numpy
 import numpy as np
 
+from rnn.rnn_numpy import LSTMCell
+from rnn.rnn_numpy import rnn as numpy_rnn
+
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.layers as layers
 import paddle.fluid.layers.utils as utils
-from paddle.fluid import contrib, framework
-from paddle.fluid.contrib.layers import basic_lstm
+from paddle.fluid import framework
 from paddle.fluid.executor import Executor
 from paddle.fluid.framework import Program, program_guard
-from paddle.fluid.layers import rnn as dynamic_rnn
-from paddle.fluid.layers.rnn import GRUCell, LSTMCell, RNNCell
+from paddle.nn.layer.rnn import rnn as dynamic_rnn
 
-
-class TestLSTMCellError(unittest.TestCase):
-    def test_errors(self):
-        with program_guard(Program(), Program()):
-            batch_size, input_size, hidden_size = 4, 16, 16
-            inputs = fluid.data(
-                name='inputs', shape=[None, input_size], dtype='float32'
-            )
-            pre_hidden = fluid.data(
-                name='pre_hidden', shape=[None, hidden_size], dtype='float32'
-            )
-            pre_cell = fluid.data(
-                name='pre_cell', shape=[None, hidden_size], dtype='float32'
-            )
-            cell = LSTMCell(hidden_size)
-
-            def test_input_Variable():
-                np_input = np.random.random(
-                    (batch_size, input_size)
-                ).astype("float32")
-                cell(np_input, [pre_hidden, pre_cell])
-
-            self.assertRaises(TypeError, test_input_Variable)
-
-            def test_pre_hidden_Variable():
-                np_pre_hidden = np.random.random(
-                    (batch_size, hidden_size)
-                ).astype("float32")
-                cell(inputs, [np_pre_hidden, pre_cell])
-
-            self.assertRaises(TypeError, test_pre_hidden_Variable)
-
-            def test_pre_cell_Variable():
-                np_pre_cell = np.random.random(
-                    (batch_size, input_size)
-                ).astype("float32")
-                cell(inputs, [pre_hidden, np_pre_cell])
-
-            self.assertRaises(TypeError, test_pre_cell_Variable)
-
-            def test_input_type():
-                error_inputs = fluid.data(
-                    name='error_inputs',
-                    shape=[None, input_size],
-                    dtype='int32',
-                )
-                cell(error_inputs, [pre_hidden, pre_cell])
-
-            self.assertRaises(TypeError, test_input_type)
-
-            def test_pre_hidden_type():
-                error_pre_hidden = fluid.data(
-                    name='error_pre_hidden',
-                    shape=[None, hidden_size],
-                    dtype='int32',
-                )
-                cell(inputs, [error_pre_hidden, pre_cell])
-
-            self.assertRaises(TypeError, test_pre_hidden_type)
-
-            def test_pre_cell_type():
-                error_pre_cell = fluid.data(
-                    name='error_pre_cell',
-                    shape=[None, hidden_size],
-                    dtype='int32',
-                )
-                cell(inputs, [pre_hidden, error_pre_cell])
-
-            self.assertRaises(TypeError, test_pre_cell_type)
-
-            def test_dtype():
-                # the input type must be Variable
-                LSTMCell(hidden_size, dtype="int32")
-
-            self.assertRaises(TypeError, test_dtype)
-
-
-class TestLSTMCell(unittest.TestCase):
-    def setUp(self):
-        self.batch_size = 4
-        self.input_size = 16
-        self.hidden_size = 16
-
-    def test_run(self):
-        inputs = fluid.data(
-            name='inputs', shape=[None, self.input_size], dtype='float32'
-        )
-        pre_hidden = fluid.data(
-            name='pre_hidden', shape=[None, self.hidden_size], dtype='float32'
-        )
-        pre_cell = fluid.data(
-            name='pre_cell', shape=[None, self.hidden_size], dtype='float32'
-        )
-        cell = LSTMCell(self.hidden_size)
-        lstm_hidden_new, lstm_states_new = cell(inputs, [pre_hidden, pre_cell])
-
-        lstm_unit = contrib.layers.rnn_impl.BasicLSTMUnit(
-            "basicLSTM",
-            self.hidden_size,
-            None,
-            None,
-            None,
-            None,
-            1.0,
-            "float32",
-        )
-        lstm_hidden, lstm_cell = lstm_unit(inputs, pre_hidden, pre_cell)
-
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-        exe = Executor(place)
-        exe.run(framework.default_startup_program())
-
-        inputs_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.input_size)
-        ).astype('float32')
-        pre_hidden_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-        pre_cell_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-
-        param_names = [
-            ["LSTMCell/BasicLSTMUnit_0.w_0", "basicLSTM/BasicLSTMUnit_0.w_0"],
-            ["LSTMCell/BasicLSTMUnit_0.b_0", "basicLSTM/BasicLSTMUnit_0.b_0"],
-        ]
-        for names in param_names:
-            param = np.array(
-                fluid.global_scope().find_var(names[0]).get_tensor()
-            )
-            param = np.random.uniform(
-                -0.1, 0.1, size=param.shape
-            ).astype('float32')
-            fluid.global_scope().find_var(names[0]).get_tensor().set(
-                param, place
-            )
-            fluid.global_scope().find_var(names[1]).get_tensor().set(
-                param, place
-            )
-
-        out = exe.run(
-            feed={
-                'inputs': inputs_np,
-                'pre_hidden': pre_hidden_np,
-                'pre_cell': pre_cell_np,
-            },
-            fetch_list=[lstm_hidden_new, lstm_hidden],
-        )
-        np.testing.assert_allclose(out[0], out[1], rtol=0.0001, atol=0)
-
-
-class TestGRUCellError(unittest.TestCase):
-    def test_errors(self):
-        with program_guard(Program(), Program()):
-            batch_size, input_size, hidden_size = 4, 16, 16
-            inputs = fluid.data(
-                name='inputs', shape=[None, input_size], dtype='float32'
-            )
-            pre_hidden = layers.data(
-                name='pre_hidden',
-                shape=[None, hidden_size],
-                append_batch_size=False,
-                dtype='float32',
-            )
-            cell = GRUCell(hidden_size)
-
-            def test_input_Variable():
-                np_input = np.random.random(
-                    (batch_size, input_size)
-                ).astype("float32")
-                cell(np_input, pre_hidden)
-
-            self.assertRaises(TypeError, test_input_Variable)
-
-            def test_pre_hidden_Variable():
-                np_pre_hidden = np.random.random(
-                    (batch_size, hidden_size)
-                ).astype("float32")
-                cell(inputs, np_pre_hidden)
-
-            self.assertRaises(TypeError, test_pre_hidden_Variable)
-
-            def test_input_type():
-                error_inputs = fluid.data(
-                    name='error_inputs',
-                    shape=[None, input_size],
-                    dtype='int32',
-                )
-                cell(error_inputs, pre_hidden)
-
-            self.assertRaises(TypeError, test_input_type)
-
-            def test_pre_hidden_type():
-                error_pre_hidden = fluid.data(
-                    name='error_pre_hidden',
-                    shape=[None, hidden_size],
-                    dtype='int32',
-                )
-                cell(inputs, error_pre_hidden)
-
-            self.assertRaises(TypeError, test_pre_hidden_type)
-
-            def test_dtype():
-                # the input type must be Variable
-                GRUCell(hidden_size, dtype="int32")
-
-            self.assertRaises(TypeError, test_dtype)
-
-
-class TestGRUCell(unittest.TestCase):
-    def setUp(self):
-        self.batch_size = 4
-        self.input_size = 16
-        self.hidden_size = 16
-
-    def test_run(self):
-        inputs = fluid.data(
-            name='inputs', shape=[None, self.input_size], dtype='float32'
-        )
-        pre_hidden = layers.data(
-            name='pre_hidden',
-            shape=[None, self.hidden_size],
-            append_batch_size=False,
-            dtype='float32',
-        )
-        cell = GRUCell(self.hidden_size)
-        gru_hidden_new, _ = cell(inputs, pre_hidden)
-
-        gru_unit = contrib.layers.rnn_impl.BasicGRUUnit(
-            "basicGRU", self.hidden_size, None, None, None, None, "float32"
-        )
-        gru_hidden = gru_unit(inputs, pre_hidden)
-
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-        exe = Executor(place)
-        exe.run(framework.default_startup_program())
-
-        inputs_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.input_size)
-        ).astype('float32')
-        pre_hidden_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-
-        param_names = [
-            ["GRUCell/BasicGRUUnit_0.w_0", "basicGRU/BasicGRUUnit_0.w_0"],
-            ["GRUCell/BasicGRUUnit_0.w_1", "basicGRU/BasicGRUUnit_0.w_1"],
-            ["GRUCell/BasicGRUUnit_0.b_0", "basicGRU/BasicGRUUnit_0.b_0"],
-            ["GRUCell/BasicGRUUnit_0.b_1", "basicGRU/BasicGRUUnit_0.b_1"],
-        ]
-        for names in param_names:
-            param = np.array(
-                fluid.global_scope().find_var(names[0]).get_tensor()
-            )
-            param = np.random.uniform(
-                -0.1, 0.1, size=param.shape
-            ).astype('float32')
-            fluid.global_scope().find_var(names[0]).get_tensor().set(
-                param, place
-            )
-            fluid.global_scope().find_var(names[1]).get_tensor().set(
-                param, place
-            )
-
-        out = exe.run(
-            feed={'inputs': inputs_np, 'pre_hidden': pre_hidden_np},
-            fetch_list=[gru_hidden_new, gru_hidden],
-        )
-        np.testing.assert_allclose(out[0], out[1], rtol=0.0001, atol=0)
+paddle.enable_static()
 
 
 class TestRnnError(unittest.TestCase):
```
@@ -336,7 +60,9 @@ class TestRnnError(unittest.TestCase):

```diff
             inputs_dynamic_rnn = paddle.transpose(
                 inputs_basic_lstm, perm=[1, 0, 2]
             )
-            cell = LSTMCell(hidden_size, name="LSTMCell_for_rnn")
+            cell = paddle.nn.LSTMCell(
+                input_size, hidden_size, name="LSTMCell_for_rnn"
+            )
             np_inputs_dynamic_rnn = np.random.random(
                 (seq_len, batch_size, input_size)
             ).astype("float32")
```
@@ -362,7 +88,9 @@ class TestRnnError(unittest.TestCase):

```diff
             self.assertRaises(TypeError, test_input_list)
 
             def test_initial_states_type():
-                cell = GRUCell(hidden_size, name="GRUCell_for_rnn")
+                cell = paddle.nn.GRUCell(
+                    input_size, hidden_size, name="GRUCell_for_rnn"
+                )
                 error_initial_states = np.random.random(
                     (batch_size, hidden_size)
                 ).astype("float32")
```
@@ -417,36 +145,9 @@ class TestRnn(unittest.TestCase):

```diff
         self.seq_len = 4
 
     def test_run(self):
-        inputs_basic_lstm = fluid.data(
-            name='inputs_basic_lstm',
-            shape=[None, None, self.input_size],
-            dtype='float32',
-        )
-        sequence_length = fluid.data(
-            name="sequence_length", shape=[None], dtype='int64'
-        )
-
-        inputs_dynamic_rnn = paddle.transpose(
-            inputs_basic_lstm, perm=[1, 0, 2]
-        )
-        cell = LSTMCell(self.hidden_size, name="LSTMCell_for_rnn")
-        output, final_state = dynamic_rnn(
-            cell=cell,
-            inputs=inputs_dynamic_rnn,
-            sequence_length=sequence_length,
-            is_reverse=False,
-        )
-        output_new = paddle.transpose(output, perm=[1, 0, 2])
-
-        rnn_out, last_hidden, last_cell = basic_lstm(
-            inputs_basic_lstm,
-            None,
-            None,
-            self.hidden_size,
-            num_layers=1,
-            batch_first=False,
-            bidirectional=False,
-            sequence_length=sequence_length,
-            forget_bias=1.0,
-        )
+        numpy_cell = LSTMCell(self.input_size, self.hidden_size)
+        dynamic_cell = paddle.nn.LSTMCell(self.input_size, self.hidden_size)
 
         if core.is_compiled_with_cuda():
             place = core.CUDAPlace(0)
```
@@ -455,60 +156,68 @@ class TestRnn(unittest.TestCase):

```diff
         exe = Executor(place)
         exe.run(framework.default_startup_program())
 
-        inputs_basic_lstm_np = np.random.uniform(
-            -0.1, 0.1, (self.seq_len, self.batch_size, self.input_size)
-        ).astype('float32')
+        state = numpy_cell.parameters
+        for k, v in dynamic_cell.named_parameters():
+            param = np.random.uniform(
+                -0.1, 0.1, size=state[k].shape
+            ).astype('float64')
+            setattr(numpy_cell, k, param)
+            fluid.global_scope().find_var(v.name).get_tensor().set(
+                param, place
+            )
+
+        sequence_length = fluid.data(
+            name="sequence_length", shape=[None], dtype='int64'
+        )
+        inputs_rnn = fluid.data(
+            name='inputs_rnn',
+            shape=[None, None, self.input_size],
+            dtype='float64',
+        )
+        pre_hidden = fluid.data(
+            name='pre_hidden', shape=[None, self.hidden_size], dtype='float64'
+        )
+        pre_cell = fluid.data(
+            name='pre_cell', shape=[None, self.hidden_size], dtype='float64'
+        )
+        dynamic_output, dynamic_final_state = dynamic_rnn(
+            cell=dynamic_cell,
+            inputs=inputs_rnn,
+            sequence_length=sequence_length,
+            initial_states=(pre_hidden, pre_cell),
+            is_reverse=False,
+        )
+
+        inputs_rnn_np = np.random.uniform(
+            -0.1, 0.1, (self.batch_size, self.seq_len, self.input_size)
+        ).astype('float64')
+        sequence_length_np = (
+            np.ones(self.batch_size, dtype='int64') * self.seq_len
+        )
-        inputs_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.input_size)
-        ).astype('float32')
-        pre_hidden_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
-        pre_cell_np = np.random.uniform(
-            -0.1, 0.1, (self.batch_size, self.hidden_size)
-        ).astype('float32')
+        pre_hidden_np = np.random.uniform(
+            -0.1, 0.1, (self.batch_size, self.hidden_size)
+        ).astype('float64')
+        pre_cell_np = np.random.uniform(
+            -0.1, 0.1, (self.batch_size, self.hidden_size)
+        ).astype('float64')
 
-        param_names = [
-            [
-                "LSTMCell_for_rnn/BasicLSTMUnit_0.w_0",
-                "basic_lstm_layers_0/BasicLSTMUnit_0.w_0",
-            ],
-            [
-                "LSTMCell_for_rnn/BasicLSTMUnit_0.b_0",
-                "basic_lstm_layers_0/BasicLSTMUnit_0.b_0",
-            ],
-        ]
-        for names in param_names:
-            param = np.array(
-                fluid.global_scope().find_var(names[0]).get_tensor()
-            )
-            param = np.random.uniform(
-                -0.1, 0.1, size=param.shape
-            ).astype('float32')
-            fluid.global_scope().find_var(names[0]).get_tensor().set(
-                param, place
-            )
-            fluid.global_scope().find_var(names[1]).get_tensor().set(
-                param, place
-            )
-
-        out = exe.run(
-            feed={
-                'inputs_basic_lstm': inputs_basic_lstm_np,
-                'sequence_length': sequence_length_np,
-                'inputs': inputs_np,
-                'pre_hidden': pre_hidden_np,
-                'pre_cell': pre_cell_np,
-            },
-            fetch_list=[output_new, rnn_out],
-        )
-        np.testing.assert_allclose(out[0], out[1], rtol=0.0001)
+        o1, _ = numpy_rnn(
+            cell=numpy_cell,
+            inputs=inputs_rnn_np,
+            initial_states=(pre_hidden_np, pre_cell_np),
+            sequence_length=sequence_length_np,
+            is_reverse=False,
+        )
+        o2 = exe.run(
+            feed={
+                'inputs_rnn': inputs_rnn_np,
+                'sequence_length': sequence_length_np,
+                'pre_hidden': pre_hidden_np,
+                'pre_cell': pre_cell_np,
+            },
+            fetch_list=[dynamic_output],
+        )[0]
+        np.testing.assert_allclose(o1, o2, rtol=0.001)
 
 
 class TestRnnUtil(unittest.TestCase):
```
@@ -528,218 +237,5 @@ class TestRnnUtil(unittest.TestCase):

```python
        pass


class EncoderCell(RNNCell):
    """Encoder Cell"""

    def __init__(
        self,
        num_layers,
        hidden_size,
        dropout_prob=0.0,
        init_scale=0.1,
    ):
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_prob = dropout_prob
        self.lstm_cells = []
        for i in range(num_layers):
            self.lstm_cells.append(LSTMCell(hidden_size))

    def call(self, step_input, states):
        new_states = []
        for i in range(self.num_layers):
            out, new_state = self.lstm_cells[i](step_input, states[i])
            step_input = (
                layers.dropout(out, self.dropout_prob)
                if self.dropout_prob
                else out
            )
            new_states.append(new_state)
        return step_input, new_states

    @property
    def state_shape(self):
        return [cell.state_shape for cell in self.lstm_cells]


class DecoderCell(RNNCell):
    """Decoder Cell"""

    def __init__(self, num_layers, hidden_size, dropout_prob=0.0):
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_prob = dropout_prob
        self.lstm_cells = []
        for i in range(num_layers):
            self.lstm_cells.append(LSTMCell(hidden_size))

    def call(self, step_input, states):
        new_lstm_states = []
        for i in range(self.num_layers):
            out, new_lstm_state = self.lstm_cells[i](step_input, states[i])
            step_input = (
                layers.dropout(out, self.dropout_prob)
                if self.dropout_prob
                else out
            )
            new_lstm_states.append(new_lstm_state)
        return step_input, new_lstm_states


def def_seq2seq_model(
    num_layers, hidden_size, dropout_prob, src_vocab_size, trg_vocab_size
):
    "vanilla seq2seq model"
    # data
    source = fluid.data(name="src", shape=[None, None], dtype="int64")
    source_length = fluid.data(
        name="src_sequence_length", shape=[None], dtype="int64"
    )
    target = fluid.data(name="trg", shape=[None, None], dtype="int64")
    target_length = fluid.data(
        name="trg_sequence_length", shape=[None], dtype="int64"
    )
    label = fluid.data(name="label", shape=[None, None, 1], dtype="int64")

    # embedding
    src_emb = fluid.embedding(source, (src_vocab_size, hidden_size))
    tar_emb = fluid.embedding(target, (src_vocab_size, hidden_size))

    # encoder
    enc_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
    enc_output, enc_final_state = dynamic_rnn(
        cell=enc_cell, inputs=src_emb, sequence_length=source_length
    )

    # decoder
    dec_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
    dec_output, dec_final_state = dynamic_rnn(
        cell=dec_cell, inputs=tar_emb, initial_states=enc_final_state
    )

    logits = layers.fc(
        dec_output,
        size=trg_vocab_size,
        num_flatten_dims=len(dec_output.shape) - 1,
        bias_attr=False,
    )

    # loss
    loss = paddle.nn.functional.softmax_with_cross_entropy(
        logits=logits, label=label, soft_label=False
    )
    loss = layers.unsqueeze(loss, axes=[2])
    max_tar_seq_len = paddle.shape(target)[1]
    tar_mask = layers.sequence_mask(
        target_length, maxlen=max_tar_seq_len, dtype="float32"
    )
    loss = loss * tar_mask
    loss = paddle.mean(loss, axis=[0])
    loss = paddle.sum(loss)

    # optimizer
    optimizer = fluid.optimizer.Adam(0.001)
    optimizer.minimize(loss)
    return loss


class TestSeq2SeqModel(unittest.TestCase):
    """
    Test cases to confirm seq2seq api training correctly.
    """

    def setUp(self):
        np.random.seed(123)
        self.model_hparams = {
            "num_layers": 2,
            "hidden_size": 128,
            "dropout_prob": 0.1,
            "src_vocab_size": 100,
            "trg_vocab_size": 100,
        }

        self.iter_num = iter_num = 2
        self.batch_size = batch_size = 4
        src_seq_len = 10
        trg_seq_len = 12
        self.data = {
            "src": np.random.randint(
                2,
                self.model_hparams["src_vocab_size"],
                (iter_num * batch_size, src_seq_len),
            ).astype("int64"),
            "src_sequence_length": np.random.randint(
                1, src_seq_len, (iter_num * batch_size,)
            ).astype("int64"),
            "trg": np.random.randint(
                2,
                self.model_hparams["src_vocab_size"],
                (iter_num * batch_size, trg_seq_len),
            ).astype("int64"),
            "trg_sequence_length": np.random.randint(
                1, trg_seq_len, (iter_num * batch_size,)
            ).astype("int64"),
            "label": np.random.randint(
                2,
                self.model_hparams["src_vocab_size"],
                (iter_num * batch_size, trg_seq_len, 1),
            ).astype("int64"),
        }

        place = (
            core.CUDAPlace(0)
            if core.is_compiled_with_cuda()
            else core.CPUPlace()
        )
        self.exe = Executor(place)

    def test_seq2seq_model(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            cost = def_seq2seq_model(**self.model_hparams)
            self.exe.run(startup_program)
            for iter_idx in range(self.iter_num):
                cost_val = self.exe.run(
                    feed={
                        "src": self.data["src"][
                            iter_idx * self.batch_size : (iter_idx + 1)
                            * self.batch_size,
                            :,
                        ],
                        "src_sequence_length": self.data["src_sequence_length"][
                            iter_idx * self.batch_size : (iter_idx + 1)
                            * self.batch_size
                        ],
                        "trg": self.data["trg"][
                            iter_idx * self.batch_size : (iter_idx + 1)
                            * self.batch_size,
                            :,
                        ],
                        "trg_sequence_length": self.data["trg_sequence_length"][
                            iter_idx * self.batch_size : (iter_idx + 1)
                            * self.batch_size
                        ],
                        "label": self.data["label"][
                            iter_idx * self.batch_size : (iter_idx + 1)
                            * self.batch_size
                        ],
                    },
                    fetch_list=[cost],
                )[0]
                print("iter_idx: %d, cost: %f" % (iter_idx, cost_val))


if __name__ == '__main__':
    unittest.main()
```
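The rewritten `TestRnn` above validates `paddle.nn.LSTMCell`/`rnn` against the NumPy reference in `rnn.rnn_numpy`. Outside the test harness, a comparable dygraph sanity check could be sketched as follows (shapes and tolerances here are illustrative):

```python
import numpy as np
import paddle

paddle.disable_static()
cell = paddle.nn.LSTMCell(16, 32)
x = paddle.rand((4, 16))
h0 = paddle.zeros((4, 32))
c0 = paddle.zeros((4, 32))
out, (h1, c1) = cell(x, (h0, c0))
# for paddle.nn.LSTMCell, the step output is the new hidden state
np.testing.assert_allclose(out.numpy(), h1.numpy(), rtol=1e-5)
```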
python/paddle/fluid/tests/unittests/test_rnn_decode_api.py

@@ -19,12 +19,10 @@ import numpy as np

```python
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
import paddle.nn as nn
from paddle import Model, set_device
from paddle.fluid.dygraph import Layer
from paddle.fluid.executor import Executor
from paddle.fluid.framework import _test_eager_guard
from paddle.nn import BeamSearchDecoder, dynamic_decode
from paddle.static import InputSpec as Input
```
@@ -32,257 +30,6 @@ from paddle.static import InputSpec as Input
paddle
.
enable_static
()
class
EncoderCell
(
layers
.
RNNCell
):
def
__init__
(
self
,
num_layers
,
hidden_size
,
dropout_prob
=
0.0
):
self
.
num_layers
=
num_layers
self
.
hidden_size
=
hidden_size
self
.
dropout_prob
=
dropout_prob
self
.
lstm_cells
=
[
layers
.
LSTMCell
(
hidden_size
)
for
i
in
range
(
num_layers
)
]
def
call
(
self
,
step_input
,
states
):
new_states
=
[]
for
i
in
range
(
self
.
num_layers
):
out
,
new_state
=
self
.
lstm_cells
[
i
](
step_input
,
states
[
i
])
step_input
=
(
layers
.
dropout
(
out
,
self
.
dropout_prob
)
if
self
.
dropout_prob
>
0
else
out
)
new_states
.
append
(
new_state
)
return
step_input
,
new_states
@
property
def
state_shape
(
self
):
return
[
cell
.
state_shape
for
cell
in
self
.
lstm_cells
]
class
DecoderCell
(
layers
.
RNNCell
):
def
__init__
(
self
,
num_layers
,
hidden_size
,
dropout_prob
=
0.0
):
self
.
num_layers
=
num_layers
self
.
hidden_size
=
hidden_size
self
.
dropout_prob
=
dropout_prob
self
.
lstm_cells
=
[
layers
.
LSTMCell
(
hidden_size
)
for
i
in
range
(
num_layers
)
]
def
attention
(
self
,
hidden
,
encoder_output
,
encoder_padding_mask
):
query
=
layers
.
fc
(
hidden
,
size
=
encoder_output
.
shape
[
-
1
],
bias_attr
=
False
)
attn_scores
=
paddle
.
matmul
(
layers
.
unsqueeze
(
query
,
[
1
]),
encoder_output
,
transpose_y
=
True
)
if
encoder_padding_mask
is
not
None
:
attn_scores
=
paddle
.
add
(
attn_scores
,
encoder_padding_mask
)
attn_scores
=
paddle
.
nn
.
functional
.
softmax
(
attn_scores
)
attn_out
=
paddle
.
squeeze
(
paddle
.
matmul
(
attn_scores
,
encoder_output
),
[
1
]
)
attn_out
=
layers
.
concat
([
attn_out
,
hidden
],
1
)
attn_out
=
layers
.
fc
(
attn_out
,
size
=
self
.
hidden_size
,
bias_attr
=
False
)
return
attn_out
def
call
(
self
,
step_input
,
states
,
encoder_output
,
encoder_padding_mask
=
None
):
lstm_states
,
input_feed
=
states
new_lstm_states
=
[]
step_input
=
layers
.
concat
([
step_input
,
input_feed
],
1
)
for
i
in
range
(
self
.
num_layers
):
out
,
new_lstm_state
=
self
.
lstm_cells
[
i
](
step_input
,
lstm_states
[
i
])
step_input
=
(
layers
.
dropout
(
out
,
self
.
dropout_prob
)
if
self
.
dropout_prob
>
0
else
out
)
new_lstm_states
.
append
(
new_lstm_state
)
out
=
self
.
attention
(
step_input
,
encoder_output
,
encoder_padding_mask
)
return
out
,
[
new_lstm_states
,
out
]
class
Encoder
:
def
__init__
(
self
,
num_layers
,
hidden_size
,
dropout_prob
=
0.0
):
self
.
encoder_cell
=
EncoderCell
(
num_layers
,
hidden_size
,
dropout_prob
)
def
__call__
(
self
,
src_emb
,
src_sequence_length
):
encoder_output
,
encoder_final_state
=
layers
.
rnn
(
cell
=
self
.
encoder_cell
,
inputs
=
src_emb
,
sequence_length
=
src_sequence_length
,
is_reverse
=
False
,
)
return
encoder_output
,
encoder_final_state
class
Decoder
:
def
__init__
(
self
,
num_layers
,
hidden_size
,
dropout_prob
,
decoding_strategy
=
"infer_sample"
,
max_decoding_length
=
20
,
):
self
.
decoder_cell
=
DecoderCell
(
num_layers
,
hidden_size
,
dropout_prob
)
self
.
decoding_strategy
=
decoding_strategy
self
.
max_decoding_length
=
(
None
if
(
self
.
decoding_strategy
==
"train_greedy"
)
else
max_decoding_length
)
def
__call__
(
self
,
decoder_initial_states
,
encoder_output
,
encoder_padding_mask
,
**
kwargs
):
output_layer
=
kwargs
.
pop
(
"output_layer"
,
None
)
beam_size
=
kwargs
.
get
(
"beam_size"
,
4
)
encoder_output
=
BeamSearchDecoder
.
tile_beam_merge_with_batch
(
encoder_output
,
beam_size
)
encoder_padding_mask
=
BeamSearchDecoder
.
tile_beam_merge_with_batch
(
encoder_padding_mask
,
beam_size
)
decoder
=
BeamSearchDecoder
(
cell
=
self
.
decoder_cell
,
output_fn
=
output_layer
,
**
kwargs
)
(
decoder_output
,
decoder_final_state
,
dec_seq_lengths
,
)
=
layers
.
dynamic_decode
(
decoder
,
inits
=
decoder_initial_states
,
max_step_num
=
self
.
max_decoding_length
,
encoder_output
=
encoder_output
,
encoder_padding_mask
=
encoder_padding_mask
,
impute_finished
=
False
# for test coverage
if
self
.
decoding_strategy
==
"beam_search"
else
True
,
is_test
=
True
if
self
.
decoding_strategy
==
"beam_search"
else
False
,
return_length
=
True
,
)
return
decoder_output
,
decoder_final_state
,
dec_seq_lengths
class Seq2SeqModel:
    """Seq2Seq model: RNN encoder-decoder with attention"""

    def __init__(
        self,
        num_layers,
        hidden_size,
        dropout_prob,
        src_vocab_size,
        trg_vocab_size,
        start_token,
        end_token,
        decoding_strategy="infer_sample",
        max_decoding_length=20,
        beam_size=4,
    ):
        self.start_token, self.end_token = start_token, end_token
        self.max_decoding_length, self.beam_size = (
            max_decoding_length,
            beam_size,
        )
        self.src_embeder = paddle.nn.Embedding(
            src_vocab_size,
            hidden_size,
            weight_attr=fluid.ParamAttr(name="source_embedding"),
        )
        self.trg_embeder = paddle.nn.Embedding(
            trg_vocab_size,
            hidden_size,
            weight_attr=fluid.ParamAttr(name="target_embedding"),
        )
        self.encoder = Encoder(num_layers, hidden_size, dropout_prob)
        self.decoder = Decoder(
            num_layers,
            hidden_size,
            dropout_prob,
            decoding_strategy,
            max_decoding_length,
        )
        self.output_layer = lambda x: layers.fc(
            x,
            size=trg_vocab_size,
            num_flatten_dims=len(x.shape) - 1,
            param_attr=fluid.ParamAttr(),
            bias_attr=False,
        )

    def __call__(self, src, src_length, trg=None, trg_length=None):
        # encoder
        encoder_output, encoder_final_state = self.encoder(
            self.src_embeder(src), src_length
        )

        decoder_initial_states = [
            encoder_final_state,
            self.decoder.decoder_cell.get_initial_states(
                batch_ref=encoder_output, shape=[encoder_output.shape[-1]]
            ),
        ]
        src_mask = layers.sequence_mask(
            src_length, maxlen=paddle.shape(src)[1], dtype="float32"
        )
        encoder_padding_mask = (src_mask - 1.0) * 1e9
        encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])

        # decoder
        decoder_kwargs = (
            {
                "inputs": self.trg_embeder(trg),
                "sequence_length": trg_length,
            }
            if self.decoder.decoding_strategy == "train_greedy"
            else (
                {
                    "embedding_fn": self.trg_embeder,
                    "beam_size": self.beam_size,
                    "start_token": self.start_token,
                    "end_token": self.end_token,
                }
                if self.decoder.decoding_strategy == "beam_search"
                else {
                    "embedding_fn": self.trg_embeder,
                    "start_tokens": layers.fill_constant_batch_size_like(
                        input=encoder_output,
                        shape=[-1],
                        dtype=src.dtype,
                        value=self.start_token,
                    ),
                    "end_token": self.end_token,
                }
            )
        )
        decoder_kwargs["output_layer"] = self.output_layer

        (
            decoder_output,
            decoder_final_state,
            dec_seq_lengths,
        ) = self.decoder(
            decoder_initial_states,
            encoder_output,
            encoder_padding_mask,
            **decoder_kwargs
        )
        if self.decoder.decoding_strategy == "beam_search":  # for inference
            return decoder_output
        logits, samples, sample_length = (
            decoder_output.cell_outputs,
            decoder_output.sample_ids,
            dec_seq_lengths,
        )
        probs = paddle.nn.functional.softmax(logits)
        return probs, samples, sample_length
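# An illustrative sketch (not from this commit): the hyper-parameters that
# TestDynamicDecode below passes in via **self.model_hparams expand to a
# call like this, here switched to beam-search decoding.
def _seq2seq_model_sketch():
    return Seq2SeqModel(
        num_layers=2,
        hidden_size=32,
        dropout_prob=0.1,
        src_vocab_size=100,
        trg_vocab_size=100,
        start_token=0,
        end_token=1,
        decoding_strategy="beam_search",
        max_decoding_length=10,
    )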
class PolicyGradient:
    """policy gradient"""
...
...
@@ -477,91 +224,6 @@ class SeqPGAgent:
        return results
class TestDynamicDecode(unittest.TestCase):
    def setUp(self):
        np.random.seed(123)
        self.model_hparams = {
            "num_layers": 2,
            "hidden_size": 32,
            "dropout_prob": 0.1,
            "src_vocab_size": 100,
            "trg_vocab_size": 100,
            "start_token": 0,
            "end_token": 1,
            "decoding_strategy": "infer_greedy",
            "max_decoding_length": 10,
        }

        self.iter_num = iter_num = 2
        self.batch_size = batch_size = 4
        src_seq_len = 10
        trg_seq_len = 12
        self.data = {
            "src": np.random.randint(
                2,
                self.model_hparams["src_vocab_size"],
                (iter_num * batch_size, src_seq_len),
            ).astype("int64"),
            "src_sequence_length": np.random.randint(
                1, src_seq_len, (iter_num * batch_size,)
            ).astype("int64"),
            "trg": np.random.randint(
                2,
                self.model_hparams["src_vocab_size"],
                (iter_num * batch_size, trg_seq_len),
            ).astype("int64"),
            "trg_sequence_length": np.random.randint(
                1, trg_seq_len, (iter_num * batch_size,)
            ).astype("int64"),
            "label": np.random.randint(
                2,
                self.model_hparams["src_vocab_size"],
                (iter_num * batch_size, trg_seq_len, 1),
            ).astype("int64"),
        }

        place = (
            core.CUDAPlace(0)
            if core.is_compiled_with_cuda()
            else core.CPUPlace()
        )
        self.exe = Executor(place)

    def test_beam_search_infer(self):
        paddle.set_default_dtype("float32")
        paddle.enable_static()
        self.model_hparams["decoding_strategy"] = "beam_search"
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            source = fluid.data(name="src", shape=[None, None], dtype="int64")
            source_length = fluid.data(
                name="src_sequence_length", shape=[None], dtype="int64"
            )
            model = Seq2SeqModel(**self.model_hparams)
            output = model(source, source_length)

        self.exe.run(startup_program)
        for iter_idx in range(self.iter_num):
            trans_ids = self.exe.run(
                program=main_program,
                feed={
                    "src": self.data["src"][
                        iter_idx
                        * self.batch_size : (iter_idx + 1)
                        * self.batch_size,
                        :,
                    ],
                    "src_sequence_length": self.data["src_sequence_length"][
                        iter_idx
                        * self.batch_size : (iter_idx + 1)
                        * self.batch_size
                    ],
                },
                fetch_list=[output],
            )[0]
class ModuleApiTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
...
...
python/paddle/nn/layer/rnn.py
View file @ acee3dd3
...
...
@@ -14,26 +14,389 @@
 import math
 from collections.abc import Sequence
-from functools import reduce
+from functools import partial, reduce

 import numpy as np

 import paddle
 from paddle import _C_ops, _legacy_C_ops, framework, in_dynamic_mode
-from paddle.fluid.framework import in_dygraph_mode
-from paddle.fluid.layers import utils
+from paddle.fluid.data_feeder import check_type, check_variable_and_dtype
+from paddle.fluid.framework import _non_static_mode, in_dygraph_mode
+from paddle.fluid.layers import control_flow, sequence_lod, utils
+from paddle.fluid.layers.utils import flatten, map_structure
 from paddle.framework import core
 from paddle.nn import Layer
 from paddle.nn import functional as F
 from paddle.nn import initializer as I
-from paddle.static import default_startup_program, program_guard
+from paddle.static import Variable, default_startup_program, program_guard

 from .container import LayerList

 __all__ = []
def rnn(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    r"""
    rnn creates a recurrent neural network specified by RNNCell `cell`,
    which performs :code:`cell.call()` (for dygraph mode :code:`cell.forward`)
    repeatedly until it reaches the maximum length of `inputs`.

    Parameters:
        cell(RNNCellBase): An instance of `RNNCellBase`.
        inputs(Tensor): the input sequences.
            If time_major is True, the shape is
            `[time_steps, batch_size, input_size]`
            else the shape is `[batch_size, time_steps, input_size]`.
        initial_states(Tensor|tuple|list, optional): the initial state of the
            rnn cell. Tensor or a possibly nested structure of tensors. If not
            provided, `cell.get_initial_states` would be called to produce
            the initial state. Defaults to None.
        sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
            or int32. The valid lengths of input sequences. Defaults to None.
            If `sequence_length` is not None, the inputs are treated as
            padded sequences. In each input sequence, elements whose time step
            index are not less than the valid length are treated as paddings.
        time_major (bool, optional): Whether the first dimension of the input
            means the time steps. Defaults to False.
        is_reverse (bool, optional): Indicate whether to calculate in the reverse
            order of input sequences. Defaults to False.
        **kwargs: Additional keyword arguments to pass to `forward` of the cell.

    Returns:
        outputs (Tensor|list|tuple): the output sequence. Tensor or nested
            structure of Tensors.
            If `time_major` is True, the shape of each tensor in outputs is
            `[time_steps, batch_size, hidden_size]`, else
            `[batch_size, time_steps, hidden_size]`.
        final_states (Tensor|list|tuple): final states. A (possibly nested structure of)
            tensor[s], representing the final state for RNN. It has the same
            structure as the initial states. Each tensor in final states has
            the same shape and dtype as the corresponding tensor in initial
            states.

    Examples:

        .. code-block:: python

            import paddle

            paddle.disable_static()

            cell = paddle.nn.SimpleRNNCell(16, 32)

            inputs = paddle.rand((4, 23, 16))
            prev_h = paddle.randn((4, 32))
            outputs, final_states = paddle.nn.layer.rnn(cell, inputs, prev_h)

    """
    if _non_static_mode():
        return _rnn_dynamic_graph(
            cell,
            inputs,
            initial_states,
            sequence_length,
            time_major,
            is_reverse,
            **kwargs
        )
    else:
        return _rnn_static_graph(
            cell,
            inputs,
            initial_states,
            sequence_length,
            time_major,
            is_reverse,
            **kwargs
        )
class ArrayWrapper:
    def __init__(self, x):
        self.array = [x]

    def append(self, x):
        self.array.append(x)
        return self

    def __getitem__(self, item):
        return self.array.__getitem__(item)
def _maybe_copy(state, new_state, step_mask):
    """update rnn state or just pass the old state through"""
    new_state = paddle.tensor.math._multiply_with_axis(
        new_state, step_mask, axis=0
    ) + paddle.tensor.math._multiply_with_axis(state, (1 - step_mask), axis=0)
    return new_state
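# An illustrative sketch (not from this commit) of the masked update above,
# written with public ops instead of the private _multiply_with_axis helper:
# rows whose time step is still within the valid length take the freshly
# computed state, while padded rows keep the old one.
def _maybe_copy_sketch():
    import paddle

    state = paddle.zeros([2, 3])  # carried-over state
    new_state = paddle.ones([2, 3])  # state produced at this step
    step_mask = paddle.to_tensor([1.0, 0.0])  # row 1 is past its valid length
    mask = step_mask.unsqueeze(-1)  # broadcast over the hidden dim
    return new_state * mask + state * (1 - mask)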
def _transpose_batch_time(x):
    perm = [1, 0] + list(range(2, len(x.shape)))
    return paddle.transpose(x, perm)
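# A quick illustrative check (not from this commit) of the helper above:
# batch-major input becomes time-major; trailing feature dims are untouched.
def _transpose_sketch():
    import paddle

    x = paddle.rand([4, 23, 16])  # [batch, time, feature]
    return _transpose_batch_time(x)  # shape [23, 4, 16]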
def _rnn_dynamic_graph(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    time_step_index = 0 if time_major else 1
    flat_inputs = flatten(inputs)
    time_steps = flat_inputs[0].shape[time_step_index]

    if initial_states is None:
        initial_states = cell.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )

    if not time_major:
        inputs = map_structure(_transpose_batch_time, inputs)

    if sequence_length is not None:
        mask = sequence_lod.sequence_mask(
            sequence_length, maxlen=time_steps, dtype=inputs.dtype
        )
        mask = paddle.transpose(mask, [1, 0])

    if is_reverse:
        inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
        mask = (
            paddle.reverse(mask, axis=[0])
            if sequence_length is not None
            else None
        )

    states = initial_states
    outputs = []
    for i in range(time_steps):
        step_inputs = map_structure(lambda x: x[i], inputs)
        step_outputs, new_states = cell(step_inputs, states, **kwargs)
        if sequence_length is not None:
            new_states = map_structure(
                partial(_maybe_copy, step_mask=mask[i]), states, new_states
            )
        states = new_states
        outputs = (
            map_structure(lambda x: ArrayWrapper(x), step_outputs)
            if i == 0
            else map_structure(
                lambda x, x_array: x_array.append(x), step_outputs, outputs
            )
        )

    final_outputs = map_structure(
        lambda x: paddle.stack(x.array, axis=time_step_index), outputs
    )

    if is_reverse:
        final_outputs = map_structure(
            lambda x: paddle.reverse(x, axis=time_step_index), final_outputs
        )

    final_states = new_states
    return final_outputs, final_states
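# An illustrative sketch (not from this commit) of the accumulate-then-stack
# pattern used above: per-step cell outputs are collected (via ArrayWrapper)
# and stacked once on the time axis when the loop finishes.
def _stack_sketch():
    import paddle

    step_outputs = [paddle.rand([4, 32]) for _ in range(3)]  # 3 time steps
    return paddle.stack(step_outputs, axis=1)  # [4, 3, 32], batch-major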
def _rnn_static_graph(
    cell,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    is_reverse=False,
    **kwargs
):
    check_type(inputs, 'inputs', (Variable, list, tuple), 'rnn')
    if isinstance(inputs, (list, tuple)):
        for i, input_x in enumerate(inputs):
            check_variable_and_dtype(
                input_x,
                'inputs[' + str(i) + ']',
                ['float32', 'float64'],
                'rnn',
            )
    check_type(
        initial_states,
        'initial_states',
        (Variable, list, tuple, type(None)),
        'rnn',
    )
    check_type(
        sequence_length, 'sequence_length', (Variable, type(None)), 'rnn'
    )

    def _switch_grad(x, stop=False):
        x.stop_gradient = stop
        return x

    if initial_states is None:
        initial_states = cell.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )
    initial_states = map_structure(_switch_grad, initial_states)

    if not time_major:
        inputs = map_structure(_transpose_batch_time, inputs)

    if sequence_length:
        max_seq_len = paddle.shape(flatten(inputs)[0])[0]
        mask = sequence_lod.sequence_mask(
            sequence_length,
            maxlen=max_seq_len,
            dtype=flatten(initial_states)[0].dtype,
        )
        mask = paddle.transpose(mask, [1, 0])
    if is_reverse:
        inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
        mask = paddle.reverse(mask, axis=[0]) if sequence_length else None

    # StaticRNN
    rnn = control_flow.StaticRNN()
    with rnn.step():
        inputs = map_structure(rnn.step_input, inputs)
        states = map_structure(rnn.memory, initial_states)
        copy_states = map_structure(lambda x: x, states)
        outputs, new_states = cell(inputs, copy_states, **kwargs)
        utils.assert_same_structure(states, new_states)
        if sequence_length:
            step_mask = rnn.step_input(mask)
            new_states = map_structure(
                partial(_maybe_copy, step_mask=step_mask), states, new_states
            )

        map_structure(rnn.update_memory, states, new_states)
        flat_outputs = flatten(outputs)
        map_structure(rnn.step_output, outputs)
        map_structure(rnn.step_output, new_states)

    rnn_out = rnn()
    final_outputs = rnn_out[: len(flat_outputs)]
    final_outputs = utils.pack_sequence_as(outputs, final_outputs)
    final_states = map_structure(
        lambda x: x[-1], rnn_out[len(flat_outputs) :]
    )
    final_states = utils.pack_sequence_as(new_states, final_states)

    if is_reverse:
        final_outputs = map_structure(
            lambda x: paddle.reverse(x, axis=[0]), final_outputs
        )

    if not time_major:
        final_outputs = map_structure(_transpose_batch_time, final_outputs)

    return (final_outputs, final_states)
def birnn(
    cell_fw,
    cell_bw,
    inputs,
    initial_states=None,
    sequence_length=None,
    time_major=False,
    **kwargs
):
    r"""
    birnn creates a bidirectional recurrent neural network specified by
    RNNCell `cell_fw` and `cell_bw`, which performs :code:`cell.call()`
    (for dygraph mode :code:`cell.forward`) repeatedly until it reaches the
    maximum length of `inputs`, and then concatenates the outputs of the two
    RNNs along the last axis.

    Parameters:
        cell_fw(RNNCellBase): An instance of `RNNCellBase`.
        cell_bw(RNNCellBase): An instance of `RNNCellBase`.
        inputs(Tensor): the input sequences.
            If time_major is True, the shape is
            `[time_steps, batch_size, input_size]`
            else the shape is `[batch_size, time_steps, input_size]`.
        initial_states(tuple, optional): A tuple of initial states of
            `cell_fw` and `cell_bw`.
            If not provided, `cell.get_initial_states` would be called to
            produce initial state for each cell. Defaults to None.
        sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
            or int32. The valid lengths of input sequences. Defaults to None.
            If `sequence_length` is not None, the inputs are treated as
            padded sequences. In each input sequence, elements whose time step
            index are not less than the valid length are treated as paddings.
        time_major (bool): Whether the first dimension of the input means the
            time steps. Defaults to False.
        **kwargs: Additional keyword arguments to pass to `forward` of each cell.

    Returns:
        outputs (Tensor): the outputs of the bidirectional RNN. It is the
            concatenation of the outputs from the forward RNN and backward
            RNN along the last axis.
            If time major is True, the shape is `[time_steps, batch_size, size]`,
            else the shape is `[batch_size, time_steps, size]`, where size is
            `cell_fw.hidden_size + cell_bw.hidden_size`.
        final_states (tuple): A tuple of the final states of the forward
            cell and backward cell.

    Examples:

        .. code-block:: python

            import paddle

            paddle.disable_static()

            cell_fw = paddle.nn.LSTMCell(16, 32)
            cell_bw = paddle.nn.LSTMCell(16, 32)

            inputs = paddle.rand((4, 23, 16))
            hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32))
            hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32))
            initial_states = ((hf, cf), (hb, cb))
            outputs, final_states = paddle.nn.layer.birnn(
                cell_fw, cell_bw, inputs, initial_states)

    """
    if initial_states is None:
        states_fw = cell_fw.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )
        states_bw = cell_fw.get_initial_states(
            batch_ref=inputs, batch_dim_idx=1 if time_major else 0
        )
    else:
        states_fw, states_bw = initial_states
    outputs_fw, states_fw = rnn(
        cell_fw,
        inputs,
        states_fw,
        sequence_length,
        time_major=time_major,
        **kwargs
    )
    outputs_bw, states_bw = rnn(
        cell_bw,
        inputs,
        states_bw,
        sequence_length,
        time_major=time_major,
        is_reverse=True,
        **kwargs
    )

    outputs = map_structure(
        lambda x, y: paddle.concat([x, y], -1), outputs_fw, outputs_bw
    )

    final_states = (states_fw, states_bw)
    return outputs, final_states
def split_states(states, bidirectional=False, state_components=1):
    r"""
    Split states of RNN network into possibly nested list or tuple of
...
...
@@ -779,7 +1142,7 @@ class RNN(Layer):
     def forward(
         self, inputs, initial_states=None, sequence_length=None, **kwargs
     ):
-        final_outputs, final_states = paddle.fluid.layers.rnn(
+        final_outputs, final_states = rnn(
             self.cell,
             inputs,
             initial_states=initial_states,
...
...
@@ -866,7 +1229,7 @@ class BiRNN(Layer):
             len(initial_states) == 2
         ), "length of initial_states should be 2 when it is a list/tuple"
-        outputs, final_states = paddle.fluid.layers.birnn(
+        outputs, final_states = birnn(
             self.cell_fw,
             self.cell_bw,
             inputs,
...
...
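Both hunks swap the removed `paddle.fluid.layers` entry points for the `rnn` and `birnn` functions now defined in `python/paddle/nn/layer/rnn.py`, so `paddle.nn.RNN` and `paddle.nn.BiRNN` no longer route through fluid. A minimal dygraph sketch of the public path after this change (shapes are illustrative):

    import paddle

    cell = paddle.nn.SimpleRNNCell(16, 32)
    rnn_layer = paddle.nn.RNN(cell)  # forward now dispatches to nn.layer.rnn.rnn
    inputs = paddle.rand((4, 23, 16))  # [batch, time, input_size]
    outputs, final_states = rnn_layer(inputs)  # outputs: [4, 23, 32]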