Unverified commit acee3dd3, authored by: lugimzzz, committed by: GitHub

[fluid clean] remove 4 fluid.layers api and migrate 2 fluid.layer api (#48972)

* fluid clean layer

* docs
Parent b06a5946
@@ -36,503 +36,15 @@ from ..data_feeder import check_variable_and_dtype, check_type, check_dtype
from collections.abc import Sequence
__all__ = [
'RNNCell',
'GRUCell',
'LSTMCell',
'rnn',
'birnn',
'dynamic_decode',
'dynamic_lstm',
'dynamic_lstmp',
'dynamic_gru',
'gru_unit',
'lstm_unit',
'lstm',
]
class RNNCell:
"""
:api_attr: Static Graph
RNNCell is the base class of abstractions representing the calculations
that map the input and state to the output and new state. It is suitable
for, and mostly used in, RNNs.
"""
def call(self, inputs, states, **kwargs):
r"""
Every cell must implement this method to do the calculations mapping the
inputs and states to the output and new states.
To be more flexible, both inputs and states can be a tensor variable or
a nested structure (list|tuple|namedtuple|dict) of tensor variable, that
is, a (possibly nested structure of) tensor variable[s].
Parameters:
inputs: A (possibly nested structure of) tensor variable[s].
states: A (possibly nested structure of) tensor variable[s].
**kwargs: Additional keyword arguments, provided by the caller.
Returns:
tuple: outputs and new_states pair. outputs and new_states both \
can be nested structures of tensor variables. new_states must \
have the same structure as states.
"""
raise NotImplementedError("RNNCell must implent the call function.")
def __call__(self, inputs, states, **kwargs):
return self.call(inputs, states, **kwargs)
def get_initial_states(
self,
batch_ref,
shape=None,
dtype='float32',
init_value=0,
batch_dim_idx=0,
):
r"""
Generate initialized states according to provided shape, data type and
value.
Parameters:
batch_ref: A (possibly nested structure of) tensor variable[s].
The first dimension of the tensor will be used as batch size to
initialize states.
shape: A (possibly nested structure of) shape[s], where a shape is
represented as a list/tuple of integers. -1 (for batch size) will
be automatically inserted if shape does not start with it. If None,
property `state_shape` will be used. The default value is None.
dtype: A (possibly nested structure of) data type[s]. The structure
must be the same as that of `shape`, except when all tensors in states
have the same data type, in which case a single data type can be used. If
property `cell.state_shape` is not available, float32 will be used
as the data type. The default value is float32.
init_value: A float value used to initialize states.
batch_dim_idx: An integer indicating which dimension of the tensor in
inputs represents batch size. The default value is 0.
Returns:
Variable: tensor variable[s] packed in the same structure provided \
by shape, representing the initialized states.
"""
check_variable_and_dtype(
batch_ref,
'batch_ref',
['float32', 'float64', 'int32', 'int64'],
'RNNCell',
)
check_type(shape, 'shape', (list, tuple, type(None), int), 'RNNCell')
if isinstance(shape, (list, tuple)):
shapes = map_structure(lambda x: x, shape)
if isinstance(shape, list):
for i, _shape in enumerate(shapes):
check_type(_shape, 'shapes[' + str(i) + ']', int, 'RNNCell')
else:
check_type(shapes, 'shapes', int, 'RNNCell')
check_dtype(dtype, 'dtype', ['float32', 'float64'], 'RNNCell')
# TODO: use inputs and batch_size
batch_ref = flatten(batch_ref)[0]
def _is_shape_sequence(seq):
"""For shape, list/tuple of integer is the finest-grained objection"""
if isinstance(seq, list) or isinstance(seq, tuple):
if reduce(
lambda flag, x: isinstance(x, int) and flag, seq, True
):
return False
# TODO: Add check for the illegal
if isinstance(seq, dict):
return True
return isinstance(seq, Sequence) and not isinstance(seq, str)
class Shape:
def __init__(self, shape):
self.shape = shape if shape[0] == -1 else ([-1] + list(shape))
# nested structure of shapes
states_shapes = self.state_shape if shape is None else shape
is_sequence_ori = utils.is_sequence
utils.is_sequence = _is_shape_sequence
states_shapes = map_structure(lambda shape: Shape(shape), states_shapes)
utils.is_sequence = is_sequence_ori
# nested structure of dtypes
try:
states_dtypes = self.state_dtype if dtype is None else dtype
except NotImplementedError: # use fp32 as default
states_dtypes = "float32"
if len(flatten(states_dtypes)) == 1:
dtype = flatten(states_dtypes)[0]
states_dtypes = map_structure(lambda shape: dtype, states_shapes)
init_states = map_structure(
lambda shape, dtype: tensor.fill_constant_batch_size_like(
input=batch_ref,
shape=shape.shape,
dtype=dtype,
value=init_value,
input_dim_idx=batch_dim_idx,
),
states_shapes,
states_dtypes,
)
return init_states
@property
def state_shape(self):
"""
Abstract method (property).
Used to initialize states.
A (possibly nested structure of) shape[s], where a shape is represented
as a list/tuple of integers (-1 for batch size would be automatically
inserted into a shape if the shape does not start with it).
Not necessary to be implemented if states are not initialized by
`get_initial_states` or the `shape` argument is provided when using
`get_initial_states`.
"""
raise NotImplementedError(
"Please add implementaion for `state_shape` in the used cell."
)
@property
def state_dtype(self):
"""
Abstract method (property).
Used to initialize states.
A (possibly nested structure of) data type[s]. The structure must be
the same as that of `shape`, except when all tensors in states have the same
data type, in which case a single data type can be used.
Not necessary to be implemented if states are not initialized
by `get_initial_states` or the `dtype` argument is provided when using
`get_initial_states`.
"""
raise NotImplementedError(
"Please add implementaion for `state_dtype` in the used cell."
)
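For orientation, a concrete cell only needs to implement `call` and, optionally, the `state_shape` / `state_dtype` properties described above. A minimal toy subclass (illustrative only, not part of this patch) could look like:

class AddCell(RNNCell):
    """Toy cell: the new state is the element-wise sum of input and previous state."""

    def __init__(self, hidden_size):
        self.hidden_size = hidden_size

    def call(self, inputs, states):
        # inputs and states are both [batch_size, hidden_size]
        new_state = inputs + states
        return new_state, new_state

    @property
    def state_shape(self):
        # -1 for the batch dimension is inserted by get_initial_states
        return [self.hidden_size]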
class GRUCell(RNNCell):
r"""
:api_attr: Static Graph
Gated Recurrent Unit cell. It is a wrapper for
`fluid.contrib.layers.rnn_impl.BasicGRUUnit` to make it adapt to RNNCell.
The formulas used are as follows:
.. math::
u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)
r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)
\\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)
h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \\tilde{h_t}
For more details, please refer to `Learning Phrase Representations using
RNN Encoder Decoder for Statistical Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_
Examples:
.. code-block:: python
import paddle.fluid.layers as layers
cell = layers.GRUCell(hidden_size=256)
"""
def __init__(
self,
hidden_size,
param_attr=None,
bias_attr=None,
gate_activation=None,
activation=None,
dtype="float32",
name="GRUCell",
):
"""
Constructor of GRUCell.
Parameters:
hidden_size (int): The hidden size in the GRU cell.
param_attr(ParamAttr, optional): The parameter attribute for the learnable
weight matrix. Default: None.
bias_attr (ParamAttr, optional): The parameter attribute for the bias
of GRU. Default: None.
gate_activation (function, optional): The activation function for :math:`act_g`.
Default: `fluid.layers.sigmoid`.
activation (function, optional): The activation function for :math:`act_c`.
Default: `fluid.layers.tanh`.
dtype(string, optional): The data type used in this cell. Default float32.
name(string, optional) : The name scope used to identify parameters and biases.
"""
check_type(hidden_size, 'hidden_size', (int), 'GRUCell')
check_dtype(dtype, 'dtype', ['float32', 'float64'], 'GRUCell')
self.hidden_size = hidden_size
from .. import contrib # TODO: resolve recurrent import
self.gru_unit = contrib.layers.rnn_impl.BasicGRUUnit(
name,
hidden_size,
param_attr,
bias_attr,
gate_activation,
activation,
dtype,
)
def call(self, inputs, states):
r"""
Perform calculations of GRU.
Parameters:
inputs(Variable): A tensor with shape `[batch_size, input_size]`,
corresponding to :math:`x_t` in the formula. The data type
should be float32 or float64.
states(Variable): A tensor with shape `[batch_size, hidden_size]`,
corresponding to :math:`h_{t-1}` in the formula. The data type
should be float32 or float64.
Returns:
tuple: A tuple( :code:`(outputs, new_states)` ), where `outputs` and \
`new_states` are the same tensor shaped `[batch_size, hidden_size]`, \
corresponding to :math:`h_t` in the formula. The data type of the \
tensor is the same as that of `states`.
"""
check_variable_and_dtype(
inputs, 'inputs', ['float32', 'float64'], 'GRUCell'
)
check_variable_and_dtype(
states, 'states', ['float32', 'float64'], 'GRUCell'
)
new_hidden = self.gru_unit(inputs, states)
return new_hidden, new_hidden
@property
def state_shape(self):
"""
The `state_shape` of GRUCell is a shape `[hidden_size]` (-1 for batch
size would be automatically inserted into shape). The shape corresponds
to :math:`h_{t-1}`.
"""
return [self.hidden_size]
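For reference, this `GRUCell` is one of the wrappers removed by this PR; the migrated tests below construct `paddle.nn.GRUCell(input_size, hidden_size)` instead. A rough dygraph sketch of the replacement (sizes are illustrative, not taken from the patch):

import paddle

cell = paddle.nn.GRUCell(input_size=16, hidden_size=32)
x = paddle.rand((4, 16))        # [batch_size, input_size]
prev_h = paddle.zeros((4, 32))  # [batch_size, hidden_size]
y, h = cell(x, prev_h)          # y and h are both [batch_size, hidden_size]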
class LSTMCell(RNNCell):
r"""
:api_attr: Static Graph
Long-Short Term Memory cell. It is a wrapper for
`fluid.contrib.layers.rnn_impl.BasicLSTMUnit` to make it adapt to RNNCell.
The formulas used are as follows:
.. math::
i_{t} & = act_g(W_{x_{i}}x_{t} + W_{h_{i}}h_{t-1} + b_{i})
f_{t} & = act_g(W_{x_{f}}x_{t} + W_{h_{f}}h_{t-1} + b_{f} + forget\\_bias)
c_{t} & = f_{t}c_{t-1} + i_{t} act_c (W_{x_{c}}x_{t} + W_{h_{c}}h_{t-1} + b_{c})
o_{t} & = act_g(W_{x_{o}}x_{t} + W_{h_{o}}h_{t-1} + b_{o})
h_{t} & = o_{t} act_c (c_{t})
For more details, please refer to `RECURRENT NEURAL NETWORK REGULARIZATION <http://arxiv.org/abs/1409.2329>`_
Examples:
.. code-block:: python
import paddle.fluid.layers as layers
cell = layers.LSTMCell(hidden_size=256)
"""
def __init__(
self,
hidden_size,
param_attr=None,
bias_attr=None,
gate_activation=None,
activation=None,
forget_bias=1.0,
dtype="float32",
name="LSTMCell",
):
"""
Constructor of LSTMCell.
Parameters:
hidden_size (int): The hidden size in the LSTM cell.
param_attr(ParamAttr, optional): The parameter attribute for the learnable
weight matrix. Default: None.
bias_attr (ParamAttr, optional): The parameter attribute for the bias
of LSTM. Default: None.
gate_activation (function, optional): The activation function for :math:`act_g`.
Default: 'fluid.layers.sigmoid'.
activation (function, optional): The activation function for :math:`act_c`.
Default: 'fluid.layers.tanh'.
forget_bias(float, optional): forget bias used when computing forget gate.
Default 1.0
dtype(string, optional): The data type used in this cell. Default float32.
name(string, optional) : The name scope used to identify parameters and biases.
"""
check_type(hidden_size, 'hidden_size', (int), 'LSTMCell')
check_dtype(dtype, 'dtype', ['float32', 'float64'], 'LSTMCell')
self.hidden_size = hidden_size
from .. import contrib # TODO: resolve recurrent import
self.lstm_unit = contrib.layers.rnn_impl.BasicLSTMUnit(
name,
hidden_size,
param_attr,
bias_attr,
gate_activation,
activation,
forget_bias,
dtype,
)
def call(self, inputs, states):
r"""
Perform calculations of LSTM.
Parameters:
inputs(Variable): A tensor with shape `[batch_size, input_size]`,
corresponding to :math:`x_t` in the formula. The data type
should be float32 or float64.
states(Variable): A list containing two tensors, each shaped
`[batch_size, hidden_size]`, corresponding to :math:`h_{t-1}, c_{t-1}`
in the formula. The data type should be float32 or float64.
Returns:
tuple: A tuple( :code:`(outputs, new_states)` ), where `outputs` is \
a tensor with shape `[batch_size, hidden_size]`, corresponding \
to :math:`h_{t}` in the formula; `new_states` is a list containing \
two tensor variables shaped `[batch_size, hidden_size]`, corresponding \
to :math:`h_{t}, c_{t}` in the formula. The data type of these \
tensors is the same as that of `states`.
"""
check_variable_and_dtype(
inputs, 'inputs', ['float32', 'float64'], 'LSTMCell'
)
check_type(states, 'states', list, 'LSTMCell')
if isinstance(states, list):
for i, state in enumerate(states):
check_variable_and_dtype(
state,
'state[' + str(i) + ']',
['float32', 'float64'],
'LSTMCell',
)
pre_hidden, pre_cell = states
new_hidden, new_cell = self.lstm_unit(inputs, pre_hidden, pre_cell)
return new_hidden, [new_hidden, new_cell]
@property
def state_shape(self):
"""
The `state_shape` of LSTMCell is a list with two shapes: `[[hidden_size], [hidden_size]]`
(-1 for batch size would be automatically inserted into shape). These two
shapes correspond to :math:`h_{t-1}` and :math:`c_{t-1}` separately.
"""
return [[self.hidden_size], [self.hidden_size]]
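Similarly, the removed `LSTMCell` maps onto `paddle.nn.LSTMCell`, which the updated tests below create as `paddle.nn.LSTMCell(input_size, hidden_size)`. A minimal dygraph sketch (shapes illustrative):

import paddle

cell = paddle.nn.LSTMCell(input_size=16, hidden_size=32)
x = paddle.rand((4, 16))
prev_h = paddle.zeros((4, 32))
prev_c = paddle.zeros((4, 32))
y, (h, c) = cell(x, (prev_h, prev_c))  # y and h are the same tensor; c is the new cell state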
def rnn(
cell,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
is_reverse=False,
**kwargs
):
"""
rnn creates a recurrent neural network specified by RNNCell `cell`,
which performs :code:`cell.call()` (for dygraph mode :code:`cell.forward`)
repeatedly until it reaches the maximum length of `inputs`.
Arguments:
cell(RNNCellBase): An instance of `RNNCellBase`.
inputs(Tensor): the input sequences.
If time_major is True, the shape is
`[time_steps, batch_size, input_size]`
else the shape is `[batch_size, time_steps, input_size]`.
initial_states(Tensor|tuple|list, optional): the initial state of the
rnn cell. Tensor or a possibly nested structure of tensors. If not
provided, `cell.get_initial_states` would be called to produce
the initial state. Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index are not less than the valid length are treated as paddings.
time_major (bool): Whether the first dimension of the input means the
time steps. Defaults to False.
is_reverse (bool, optional): Indicate whether to calculate in the reverse
order of input sequences. Defaults to False.
**kwargs: Additional keyword arguments to pass to `forward` of the cell.
Returns:
(outputs, final_states)
outputs (Tensor|list|tuple): the output sequence. Tensor or nested
structure of Tensors.
If `time_major` is True, the shape of each tensor in outputs is
`[time_steps, batch_size, hidden_size]`, else
`[batch_size, time_steps, hidden_size]`.
final_states (Tensor|list|tuple): final states. A (possibly nested structure of)
tensor[s], representing the final state for RNN. It has the same
structure as the initial state. Each tensor in final states has the same
shape and dtype as the corresponding tensor in initial states.
Examples:
.. code-block:: python
import paddle
paddle.disable_static()
cell = paddle.nn.SimpleRNNCell(16, 32)
inputs = paddle.rand((4, 23, 16))
prev_h = paddle.randn((4, 32))
outputs, final_states = paddle.fluid.layers.rnn(cell, inputs, prev_h)
"""
if _non_static_mode():
return _rnn_dynamic_graph(
cell,
inputs,
initial_states,
sequence_length,
time_major,
is_reverse,
**kwargs
)
else:
return _rnn_static_graph(
cell,
inputs,
initial_states,
sequence_length,
time_major,
is_reverse,
**kwargs
)
class ArrayWrapper:
def __init__(self, x):
self.array = [x]
@@ -545,273 +57,6 @@ class ArrayWrapper:
return self.array.__getitem__(item)
def _maybe_copy(state, new_state, step_mask):
"""update rnn state or just pass the old state through"""
new_state = paddle.tensor.math._multiply_with_axis(
new_state, step_mask, axis=0
) + paddle.tensor.math._multiply_with_axis(state, (1 - step_mask), axis=0)
return new_state
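`_maybe_copy` keeps the previous state for samples that have run past their valid length and takes the freshly computed state otherwise. A NumPy sketch of the same masking rule, with made-up values:

import numpy as np

state = np.array([[1.0, 1.0], [2.0, 2.0]])       # previous states, [batch, hidden]
new_state = np.array([[9.0, 9.0], [8.0, 8.0]])   # states computed at this step
step_mask = np.array([1.0, 0.0])                 # 1 = this sample is still inside its valid length

# broadcast the mask over the batch axis, as _multiply_with_axis(axis=0) does
merged = new_state * step_mask[:, None] + state * (1.0 - step_mask)[:, None]
# -> [[9., 9.], [2., 2.]]: sample 0 advances, sample 1 keeps its old state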
def _transpose_batch_time(x):
perm = [1, 0] + list(range(2, len(x.shape)))
return paddle.transpose(x, perm)
def _rnn_dynamic_graph(
cell,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
is_reverse=False,
**kwargs
):
time_step_index = 0 if time_major else 1
flat_inputs = flatten(inputs)
time_steps = flat_inputs[0].shape[time_step_index]
if initial_states is None:
initial_states = cell.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0
)
if not time_major:
inputs = map_structure(_transpose_batch_time, inputs)
if sequence_length is not None:
mask = sequence_lod.sequence_mask(
sequence_length, maxlen=time_steps, dtype=inputs.dtype
)
mask = paddle.transpose(mask, [1, 0])
if is_reverse:
inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
mask = (
paddle.reverse(mask, axis=[0])
if sequence_length is not None
else None
)
states = initial_states
outputs = []
for i in range(time_steps):
step_inputs = map_structure(lambda x: x[i], inputs)
step_outputs, new_states = cell(step_inputs, states, **kwargs)
if sequence_length is not None:
new_states = map_structure(
partial(_maybe_copy, step_mask=mask[i]), states, new_states
)
states = new_states
outputs = (
map_structure(lambda x: ArrayWrapper(x), step_outputs)
if i == 0
else map_structure(
lambda x, x_array: x_array.append(x), step_outputs, outputs
)
)
final_outputs = map_structure(
lambda x: paddle.stack(x.array, axis=time_step_index), outputs
)
if is_reverse:
final_outputs = map_structure(
lambda x: paddle.reverse(x, axis=time_step_index), final_outputs
)
final_states = new_states
return final_outputs, final_states
def _rnn_static_graph(
cell,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
is_reverse=False,
**kwargs
):
check_type(inputs, 'inputs', (Variable, list, tuple), 'rnn')
if isinstance(inputs, (list, tuple)):
for i, input_x in enumerate(inputs):
check_variable_and_dtype(
input_x, 'inputs[' + str(i) + ']', ['float32', 'float64'], 'rnn'
)
check_type(
initial_states,
'initial_states',
(Variable, list, tuple, type(None)),
'rnn',
)
check_type(
sequence_length, 'sequence_length', (Variable, type(None)), 'rnn'
)
def _switch_grad(x, stop=False):
x.stop_gradient = stop
return x
if initial_states is None:
initial_states = cell.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0
)
initial_states = map_structure(_switch_grad, initial_states)
if not time_major:
inputs = map_structure(_transpose_batch_time, inputs)
if sequence_length:
max_seq_len = paddle.shape(flatten(inputs)[0])[0]
mask = sequence_lod.sequence_mask(
sequence_length,
maxlen=max_seq_len,
dtype=flatten(initial_states)[0].dtype,
)
mask = paddle.transpose(mask, [1, 0])
if is_reverse:
inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
mask = paddle.reverse(mask, axis=[0]) if sequence_length else None
# StaticRNN
rnn = control_flow.StaticRNN()
with rnn.step():
inputs = map_structure(rnn.step_input, inputs)
states = map_structure(rnn.memory, initial_states)
copy_states = map_structure(lambda x: x, states)
outputs, new_states = cell(inputs, copy_states, **kwargs)
assert_same_structure(states, new_states)
if sequence_length:
step_mask = rnn.step_input(mask)
new_states = map_structure(
partial(_maybe_copy, step_mask=step_mask), states, new_states
)
map_structure(rnn.update_memory, states, new_states)
flat_outputs = flatten(outputs)
map_structure(rnn.step_output, outputs)
map_structure(rnn.step_output, new_states)
rnn_out = rnn()
final_outputs = rnn_out[: len(flat_outputs)]
final_outputs = pack_sequence_as(outputs, final_outputs)
final_states = map_structure(lambda x: x[-1], rnn_out[len(flat_outputs) :])
final_states = pack_sequence_as(new_states, final_states)
if is_reverse:
final_outputs = map_structure(
lambda x: paddle.reverse(x, axis=[0]), final_outputs
)
if not time_major:
final_outputs = map_structure(_transpose_batch_time, final_outputs)
return (final_outputs, final_states)
def birnn(
cell_fw,
cell_bw,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
**kwargs
):
"""
birnn creates a bidirectional recurrent neural network specified by
RNNCell `cell_fw` and `cell_bw`, which performs :code:`cell.call()`
(for dygraph mode :code:`cell.forward`) repeatedly until it reaches
the maximum length of `inputs`, and then concatenates the outputs of both RNNs
along the last axis.
Arguments:
cell_fw(RNNCellBase): An instance of `RNNCellBase`.
cell_bw(RNNCellBase): An instance of `RNNCellBase`.
inputs(Tensor): the input sequences.
If time_major is True, the shape is
`[time_steps, batch_size, input_size]`
else the shape is `[batch_size, time_steps, input_size]`.
initial_states(tuple, optional): A tuple of initial states of
`cell_fw` and `cell_bw`.
If not provided, `cell.get_initial_states` would be called to
produce initial state for each cell. Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index are not less than the valid length are treated as paddings.
time_major (bool): Whether the first dimension of the input means the
time steps. Defaults to False.
**kwargs: Additional keyword arguments to pass to `forward` of each cell.
Returns:
(outputs, final_states)
outputs (Tensor): the outputs of the bidirectional RNN. It is the
concatenation of the outputs from the forward RNN and backward
RNN along the last axis.
If time major is True, the shape is `[time_steps, batch_size, size]`,
else the shape is `[batch_size, time_steps, size]`, where size is
`cell_fw.hidden_size + cell_bw.hidden_size`.
final_states (tuple): A tuple of the final states of the forward
cell and backward cell.
Examples:
.. code-block:: python
import paddle
paddle.disable_static()
cell_fw = paddle.nn.LSTMCell(16, 32)
cell_bw = paddle.nn.LSTMCell(16, 32)
inputs = paddle.rand((4, 23, 16))
hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32))
hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32))
initial_states = ((hf, cf), (hb, cb))
outputs, final_states = paddle.fluid.layers.birnn(
cell_fw, cell_bw, inputs, initial_states)
"""
if initial_states is None:
states_fw = cell_fw.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0
)
states_bw = cell_bw.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0
)
else:
states_fw, states_bw = initial_states
outputs_fw, states_fw = rnn(
cell_fw,
inputs,
states_fw,
sequence_length,
time_major=time_major,
**kwargs
)
outputs_bw, states_bw = rnn(
cell_bw,
inputs,
states_bw,
sequence_length,
time_major=time_major,
is_reverse=True,
**kwargs
)
outputs = map_structure(
lambda x, y: tensor.concat([x, y], -1), outputs_fw, outputs_bw
)
final_states = (states_fw, states_bw)
return outputs, final_states
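As with `rnn`, the bidirectional helper has a 2.x counterpart in the `paddle.nn.BiRNN` layer, which also concatenates the forward and backward outputs along the last axis. A hedged sketch (cells and sizes are illustrative, not taken from this patch):

import paddle

cell_fw = paddle.nn.SimpleRNNCell(16, 32)
cell_bw = paddle.nn.SimpleRNNCell(16, 32)
bi_rnn = paddle.nn.BiRNN(cell_fw, cell_bw)

inputs = paddle.rand((4, 23, 16))        # [batch_size, time_steps, input_size]
outputs, final_states = bi_rnn(inputs)   # outputs: [4, 23, 64], forward and backward concatenated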
def _dynamic_decode_imperative(
decoder,
inits=None,
@@ -2175,151 +1420,3 @@ def gru_unit(
)
return updated_hidden, reset_hidden_pre, gate
def lstm_unit(
x_t,
hidden_t_prev,
cell_t_prev,
forget_bias=0.0,
param_attr=None,
bias_attr=None,
name=None,
):
r"""
:api_attr: Static Graph
Long-Short Term Memory (LSTM) RNN cell. This operator performs LSTM calculations for
one time step, whose implementation is based on calculations described in `RECURRENT
NEURAL NETWORK REGULARIZATION <http://arxiv.org/abs/1409.2329>`_ .
We add forget_bias to the biases of the forget gate in order to
reduce the scale of forgetting. The formula is as follows:
.. math::
i_{t} & = \sigma(W_{x_{i}}x_{t} + W_{h_{i}}h_{t-1} + b_{i})
f_{t} & = \sigma(W_{x_{f}}x_{t} + W_{h_{f}}h_{t-1} + b_{f} + forget\\_bias)
c_{t} & = f_{t}c_{t-1} + i_{t} tanh (W_{x_{c}}x_{t} + W_{h_{c}}h_{t-1} + b_{c})
o_{t} & = \sigma(W_{x_{o}}x_{t} + W_{h_{o}}h_{t-1} + b_{o})
h_{t} & = o_{t} tanh (c_{t})
:math:`x_{t}` stands for ``x_t`` , corresponding to the input of current time step;
:math:`h_{t-1}` and :math:`c_{t-1}` correspond to ``hidden_t_prev`` and ``cell_t_prev`` ,
representing the outputs from the previous time step.
:math:`i_{t}, f_{t}, c_{t}, o_{t}, h_{t}` are the input gate, forget gate, cell, output gate
and hidden calculations, respectively.
Args:
x_t(Variable): A 2D Tensor representing the input of current time step.
Its shape should be :math:`[N, M]` , where :math:`N` stands for batch
size, :math:`M` for the feature size of input. The data type should
be float32 or float64.
hidden_t_prev(Variable): A 2D Tensor representing the hidden value from
previous step. Its shape should be :math:`[N, D]` , where :math:`N`
stands for batch size, :math:`D` for the hidden size. The data type
should be same as ``x_t`` .
cell_t_prev(Variable): A 2D Tensor representing the cell value from
previous step. It has the same shape and data type with ``hidden_t_prev`` .
forget_bias (float, optional): :math:`forget\\_bias` added to the biases
of the forget gate. Default 0.
param_attr(ParamAttr, optional): To specify the weight parameter property.
Default: None, which means the default weight parameter property is used.
See usage for details in :ref:`api_fluid_ParamAttr` .
bias_attr (ParamAttr, optional): To specify the bias parameter property.
Default: None, which means the default bias parameter property is used.
See usage for details in :ref:`api_fluid_ParamAttr` .
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually the name does not need to be set and
is None by default.
Returns:
tuple: The tuple contains two Tensor variables with the same shape and \
data type with ``hidden_t_prev`` , representing the hidden value and \
cell value which correspond to :math:`h_{t}` and :math:`c_{t}` in \
the formula.
Raises:
ValueError: Rank of x_t must be 2.
ValueError: Rank of hidden_t_prev must be 2.
ValueError: Rank of cell_t_prev must be 2.
ValueError: The 1st dimensions of x_t, hidden_t_prev and cell_t_prev must be the same.
ValueError: The 2nd dimensions of hidden_t_prev and cell_t_prev must be the same.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dict_dim, emb_dim, hidden_dim = 128, 64, 512
data = fluid.data(name='step_data', shape=[None], dtype='int64')
x = fluid.embedding(input=data, size=[dict_dim, emb_dim])
pre_hidden = fluid.data(
name='pre_hidden', shape=[None, hidden_dim], dtype='float32')
pre_cell = fluid.data(
name='pre_cell', shape=[None, hidden_dim], dtype='float32')
hidden, cell = fluid.layers.lstm_unit(
x_t=x,
hidden_t_prev=pre_hidden,
cell_t_prev=pre_cell)
"""
helper = LayerHelper('lstm_unit', **locals())
check_variable_and_dtype(x_t, 'x_t', ['float32', 'float64'], 'lstm_unit')
check_variable_and_dtype(
hidden_t_prev, 'hidden_t_prev', ['float32', 'float64'], 'lstm_unit'
)
check_variable_and_dtype(
cell_t_prev, 'cell_t_prev', ['float32', 'float64'], 'lstm_unit'
)
if len(x_t.shape) != 2:
raise ValueError("Rank of x_t must be 2.")
if len(hidden_t_prev.shape) != 2:
raise ValueError("Rank of hidden_t_prev must be 2.")
if len(cell_t_prev.shape) != 2:
raise ValueError("Rank of cell_t_prev must be 2.")
if (
x_t.shape[0] != hidden_t_prev.shape[0]
or x_t.shape[0] != cell_t_prev.shape[0]
):
raise ValueError(
"The 1st dimensions of x_t, hidden_t_prev and "
"cell_t_prev must be the same."
)
if hidden_t_prev.shape[1] != cell_t_prev.shape[1]:
raise ValueError(
"The 2nd dimensions of hidden_t_prev and "
"cell_t_prev must be the same."
)
if bias_attr is None:
bias_attr = ParamAttr()
size = cell_t_prev.shape[1]
concat_out = nn.concat(input=[x_t, hidden_t_prev], axis=1)
fc_out = nn.fc(
input=concat_out,
size=4 * size,
param_attr=param_attr,
bias_attr=bias_attr,
)
dtype = x_t.dtype
c = helper.create_variable_for_type_inference(dtype)
h = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type='lstm_unit',
inputs={"X": fc_out, "C_prev": cell_t_prev},
outputs={"C": c, "H": h},
attrs={"forget_bias": forget_bias},
)
return h, c
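The removed `lstm_unit` performs exactly one LSTM step following the gate equations quoted in its docstring. A small NumPy reference of those formulas (random placeholder weights; the gate ordering and parameter layout of the actual C++ kernel may differ):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x_t, h_prev, c_prev, W, b, forget_bias=0.0):
    # W: [input_size + hidden_size, 4 * hidden_size], b: [4 * hidden_size]
    z = np.concatenate([x_t, h_prev], axis=1) @ W + b
    i, f, g, o = np.split(z, 4, axis=1)
    c = sigmoid(f + forget_bias) * c_prev + sigmoid(i) * np.tanh(g)
    h = sigmoid(o) * np.tanh(c)
    return h, c

rng = np.random.default_rng(0)
batch, input_size, hidden = 2, 3, 4
h, c = lstm_step(
    rng.standard_normal((batch, input_size)),
    np.zeros((batch, hidden)),
    np.zeros((batch, hidden)),
    rng.standard_normal((input_size + hidden, 4 * hidden)),
    np.zeros(4 * hidden),
)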
@@ -2179,26 +2179,6 @@ class TestBook(LayerTest):
x, kernel_size=[5, 3], stride=[1, 2], padding=(2, 1)
)
def make_lstm_unit(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
):
x_t_data = self._get_data(
name='x_t_data', shape=[10, 10], dtype='float32'
)
x_t = layers.fc(input=x_t_data, size=10)
prev_hidden_data = self._get_data(
name='prev_hidden_data', shape=[10, 30], dtype='float32'
)
prev_hidden = layers.fc(input=prev_hidden_data, size=30)
prev_cell_data = self._get_data(
name='prev_cell', shape=[10, 30], dtype='float32'
)
prev_cell = layers.fc(input=prev_cell_data, size=30)
return layers.lstm_unit(
x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell
)
def make_softmax(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
@@ -17,10 +17,6 @@ import unittest
import numpy as np
from op_test import OpTest
from paddle import fluid
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.layers import lstm_unit
def sigmoid_np(x):
return 1.0 / (1.0 + np.exp(-x))
@@ -30,79 +26,6 @@ def tanh_np(x):
return 2 * sigmoid_np(2.0 * x) - 1.0
class LstmUnitTestError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
batch_size, dict_dim, emb_dim, hidden_dim = 32, 128, 64, 512
data = fluid.data(
name='step_data', shape=[batch_size], dtype='int64'
)
inputs = fluid.embedding(input=data, size=[dict_dim, emb_dim])
pre_hidden = fluid.data(
name='pre_hidden',
shape=[batch_size, hidden_dim],
dtype='float32',
)
pre_cell = fluid.data(
name='pre_cell', shape=[batch_size, hidden_dim], dtype='float32'
)
np_input = np.random.uniform(
-0.1, 0.1, (batch_size, emb_dim)
).astype('float64')
np_pre_hidden = np.random.uniform(
-0.1, 0.1, (batch_size, hidden_dim)
).astype('float64')
np_pre_cell = np.random.uniform(
-0.1, 0.1, (batch_size, hidden_dim)
).astype('float64')
def test_input_Variable():
lstm_unit(np_input, pre_hidden, pre_cell)
self.assertRaises(TypeError, test_input_Variable)
def test_pre_hidden_Variable():
lstm_unit(inputs, np_pre_hidden, pre_cell)
self.assertRaises(TypeError, test_pre_hidden_Variable)
def test_pre_cell_Variable():
lstm_unit(inputs, pre_hidden, np_pre_cell)
self.assertRaises(TypeError, test_pre_cell_Variable)
def test_input_type():
error_input = fluid.data(
name='error_input',
shape=[batch_size, emb_dim],
dtype='int32',
)
lstm_unit(error_input, pre_hidden, pre_cell)
self.assertRaises(TypeError, test_input_type)
def test_pre_hidden_type():
error_pre_hidden = fluid.data(
name='error_pre_hidden',
shape=[batch_size, hidden_dim],
dtype='int32',
)
lstm_unit(inputs, error_pre_hidden, pre_cell)
self.assertRaises(TypeError, test_pre_hidden_type)
def test_pre_cell_type():
error_pre_cell = fluid.data(
name='error_pre_cell',
shape=[batch_size, hidden_dim],
dtype='int32',
)
lstm_unit(inputs, pre_hidden, error_pre_cell)
self.assertRaises(TypeError, test_pre_cell_type)
class LstmUnitTest(OpTest):
def setUp(self):
self.op_type = "lstm_unit"
@@ -16,296 +16,20 @@ import unittest
import numpy
import numpy as np
from rnn.rnn_numpy import LSTMCell
from rnn.rnn_numpy import rnn as numpy_rnn
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
import paddle.fluid.layers.utils as utils
from paddle.fluid import contrib, framework
from paddle.fluid.contrib.layers import basic_lstm
from paddle.fluid import framework
from paddle.fluid.executor import Executor
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.layers import rnn as dynamic_rnn
from paddle.fluid.layers.rnn import GRUCell, LSTMCell, RNNCell
from paddle.nn.layer.rnn import rnn as dynamic_rnn
class TestLSTMCellError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
batch_size, input_size, hidden_size = 4, 16, 16
inputs = fluid.data(
name='inputs', shape=[None, input_size], dtype='float32'
)
pre_hidden = fluid.data(
name='pre_hidden', shape=[None, hidden_size], dtype='float32'
)
pre_cell = fluid.data(
name='pre_cell', shape=[None, hidden_size], dtype='float32'
)
cell = LSTMCell(hidden_size)
def test_input_Variable():
np_input = np.random.random((batch_size, input_size)).astype(
"float32"
)
cell(np_input, [pre_hidden, pre_cell])
self.assertRaises(TypeError, test_input_Variable)
def test_pre_hidden_Variable():
np_pre_hidden = np.random.random(
(batch_size, hidden_size)
).astype("float32")
cell(inputs, [np_pre_hidden, pre_cell])
self.assertRaises(TypeError, test_pre_hidden_Variable)
def test_pre_cell_Variable():
np_pre_cell = np.random.random((batch_size, input_size)).astype(
"float32"
)
cell(inputs, [pre_hidden, np_pre_cell])
self.assertRaises(TypeError, test_pre_cell_Variable)
def test_input_type():
error_inputs = fluid.data(
name='error_inputs', shape=[None, input_size], dtype='int32'
)
cell(error_inputs, [pre_hidden, pre_cell])
self.assertRaises(TypeError, test_input_type)
def test_pre_hidden_type():
error_pre_hidden = fluid.data(
name='error_pre_hidden',
shape=[None, hidden_size],
dtype='int32',
)
cell(inputs, [error_pre_hidden, pre_cell])
self.assertRaises(TypeError, test_pre_hidden_type)
def test_pre_cell_type():
error_pre_cell = fluid.data(
name='error_pre_cell',
shape=[None, hidden_size],
dtype='int32',
)
cell(inputs, [pre_hidden, error_pre_cell])
self.assertRaises(TypeError, test_pre_cell_type)
def test_dtype():
# the input type must be Variable
LSTMCell(hidden_size, dtype="int32")
self.assertRaises(TypeError, test_dtype)
class TestLSTMCell(unittest.TestCase):
def setUp(self):
self.batch_size = 4
self.input_size = 16
self.hidden_size = 16
def test_run(self):
inputs = fluid.data(
name='inputs', shape=[None, self.input_size], dtype='float32'
)
pre_hidden = fluid.data(
name='pre_hidden', shape=[None, self.hidden_size], dtype='float32'
)
pre_cell = fluid.data(
name='pre_cell', shape=[None, self.hidden_size], dtype='float32'
)
cell = LSTMCell(self.hidden_size)
lstm_hidden_new, lstm_states_new = cell(inputs, [pre_hidden, pre_cell])
lstm_unit = contrib.layers.rnn_impl.BasicLSTMUnit(
"basicLSTM",
self.hidden_size,
None,
None,
None,
None,
1.0,
"float32",
)
lstm_hidden, lstm_cell = lstm_unit(inputs, pre_hidden, pre_cell)
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
inputs_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.input_size)
).astype('float32')
pre_hidden_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
pre_cell_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
param_names = [
["LSTMCell/BasicLSTMUnit_0.w_0", "basicLSTM/BasicLSTMUnit_0.w_0"],
["LSTMCell/BasicLSTMUnit_0.b_0", "basicLSTM/BasicLSTMUnit_0.b_0"],
]
for names in param_names:
param = np.array(
fluid.global_scope().find_var(names[0]).get_tensor()
)
param = np.random.uniform(-0.1, 0.1, size=param.shape).astype(
'float32'
)
fluid.global_scope().find_var(names[0]).get_tensor().set(
param, place
)
fluid.global_scope().find_var(names[1]).get_tensor().set(
param, place
)
out = exe.run(
feed={
'inputs': inputs_np,
'pre_hidden': pre_hidden_np,
'pre_cell': pre_cell_np,
},
fetch_list=[lstm_hidden_new, lstm_hidden],
)
np.testing.assert_allclose(out[0], out[1], rtol=0.0001, atol=0)
class TestGRUCellError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
batch_size, input_size, hidden_size = 4, 16, 16
inputs = fluid.data(
name='inputs', shape=[None, input_size], dtype='float32'
)
pre_hidden = layers.data(
name='pre_hidden',
shape=[None, hidden_size],
append_batch_size=False,
dtype='float32',
)
cell = GRUCell(hidden_size)
def test_input_Variable():
np_input = np.random.random((batch_size, input_size)).astype(
"float32"
)
cell(np_input, pre_hidden)
self.assertRaises(TypeError, test_input_Variable)
def test_pre_hidden_Variable():
np_pre_hidden = np.random.random(
(batch_size, hidden_size)
).astype("float32")
cell(inputs, np_pre_hidden)
self.assertRaises(TypeError, test_pre_hidden_Variable)
def test_input_type():
error_inputs = fluid.data(
name='error_inputs', shape=[None, input_size], dtype='int32'
)
cell(error_inputs, pre_hidden)
self.assertRaises(TypeError, test_input_type)
def test_pre_hidden_type():
error_pre_hidden = fluid.data(
name='error_pre_hidden',
shape=[None, hidden_size],
dtype='int32',
)
cell(inputs, error_pre_hidden)
self.assertRaises(TypeError, test_pre_hidden_type)
def test_dtype():
# the input type must be Variable
GRUCell(hidden_size, dtype="int32")
self.assertRaises(TypeError, test_dtype)
class TestGRUCell(unittest.TestCase):
def setUp(self):
self.batch_size = 4
self.input_size = 16
self.hidden_size = 16
def test_run(self):
inputs = fluid.data(
name='inputs', shape=[None, self.input_size], dtype='float32'
)
pre_hidden = layers.data(
name='pre_hidden',
shape=[None, self.hidden_size],
append_batch_size=False,
dtype='float32',
)
cell = GRUCell(self.hidden_size)
gru_hidden_new, _ = cell(inputs, pre_hidden)
gru_unit = contrib.layers.rnn_impl.BasicGRUUnit(
"basicGRU", self.hidden_size, None, None, None, None, "float32"
)
gru_hidden = gru_unit(inputs, pre_hidden)
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
inputs_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.input_size)
).astype('float32')
pre_hidden_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
param_names = [
["GRUCell/BasicGRUUnit_0.w_0", "basicGRU/BasicGRUUnit_0.w_0"],
["GRUCell/BasicGRUUnit_0.w_1", "basicGRU/BasicGRUUnit_0.w_1"],
["GRUCell/BasicGRUUnit_0.b_0", "basicGRU/BasicGRUUnit_0.b_0"],
["GRUCell/BasicGRUUnit_0.b_1", "basicGRU/BasicGRUUnit_0.b_1"],
]
for names in param_names:
param = np.array(
fluid.global_scope().find_var(names[0]).get_tensor()
)
param = np.random.uniform(-0.1, 0.1, size=param.shape).astype(
'float32'
)
fluid.global_scope().find_var(names[0]).get_tensor().set(
param, place
)
fluid.global_scope().find_var(names[1]).get_tensor().set(
param, place
)
out = exe.run(
feed={'inputs': inputs_np, 'pre_hidden': pre_hidden_np},
fetch_list=[gru_hidden_new, gru_hidden],
)
np.testing.assert_allclose(out[0], out[1], rtol=0.0001, atol=0)
paddle.enable_static()
class TestRnnError(unittest.TestCase):
@@ -336,7 +60,9 @@ class TestRnnError(unittest.TestCase):
inputs_dynamic_rnn = paddle.transpose(
inputs_basic_lstm, perm=[1, 0, 2]
)
cell = LSTMCell(hidden_size, name="LSTMCell_for_rnn")
cell = paddle.nn.LSTMCell(
input_size, hidden_size, name="LSTMCell_for_rnn"
)
np_inputs_dynamic_rnn = np.random.random(
(seq_len, batch_size, input_size)
).astype("float32")
@@ -362,7 +88,9 @@ class TestRnnError(unittest.TestCase):
self.assertRaises(TypeError, test_input_list)
def test_initial_states_type():
cell = GRUCell(hidden_size, name="GRUCell_for_rnn")
cell = paddle.nn.GRUCell(
input_size, hidden_size, name="GRUCell_for_rnn"
)
error_initial_states = np.random.random(
(batch_size, hidden_size)
).astype("float32")
@@ -417,36 +145,9 @@ class TestRnn(unittest.TestCase):
self.seq_len = 4
def test_run(self):
inputs_basic_lstm = fluid.data(
name='inputs_basic_lstm',
shape=[None, None, self.input_size],
dtype='float32',
)
sequence_length = fluid.data(
name="sequence_length", shape=[None], dtype='int64'
)
inputs_dynamic_rnn = paddle.transpose(inputs_basic_lstm, perm=[1, 0, 2])
cell = LSTMCell(self.hidden_size, name="LSTMCell_for_rnn")
output, final_state = dynamic_rnn(
cell=cell,
inputs=inputs_dynamic_rnn,
sequence_length=sequence_length,
is_reverse=False,
)
output_new = paddle.transpose(output, perm=[1, 0, 2])
rnn_out, last_hidden, last_cell = basic_lstm(
inputs_basic_lstm,
None,
None,
self.hidden_size,
num_layers=1,
batch_first=False,
bidirectional=False,
sequence_length=sequence_length,
forget_bias=1.0,
)
numpy_cell = LSTMCell(self.input_size, self.hidden_size)
dynamic_cell = paddle.nn.LSTMCell(self.input_size, self.hidden_size)
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
@@ -455,60 +156,68 @@ class TestRnn(unittest.TestCase):
exe = Executor(place)
exe.run(framework.default_startup_program())
inputs_basic_lstm_np = np.random.uniform(
-0.1, 0.1, (self.seq_len, self.batch_size, self.input_size)
).astype('float32')
state = numpy_cell.parameters
for k, v in dynamic_cell.named_parameters():
param = np.random.uniform(-0.1, 0.1, size=state[k].shape).astype(
'float64'
)
setattr(numpy_cell, k, param)
fluid.global_scope().find_var(v.name).get_tensor().set(param, place)
sequence_length = fluid.data(
name="sequence_length", shape=[None], dtype='int64'
)
inputs_rnn = fluid.data(
name='inputs_rnn',
shape=[None, None, self.input_size],
dtype='float64',
)
pre_hidden = fluid.data(
name='pre_hidden', shape=[None, self.hidden_size], dtype='float64'
)
pre_cell = fluid.data(
name='pre_cell', shape=[None, self.hidden_size], dtype='float64'
)
dynamic_output, dynamic_final_state = dynamic_rnn(
cell=dynamic_cell,
inputs=inputs_rnn,
sequence_length=sequence_length,
initial_states=(pre_hidden, pre_cell),
is_reverse=False,
)
inputs_rnn_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.seq_len, self.input_size)
).astype('float64')
sequence_length_np = (
np.ones(self.batch_size, dtype='int64') * self.seq_len
)
inputs_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.input_size)
).astype('float32')
pre_hidden_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
).astype('float64')
pre_cell_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
param_names = [
[
"LSTMCell_for_rnn/BasicLSTMUnit_0.w_0",
"basic_lstm_layers_0/BasicLSTMUnit_0.w_0",
],
[
"LSTMCell_for_rnn/BasicLSTMUnit_0.b_0",
"basic_lstm_layers_0/BasicLSTMUnit_0.b_0",
],
]
for names in param_names:
param = np.array(
fluid.global_scope().find_var(names[0]).get_tensor()
)
param = np.random.uniform(-0.1, 0.1, size=param.shape).astype(
'float32'
)
fluid.global_scope().find_var(names[0]).get_tensor().set(
param, place
)
fluid.global_scope().find_var(names[1]).get_tensor().set(
param, place
)
).astype('float64')
out = exe.run(
o1, _ = numpy_rnn(
cell=numpy_cell,
inputs=inputs_rnn_np,
initial_states=(pre_hidden_np, pre_cell_np),
sequence_length=sequence_length_np,
is_reverse=False,
)
o2 = exe.run(
feed={
'inputs_basic_lstm': inputs_basic_lstm_np,
'inputs_rnn': inputs_rnn_np,
'sequence_length': sequence_length_np,
'inputs': inputs_np,
'pre_hidden': pre_hidden_np,
'pre_cell': pre_cell_np,
},
fetch_list=[output_new, rnn_out],
)
np.testing.assert_allclose(out[0], out[1], rtol=0.0001)
fetch_list=[dynamic_output],
)[0]
np.testing.assert_allclose(o1, o2, rtol=0.001)
class TestRnnUtil(unittest.TestCase):
@@ -528,218 +237,5 @@ class TestRnnUtil(unittest.TestCase):
pass
class EncoderCell(RNNCell):
"""Encoder Cell"""
def __init__(
self,
num_layers,
hidden_size,
dropout_prob=0.0,
init_scale=0.1,
):
self.num_layers = num_layers
self.hidden_size = hidden_size
self.dropout_prob = dropout_prob
self.lstm_cells = []
for i in range(num_layers):
self.lstm_cells.append(LSTMCell(hidden_size))
def call(self, step_input, states):
new_states = []
for i in range(self.num_layers):
out, new_state = self.lstm_cells[i](step_input, states[i])
step_input = (
layers.dropout(
out,
self.dropout_prob,
)
if self.dropout_prob
else out
)
new_states.append(new_state)
return step_input, new_states
@property
def state_shape(self):
return [cell.state_shape for cell in self.lstm_cells]
class DecoderCell(RNNCell):
"""Decoder Cell"""
def __init__(self, num_layers, hidden_size, dropout_prob=0.0):
self.num_layers = num_layers
self.hidden_size = hidden_size
self.dropout_prob = dropout_prob
self.lstm_cells = []
for i in range(num_layers):
self.lstm_cells.append(LSTMCell(hidden_size))
def call(self, step_input, states):
new_lstm_states = []
for i in range(self.num_layers):
out, new_lstm_state = self.lstm_cells[i](step_input, states[i])
step_input = (
layers.dropout(
out,
self.dropout_prob,
)
if self.dropout_prob
else out
)
new_lstm_states.append(new_lstm_state)
return step_input, new_lstm_states
def def_seq2seq_model(
num_layers, hidden_size, dropout_prob, src_vocab_size, trg_vocab_size
):
"vanilla seq2seq model"
# data
source = fluid.data(name="src", shape=[None, None], dtype="int64")
source_length = fluid.data(
name="src_sequence_length", shape=[None], dtype="int64"
)
target = fluid.data(name="trg", shape=[None, None], dtype="int64")
target_length = fluid.data(
name="trg_sequence_length", shape=[None], dtype="int64"
)
label = fluid.data(name="label", shape=[None, None, 1], dtype="int64")
# embedding
src_emb = fluid.embedding(source, (src_vocab_size, hidden_size))
tar_emb = fluid.embedding(target, (src_vocab_size, hidden_size))
# encoder
enc_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
enc_output, enc_final_state = dynamic_rnn(
cell=enc_cell, inputs=src_emb, sequence_length=source_length
)
# decoder
dec_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
dec_output, dec_final_state = dynamic_rnn(
cell=dec_cell, inputs=tar_emb, initial_states=enc_final_state
)
logits = layers.fc(
dec_output,
size=trg_vocab_size,
num_flatten_dims=len(dec_output.shape) - 1,
bias_attr=False,
)
# loss
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=logits, label=label, soft_label=False
)
loss = layers.unsqueeze(loss, axes=[2])
max_tar_seq_len = paddle.shape(target)[1]
tar_mask = layers.sequence_mask(
target_length, maxlen=max_tar_seq_len, dtype="float32"
)
loss = loss * tar_mask
loss = paddle.mean(loss, axis=[0])
loss = paddle.sum(loss)
# optimizer
optimizer = fluid.optimizer.Adam(0.001)
optimizer.minimize(loss)
return loss
class TestSeq2SeqModel(unittest.TestCase):
"""
Test cases to confirm seq2seq api training correctly.
"""
def setUp(self):
np.random.seed(123)
self.model_hparams = {
"num_layers": 2,
"hidden_size": 128,
"dropout_prob": 0.1,
"src_vocab_size": 100,
"trg_vocab_size": 100,
}
self.iter_num = iter_num = 2
self.batch_size = batch_size = 4
src_seq_len = 10
trg_seq_len = 12
self.data = {
"src": np.random.randint(
2,
self.model_hparams["src_vocab_size"],
(iter_num * batch_size, src_seq_len),
).astype("int64"),
"src_sequence_length": np.random.randint(
1, src_seq_len, (iter_num * batch_size,)
).astype("int64"),
"trg": np.random.randint(
2,
self.model_hparams["src_vocab_size"],
(iter_num * batch_size, trg_seq_len),
).astype("int64"),
"trg_sequence_length": np.random.randint(
1, trg_seq_len, (iter_num * batch_size,)
).astype("int64"),
"label": np.random.randint(
2,
self.model_hparams["src_vocab_size"],
(iter_num * batch_size, trg_seq_len, 1),
).astype("int64"),
}
place = (
core.CUDAPlace(0)
if core.is_compiled_with_cuda()
else core.CPUPlace()
)
self.exe = Executor(place)
def test_seq2seq_model(self):
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
cost = def_seq2seq_model(**self.model_hparams)
self.exe.run(startup_program)
for iter_idx in range(self.iter_num):
cost_val = self.exe.run(
feed={
"src": self.data["src"][
iter_idx
* self.batch_size : (iter_idx + 1)
* self.batch_size,
:,
],
"src_sequence_length": self.data["src_sequence_length"][
iter_idx
* self.batch_size : (iter_idx + 1)
* self.batch_size
],
"trg": self.data["trg"][
iter_idx
* self.batch_size : (iter_idx + 1)
* self.batch_size,
:,
],
"trg_sequence_length": self.data["trg_sequence_length"][
iter_idx
* self.batch_size : (iter_idx + 1)
* self.batch_size
],
"label": self.data["label"][
iter_idx
* self.batch_size : (iter_idx + 1)
* self.batch_size
],
},
fetch_list=[cost],
)[0]
print("iter_idx: %d, cost: %f" % (iter_idx, cost_val))
if __name__ == '__main__':
unittest.main()
@@ -19,12 +19,10 @@ import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
import paddle.nn as nn
from paddle import Model, set_device
from paddle.fluid.dygraph import Layer
from paddle.fluid.executor import Executor
from paddle.fluid.framework import _test_eager_guard
from paddle.nn import BeamSearchDecoder, dynamic_decode
from paddle.static import InputSpec as Input
@@ -32,257 +30,6 @@ from paddle.static import InputSpec as Input
paddle.enable_static()
class EncoderCell(layers.RNNCell):
def __init__(self, num_layers, hidden_size, dropout_prob=0.0):
self.num_layers = num_layers
self.hidden_size = hidden_size
self.dropout_prob = dropout_prob
self.lstm_cells = [
layers.LSTMCell(hidden_size) for i in range(num_layers)
]
def call(self, step_input, states):
new_states = []
for i in range(self.num_layers):
out, new_state = self.lstm_cells[i](step_input, states[i])
step_input = (
layers.dropout(out, self.dropout_prob)
if self.dropout_prob > 0
else out
)
new_states.append(new_state)
return step_input, new_states
@property
def state_shape(self):
return [cell.state_shape for cell in self.lstm_cells]
class DecoderCell(layers.RNNCell):
def __init__(self, num_layers, hidden_size, dropout_prob=0.0):
self.num_layers = num_layers
self.hidden_size = hidden_size
self.dropout_prob = dropout_prob
self.lstm_cells = [
layers.LSTMCell(hidden_size) for i in range(num_layers)
]
def attention(self, hidden, encoder_output, encoder_padding_mask):
query = layers.fc(
hidden, size=encoder_output.shape[-1], bias_attr=False
)
attn_scores = paddle.matmul(
layers.unsqueeze(query, [1]), encoder_output, transpose_y=True
)
if encoder_padding_mask is not None:
attn_scores = paddle.add(attn_scores, encoder_padding_mask)
attn_scores = paddle.nn.functional.softmax(attn_scores)
attn_out = paddle.squeeze(
paddle.matmul(attn_scores, encoder_output), [1]
)
attn_out = layers.concat([attn_out, hidden], 1)
attn_out = layers.fc(attn_out, size=self.hidden_size, bias_attr=False)
return attn_out
def call(
self, step_input, states, encoder_output, encoder_padding_mask=None
):
lstm_states, input_feed = states
new_lstm_states = []
step_input = layers.concat([step_input, input_feed], 1)
for i in range(self.num_layers):
out, new_lstm_state = self.lstm_cells[i](step_input, lstm_states[i])
step_input = (
layers.dropout(out, self.dropout_prob)
if self.dropout_prob > 0
else out
)
new_lstm_states.append(new_lstm_state)
out = self.attention(step_input, encoder_output, encoder_padding_mask)
return out, [new_lstm_states, out]
class Encoder:
def __init__(self, num_layers, hidden_size, dropout_prob=0.0):
self.encoder_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
def __call__(self, src_emb, src_sequence_length):
encoder_output, encoder_final_state = layers.rnn(
cell=self.encoder_cell,
inputs=src_emb,
sequence_length=src_sequence_length,
is_reverse=False,
)
return encoder_output, encoder_final_state
class Decoder:
def __init__(
self,
num_layers,
hidden_size,
dropout_prob,
decoding_strategy="infer_sample",
max_decoding_length=20,
):
self.decoder_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
self.decoding_strategy = decoding_strategy
self.max_decoding_length = (
None
if (self.decoding_strategy == "train_greedy")
else max_decoding_length
)
def __call__(
self,
decoder_initial_states,
encoder_output,
encoder_padding_mask,
**kwargs
):
output_layer = kwargs.pop("output_layer", None)
beam_size = kwargs.get("beam_size", 4)
encoder_output = BeamSearchDecoder.tile_beam_merge_with_batch(
encoder_output, beam_size
)
encoder_padding_mask = BeamSearchDecoder.tile_beam_merge_with_batch(
encoder_padding_mask, beam_size
)
decoder = BeamSearchDecoder(
cell=self.decoder_cell, output_fn=output_layer, **kwargs
)
(
decoder_output,
decoder_final_state,
dec_seq_lengths,
) = layers.dynamic_decode(
decoder,
inits=decoder_initial_states,
max_step_num=self.max_decoding_length,
encoder_output=encoder_output,
encoder_padding_mask=encoder_padding_mask,
impute_finished=False # for test coverage
if self.decoding_strategy == "beam_search"
else True,
is_test=True if self.decoding_strategy == "beam_search" else False,
return_length=True,
)
return decoder_output, decoder_final_state, dec_seq_lengths
class Seq2SeqModel:
"""Seq2Seq model: RNN encoder-decoder with attention"""
def __init__(
self,
num_layers,
hidden_size,
dropout_prob,
src_vocab_size,
trg_vocab_size,
start_token,
end_token,
decoding_strategy="infer_sample",
max_decoding_length=20,
beam_size=4,
):
self.start_token, self.end_token = start_token, end_token
self.max_decoding_length, self.beam_size = (
max_decoding_length,
beam_size,
)
self.src_embeder = paddle.nn.Embedding(
src_vocab_size,
hidden_size,
weight_attr=fluid.ParamAttr(name="source_embedding"),
)
self.trg_embeder = paddle.nn.Embedding(
trg_vocab_size,
hidden_size,
weight_attr=fluid.ParamAttr(name="target_embedding"),
)
self.encoder = Encoder(num_layers, hidden_size, dropout_prob)
self.decoder = Decoder(
num_layers,
hidden_size,
dropout_prob,
decoding_strategy,
max_decoding_length,
)
self.output_layer = lambda x: layers.fc(
x,
size=trg_vocab_size,
num_flatten_dims=len(x.shape) - 1,
param_attr=fluid.ParamAttr(),
bias_attr=False,
)
def __call__(self, src, src_length, trg=None, trg_length=None):
# encoder
encoder_output, encoder_final_state = self.encoder(
self.src_embeder(src), src_length
)
decoder_initial_states = [
encoder_final_state,
self.decoder.decoder_cell.get_initial_states(
batch_ref=encoder_output, shape=[encoder_output.shape[-1]]
),
]
src_mask = layers.sequence_mask(
src_length, maxlen=paddle.shape(src)[1], dtype="float32"
)
encoder_padding_mask = (src_mask - 1.0) * 1e9
encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])
# decoder
decoder_kwargs = (
{
"inputs": self.trg_embeder(trg),
"sequence_length": trg_length,
}
if self.decoder.decoding_strategy == "train_greedy"
else (
{
"embedding_fn": self.trg_embeder,
"beam_size": self.beam_size,
"start_token": self.start_token,
"end_token": self.end_token,
}
if self.decoder.decoding_strategy == "beam_search"
else {
"embedding_fn": self.trg_embeder,
"start_tokens": layers.fill_constant_batch_size_like(
input=encoder_output,
shape=[-1],
dtype=src.dtype,
value=self.start_token,
),
"end_token": self.end_token,
}
)
)
decoder_kwargs["output_layer"] = self.output_layer
(decoder_output, decoder_final_state, dec_seq_lengths) = self.decoder(
decoder_initial_states,
encoder_output,
encoder_padding_mask,
**decoder_kwargs
)
if self.decoder.decoding_strategy == "beam_search": # for inference
return decoder_output
logits, samples, sample_length = (
decoder_output.cell_outputs,
decoder_output.sample_ids,
dec_seq_lengths,
)
probs = paddle.nn.functional.softmax(logits)
return probs, samples, sample_length
class PolicyGradient:
"""policy gradient"""
@@ -477,91 +224,6 @@ class SeqPGAgent:
return results
class TestDynamicDecode(unittest.TestCase):
def setUp(self):
np.random.seed(123)
self.model_hparams = {
"num_layers": 2,
"hidden_size": 32,
"dropout_prob": 0.1,
"src_vocab_size": 100,
"trg_vocab_size": 100,
"start_token": 0,
"end_token": 1,
"decoding_strategy": "infer_greedy",
"max_decoding_length": 10,
}
self.iter_num = iter_num = 2
self.batch_size = batch_size = 4
src_seq_len = 10
trg_seq_len = 12
self.data = {
"src": np.random.randint(
2,
self.model_hparams["src_vocab_size"],
(iter_num * batch_size, src_seq_len),
).astype("int64"),
"src_sequence_length": np.random.randint(
1, src_seq_len, (iter_num * batch_size,)
).astype("int64"),
"trg": np.random.randint(
2,
self.model_hparams["src_vocab_size"],
(iter_num * batch_size, trg_seq_len),
).astype("int64"),
"trg_sequence_length": np.random.randint(
1, trg_seq_len, (iter_num * batch_size,)
).astype("int64"),
"label": np.random.randint(
2,
self.model_hparams["src_vocab_size"],
(iter_num * batch_size, trg_seq_len, 1),
).astype("int64"),
}
place = (
core.CUDAPlace(0)
if core.is_compiled_with_cuda()
else core.CPUPlace()
)
self.exe = Executor(place)
def test_beam_search_infer(self):
paddle.set_default_dtype("float32")
paddle.enable_static()
self.model_hparams["decoding_strategy"] = "beam_search"
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
source = fluid.data(name="src", shape=[None, None], dtype="int64")
source_length = fluid.data(
name="src_sequence_length", shape=[None], dtype="int64"
)
model = Seq2SeqModel(**self.model_hparams)
output = model(source, source_length)
self.exe.run(startup_program)
for iter_idx in range(self.iter_num):
trans_ids = self.exe.run(
program=main_program,
feed={
"src": self.data["src"][
iter_idx
* self.batch_size : (iter_idx + 1)
* self.batch_size,
:,
],
"src_sequence_length": self.data["src_sequence_length"][
iter_idx
* self.batch_size : (iter_idx + 1)
* self.batch_size
],
},
fetch_list=[output],
)[0]
class ModuleApiTest(unittest.TestCase):
@classmethod
def setUpClass(cls):
@@ -14,26 +14,389 @@
import math
from collections.abc import Sequence
from functools import reduce
from functools import partial, reduce
import numpy as np
import paddle
from paddle import _C_ops, _legacy_C_ops, framework, in_dynamic_mode
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.layers import utils
from paddle.fluid.data_feeder import check_type, check_variable_and_dtype
from paddle.fluid.framework import _non_static_mode, in_dygraph_mode
from paddle.fluid.layers import control_flow, sequence_lod, utils
from paddle.fluid.layers.utils import flatten, map_structure
from paddle.framework import core
from paddle.nn import Layer
from paddle.nn import functional as F
from paddle.nn import initializer as I
from paddle.static import default_startup_program, program_guard
from paddle.static import Variable, default_startup_program, program_guard
from .container import LayerList
__all__ = []
def rnn(
cell,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
is_reverse=False,
**kwargs
):
r"""
rnn creates a recurrent neural network specified by RNNCell `cell`,
which performs :code:`cell.call()` (for dygraph mode :code:`cell.forward`)
repeatedly until it reaches the maximum length of `inputs`.
Parameters:
cell(RNNCellBase): An instance of `RNNCellBase`.
inputs(Tensor): the input sequences.
If time_major is True, the shape is
`[time_steps, batch_size, input_size]`
else the shape is `[batch_size, time_steps, input_size]`.
initial_states(Tensor|tuple|list, optional): the initial state of the
rnn cell. Tensor or a possibly nested structure of tensors. If not
provided, `cell.get_initial_states` would be called to produce
the initial state. Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index is not less than the valid length are treated as paddings.
time_major (bool, optional): Whether the first dimension of the input means the
time steps. Defaults to False.
is_reverse (bool, optional): Indicate whether to calculate in the reverse
order of input sequences. Defaults to False.
**kwargs: Additional keyword arguments to pass to `forward` of the cell.
Returns:
outputs (Tensor|list|tuple): the output sequence. Tensor or nested
structure of Tensors.
If `time_major` is True, the shape of each tensor in outputs is
`[time_steps, batch_size, hidden_size]`, else
`[batch_size, time_steps, hidden_size]`.
final_states (Tensor|list|tuple): final states. A (possibly nested structure of)
tensor[s], representing the final state for RNN. It has the same
structure as the initial states. Each tensor in final states has the same
shape and dtype as the corresponding tensor in initial states.
Examples:
.. code-block:: python
import paddle
paddle.disable_static()
cell = paddle.nn.SimpleRNNCell(16, 32)
inputs = paddle.rand((4, 23, 16))
prev_h = paddle.randn((4, 32))
outputs, final_states = paddle.nn.layer.rnn(cell, inputs, prev_h)
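# a minimal sketch with padded batches; seq_len is an illustrative
# tensor of valid lengths, not part of the original example
seq_len = paddle.to_tensor([23, 20, 18, 23], dtype="int64")
outputs, final_states = paddle.nn.layer.rnn(
cell, inputs, prev_h, sequence_length=seq_len)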
"""
if _non_static_mode():
return _rnn_dynamic_graph(
cell,
inputs,
initial_states,
sequence_length,
time_major,
is_reverse,
**kwargs
)
else:
return _rnn_static_graph(
cell,
inputs,
initial_states,
sequence_length,
time_major,
is_reverse,
**kwargs
)
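# ArrayWrapper makes the per-step output accumulator opaque to
# flatten/map_structure, so a growing list of step outputs can be threaded
# through the loop without being treated as a nested structure itself.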
class ArrayWrapper:
def __init__(self, x):
self.array = [x]
def append(self, x):
self.array.append(x)
return self
def __getitem__(self, item):
return self.array.__getitem__(item)
def _maybe_copy(state, new_state, step_mask):
"""update rnn state or just pass the old state through"""
new_state = paddle.tensor.math._multiply_with_axis(
new_state, step_mask, axis=0
) + paddle.tensor.math._multiply_with_axis(state, (1 - step_mask), axis=0)
return new_state
def _transpose_batch_time(x):
perm = [1, 0] + list(range(2, len(x.shape)))
return paddle.transpose(x, perm)
def _rnn_dynamic_graph(
cell,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
is_reverse=False,
**kwargs
):
time_step_index = 0 if time_major else 1
flat_inputs = flatten(inputs)
time_steps = flat_inputs[0].shape[time_step_index]
if initial_states is None:
initial_states = cell.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0
)
if not time_major:
inputs = map_structure(_transpose_batch_time, inputs)
if sequence_length is not None:
mask = sequence_lod.sequence_mask(
sequence_length, maxlen=time_steps, dtype=inputs.dtype
)
mask = paddle.transpose(mask, [1, 0])
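# mask: [batch_size, time_steps] -> [time_steps, batch_size], so mask[i] selects step i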
if is_reverse:
inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
mask = (
paddle.reverse(mask, axis=[0])
if sequence_length is not None
else None
)
states = initial_states
outputs = []
for i in range(time_steps):
step_inputs = map_structure(lambda x: x[i], inputs)
step_outputs, new_states = cell(step_inputs, states, **kwargs)
if sequence_length is not None:
new_states = map_structure(
partial(_maybe_copy, step_mask=mask[i]), states, new_states
)
states = new_states
outputs = (
map_structure(lambda x: ArrayWrapper(x), step_outputs)
if i == 0
else map_structure(
lambda x, x_array: x_array.append(x), step_outputs, outputs
)
)
final_outputs = map_structure(
lambda x: paddle.stack(x.array, axis=time_step_index), outputs
)
if is_reverse:
final_outputs = map_structure(
lambda x: paddle.reverse(x, axis=time_step_index), final_outputs
)
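# the states from the last iteration are the final states; when sequence_length
# is given, _maybe_copy has already frozen each sequence's state at its last valid step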
final_states = new_states
return final_outputs, final_states
def _rnn_static_graph(
cell,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
is_reverse=False,
**kwargs
):
check_type(inputs, 'inputs', (Variable, list, tuple), 'rnn')
if isinstance(inputs, (list, tuple)):
for i, input_x in enumerate(inputs):
check_variable_and_dtype(
input_x, 'inputs[' + str(i) + ']', ['float32', 'float64'], 'rnn'
)
check_type(
initial_states,
'initial_states',
(Variable, list, tuple, type(None)),
'rnn',
)
check_type(
sequence_length, 'sequence_length', (Variable, type(None)), 'rnn'
)
def _switch_grad(x, stop=False):
x.stop_gradient = stop
return x
if initial_states is None:
initial_states = cell.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0
)
initial_states = map_structure(_switch_grad, initial_states)
if not time_major:
inputs = map_structure(_transpose_batch_time, inputs)
if sequence_length:
max_seq_len = paddle.shape(flatten(inputs)[0])[0]
mask = sequence_lod.sequence_mask(
sequence_length,
maxlen=max_seq_len,
dtype=flatten(initial_states)[0].dtype,
)
mask = paddle.transpose(mask, [1, 0])
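# mask becomes [max_seq_len, batch_size], so each StaticRNN step reads one row via step_input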
if is_reverse:
inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs)
mask = paddle.reverse(mask, axis=[0]) if sequence_length else None
# StaticRNN
rnn = control_flow.StaticRNN()
with rnn.step():
inputs = map_structure(rnn.step_input, inputs)
states = map_structure(rnn.memory, initial_states)
copy_states = map_structure(lambda x: x, states)
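# hand the cell a shallow copy of the state structure (same variables), keeping
# `states` itself intact for the update_memory call below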
outputs, new_states = cell(inputs, copy_states, **kwargs)
utils.assert_same_structure(states, new_states)
if sequence_length:
step_mask = rnn.step_input(mask)
new_states = map_structure(
partial(_maybe_copy, step_mask=step_mask), states, new_states
)
map_structure(rnn.update_memory, states, new_states)
flat_outputs = flatten(outputs)
map_structure(rnn.step_output, outputs)
map_structure(rnn.step_output, new_states)
rnn_out = rnn()
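# the first len(flat_outputs) results are the stacked step outputs; the rest are
# the stacked per-step states, of which only the last step is kept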
final_outputs = rnn_out[: len(flat_outputs)]
final_outputs = utils.pack_sequence_as(outputs, final_outputs)
final_states = map_structure(lambda x: x[-1], rnn_out[len(flat_outputs) :])
final_states = utils.pack_sequence_as(new_states, final_states)
if is_reverse:
final_outputs = map_structure(
lambda x: paddle.reverse(x, axis=[0]), final_outputs
)
if not time_major:
final_outputs = map_structure(_transpose_batch_time, final_outputs)
return (final_outputs, final_states)
def birnn(
cell_fw,
cell_bw,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
**kwargs
):
r"""
birnn creates a bidirectional recurrent neural network specified by
RNNCell `cell_fw` and `cell_bw`, which performs :code:`cell.call()`
(for dygraph mode :code:`cell.forward`) repeatedly until it reaches
the maximum length of `inputs`, and then concatenates the outputs of both RNNs
along the last axis.
Parameters:
cell_fw(RNNCellBase): An instance of `RNNCellBase`.
cell_bw(RNNCellBase): An instance of `RNNCellBase`.
inputs(Tensor): the input sequences.
If time_major is True, the shape is
`[time_steps, batch_size, input_size]`
else the shape is `[batch_size, time_steps, input_size]`.
initial_states(tuple, optional): A tuple of initial states of
`cell_fw` and `cell_bw`.
If not provided, `cell.get_initial_states` would be called to
produce initial state for each cell. Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index is not less than the valid length are treated as paddings.
time_major (bool, optional): Whether the first dimension of the input means the
time steps. Defaults to False.
**kwargs: Additional keyword arguments to pass to `forward` of each cell.
Returns:
outputs (Tensor): the outputs of the bidirectional RNN. It is the
concatenation of the outputs from the forward RNN and backward
RNN along the last axis.
If time_major is True, the shape is `[time_steps, batch_size, size]`,
else the shape is `[batch_size, time_steps, size]`, where size is
`cell_fw.hidden_size + cell_bw.hidden_size`.
final_states (tuple): A tuple of the final states of the forward
cell and backward cell.
Examples:
.. code-block:: python
import paddle
paddle.disable_static()
cell_fw = paddle.nn.LSTMCell(16, 32)
cell_bw = paddle.nn.LSTMCell(16, 32)
inputs = paddle.rand((4, 23, 16))
hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32))
hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32))
initial_states = ((hf, cf), (hb, cb))
outputs, final_states = paddle.nn.layer.birnn(
cell_fw, cell_bw, inputs, initial_states)
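# a minimal sketch with padded batches; seq_len is an illustrative
# tensor of valid lengths, not part of the original example
seq_len = paddle.to_tensor([23, 19, 23, 16], dtype="int64")
outputs, final_states = paddle.nn.layer.birnn(
cell_fw, cell_bw, inputs, initial_states,
sequence_length=seq_len)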
"""
if initial_states is None:
states_fw = cell_fw.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0
)
states_bw = cell_bw.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0
)
else:
states_fw, states_bw = initial_states
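# run the forward pass and the reversed backward pass, then concatenate
# their outputs along the last axis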
outputs_fw, states_fw = rnn(
cell_fw,
inputs,
states_fw,
sequence_length,
time_major=time_major,
**kwargs
)
outputs_bw, states_bw = rnn(
cell_bw,
inputs,
states_bw,
sequence_length,
time_major=time_major,
is_reverse=True,
**kwargs
)
outputs = map_structure(
lambda x, y: paddle.concat([x, y], -1), outputs_fw, outputs_bw
)
final_states = (states_fw, states_bw)
return outputs, final_states
def split_states(states, bidirectional=False, state_components=1):
r"""
Split states of RNN network into possibly nested list or tuple of
......@@ -779,7 +1142,7 @@ class RNN(Layer):
def forward(
self, inputs, initial_states=None, sequence_length=None, **kwargs
):
final_outputs, final_states = paddle.fluid.layers.rnn(
final_outputs, final_states = rnn(
self.cell,
inputs,
initial_states=initial_states,
......@@ -866,7 +1229,7 @@ class BiRNN(Layer):
len(initial_states) == 2
), "length of initial_states should be 2 when it is a list/tuple"
outputs, final_states = paddle.fluid.layers.birnn(
outputs, final_states = birnn(
self.cell_fw,
self.cell_bw,
inputs,
......