Commit 634faab1 authored by Yibing Liu

Format doc & add unit test for dynamic_lstmp api

Parent: cc82ff0d
@@ -19,11 +19,11 @@ dynamic_lstm
     :noindex:

 dynamic_lstmp
--------------
+-------------
 .. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp
     :noindex:

 dynamic_gru
 -----------
 .. autofunction:: paddle.v2.fluid.layers.dynamic_gru
     :noindex:
...
@@ -147,6 +147,7 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
 op_library(sequence_conv_op DEPS context_project)
 op_library(sequence_pool_op DEPS sequence_pooling)
 op_library(lstm_op DEPS sequence2batch lstm_compute)
+op_library(lstmp_op DEPS sequence2batch lstm_compute)
 op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
 op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale math_function)
...
@@ -257,7 +257,8 @@ def dynamic_lstm(input,
                  gate_activation='sigmoid',
                  cell_activation='tanh',
                  candidate_activation='tanh',
-                 dtype='float32'):
+                 dtype='float32',
+                 name=None):
     """
     **Dynamic LSTM Layer**

@@ -309,25 +310,25 @@ def dynamic_lstm(input,
                (T X 4D), where T is the total time steps in this
                mini-batch, D is the hidden size.
         size(int): 4 * hidden size.
-        param_attr(ParamAttr): The parameter attribute for the learnable
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
                                hidden-hidden weights.

-                               - The shape is (D x 4D), where D is the hidden
-                                 size.
                                - Weights = {:math:`W_{ch}, W_{ih}, \
                                             W_{fh}, W_{oh}`}
-        bias_attr(ParamAttr): The bias attribute for the learnable bias
+                               - The shape is (D x 4D), where D is the hidden
+                                 size.
+        bias_attr(ParamAttr|None): The bias attribute for the learnable bias
                               weights, which contains two parts, input-hidden
                               bias weights and peephole connections weights if
                               setting `use_peepholes` to `True`.

                               1. `use_peepholes = False`
-                                - The shape is (1 x 4D).
                                 - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                                - The shape is (1 x 4D).
                               2. `use_peepholes = True`
-                                - The shape is (1 x 7D).
                                 - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
                                              W_{fc}, W_{oc}`}.
+                                - The shape is (1 x 7D).
         use_peepholes(bool): Whether to enable diagonal/peephole connections,
                              default `True`.
         is_reverse(bool): Whether to compute reversed LSTM, default `False`.

@@ -340,6 +341,8 @@ def dynamic_lstm(input,
                              Choices = ["sigmoid", "tanh", "relu", "identity"],
                              default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
+        name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically.

     Returns:
         tuple: The hidden state, and cell state of LSTM. The shape of both \

@@ -354,6 +357,7 @@ def dynamic_lstm(input,
         forward, _ = fluid.layers.dynamic_lstm(
             input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
     """
+
     helper = LayerHelper('lstm', **locals())
     size = size / 4
     weight = helper.create_parameter(
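
For orientation, here is a minimal end-to-end sketch of calling `dynamic_lstm` after this change. It follows the docstring's own example; the dimensions, the data layer, and the layer name are illustrative assumptions, not part of the diff.

```python
import paddle.v2.fluid as fluid

# Illustrative size (an assumption, not from the diff).
hidden_dim = 512

# A variable-length (LoD) input sequence, projected to 4 * hidden_dim
# because the four gate weight blocks are fused into one matrix.
data = fluid.layers.data(
    name='sequence', shape=[128], dtype='float32', lod_level=1)
forward_proj = fluid.layers.fc(
    input=data, size=hidden_dim * 4, bias_attr=False)

# `size` is 4 * the hidden size; the new `name` argument is optional.
forward, cell = fluid.layers.dynamic_lstm(
    input=forward_proj,
    size=hidden_dim * 4,
    use_peepholes=False,
    name='forward_lstm')
```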
@@ -401,7 +405,8 @@ def dynamic_lstmp(input,
                   cell_activation='tanh',
                   candidate_activation='tanh',
                   proj_activation='tanh',
-                  dtype='float32'):
+                  dtype='float32',
+                  name=None):
     """
     **Dynamic LSTMP Layer**

@@ -416,19 +421,19 @@ def dynamic_lstmp(input,
     .. math::

-        i_t = \sigma(W_{ix}x_{t} + W_{ir}r_{t-1} + W_{ic}c_{t-1} + b_i) \\
-        f_t = \sigma(W_{fx}x_{t} + W_{fr}r_{t-1} + W_{fc}c_{t-1} + b_f) \\
-        \tilde{c_t} = act_g(W_{cx}x_t + W_{cr}r_{t-1} + b_c) \\
-        o_t = \sigma(W_{ox}x_{t} + W_{or}r_{t-1} + W_{oc}c_t + b_o) \\
-        c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\
-        h_t = o_t \odot act_h(c_t) \\
-        r_t = \overline{act_h}(W_{rh}h_t)
+        i_t & = \sigma(W_{ix}x_{t} + W_{ir}r_{t-1} + W_{ic}c_{t-1} + b_i)
+
+        f_t & = \sigma(W_{fx}x_{t} + W_{fr}r_{t-1} + W_{fc}c_{t-1} + b_f)
+
+        \\tilde{c_t} & = act_g(W_{cx}x_t + W_{cr}r_{t-1} + b_c)
+
+        o_t & = \sigma(W_{ox}x_{t} + W_{or}r_{t-1} + W_{oc}c_t + b_o)
+
+        c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
+
+        h_t & = o_t \odot act_h(c_t)
+
+        r_t & = \overline{act_h}(W_{rh}h_t)

     where the :math:`W` terms denote weight matrices (e.g. :math:`W_{xi}` is
     the matrix of weights from the input gate to the input), :math:`W_{ic}`,

@@ -441,7 +446,7 @@ def dynamic_lstmp(input,
     vectors, respectively, all of which have the same size as the cell output
     activation vector :math:`h`. Here :math:`h` is usually called the hidden
     state and :math:`r` denotes its recurrent projection. And
-    :math:`\tilde{c_t}` is also called the candidate hidden state, whose
+    :math:`\\tilde{c_t}` is also called the candidate hidden state, whose
     computation is based on the current input and previous hidden state.

     The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`

@@ -466,28 +471,28 @@ def dynamic_lstmp(input,
                mini-batch, D is the hidden size.
         size(int): 4 * hidden size.
         proj_size(int): The size of projection output.
-        param_attr(ParamAttr): The parameter attribute for the learnable
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
                                hidden-hidden weight and projection weight.

+                               - Hidden-hidden weight = {:math:`W_{ch}, W_{ih}, \
+                                                         W_{fh}, W_{oh}`}.
                                - The shape of hidden-hidden weight is (P x 4D),
                                  where P is the projection size and D the hidden
                                  size.
-                               - The shape of projection weight is (D x P).
-                               - Hidden-hidden weight = {:math:`W_{ch}, W_{ih}, \
-                                                         W_{fh}, W_{oh}`}.
                                - Projection weight = {:math:`W_{rh}`}.
-        bias_attr(ParamAttr): The bias attribute for the learnable bias
+                               - The shape of projection weight is (D x P).
+        bias_attr(ParamAttr|None): The bias attribute for the learnable bias
                               weights, which contains two parts, input-hidden
                               bias weights and peephole connections weights if
                               setting `use_peepholes` to `True`.

                               1. `use_peepholes = False`
-                                - The shape is (1 x 4D).
                                 - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                                - The shape is (1 x 4D).
                               2. `use_peepholes = True`
-                                - The shape is (1 x 7D).
                                 - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
                                              W_{fc}, W_{oc}`}.
+                                - The shape is (1 x 7D).
         use_peepholes(bool): Whether to enable diagonal/peephole connections,
                              default `True`.
         is_reverse(bool): Whether to compute reversed LSTM, default `False`.

@@ -503,10 +508,12 @@ def dynamic_lstmp(input,
                              Choices = ["sigmoid", "tanh", "relu", "identity"],
                              default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
+        name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically.

     Returns:
-        tuple: The projection of hidden state, and cell state of LSTMP. The
-               shape of projection is (T x P), for the cell state which is
+        tuple: The projection of hidden state, and cell state of LSTMP. The \
+               shape of projection is (T x P), for the cell state which is \
                (T x D), and both LoD is the same with the `input`.

     Examples:

@@ -519,6 +526,7 @@ def dynamic_lstmp(input,
         proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
                                 size=hidden_dim * 4, proj_size=proj_dim, use_peepholes=False)
     """
+
     helper = LayerHelper('lstmp', **locals())
     size = size / 4
     weight = helper.create_parameter(
...
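
To make the LSTMP recurrence in the docstring concrete, here is a small NumPy sketch of one timestep. It transcribes the seven equations directly; the function name, the fused gate layout, and the gate ordering i, f, c, o are illustrative assumptions (Paddle's internal storage may differ), while the documented weight shapes (P x 4D) and (D x P) are respected.

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstmp_step(x, r_prev, c_prev, W_x, W_r, w_peep, b, W_rh,
               act_g=np.tanh, act_h=np.tanh, act_proj=np.tanh):
    """One LSTMP timestep (reference sketch, row-vector convention).

    x       input x_t, shape (N,)
    r_prev  previous projection r_{t-1}, shape (P,)
    c_prev  previous cell state c_{t-1}, shape (D,)
    W_x     input-to-hidden weights, shape (N, 4D), gate order i, f, c, o
    W_r     hidden-hidden weights, shape (P, 4D)  -- the documented (P x 4D)
    w_peep  diagonal peephole weights (W_ic, W_fc, W_oc), each shape (D,)
    b       gate biases (b_i, b_f, b_c, b_o), each shape (D,)
    W_rh    projection weight, shape (D, P)       -- the documented (D x P)
    """
    D = c_prev.shape[0]
    # Fused pre-activations for all four gates, then split per gate.
    z = x @ W_x + r_prev @ W_r
    z_i, z_f, z_c, z_o = (z[k * D:(k + 1) * D] for k in range(4))
    w_ic, w_fc, w_oc = w_peep
    b_i, b_f, b_c, b_o = b

    i = sigmoid(z_i + w_ic * c_prev + b_i)   # input gate
    f = sigmoid(z_f + w_fc * c_prev + b_f)   # forget gate
    c_tilde = act_g(z_c + b_c)               # candidate cell state
    c = f * c_prev + i * c_tilde             # new cell state
    o = sigmoid(z_o + w_oc * c + b_o)        # output gate peeks at c_t
    h = o * act_h(c)                         # hidden state
    r = act_proj(h @ W_rh)                   # recurrent projection r_t
    return h, c, r
```

A full sequence would loop this step over time, carrying (c, r) forward: r_prev feeds the recurrent term exactly where a plain LSTM would use h_prev, and that substitution is what makes the projection recurrent.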
@@ -202,6 +202,18 @@ class TestBook(unittest.TestCase):
                 x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell))
         print(str(program))

+    def test_dynamic_lstmp(self):
+        program = Program()
+        with program_guard(program):
+            hidden_dim, proj_dim = 16, 8
+            seq_data = layers.data(
+                name='seq_data', shape=[10, 10], dtype='float32', lod_level=1)
+            fc_out = layers.fc(input=seq_data, size=4 * hidden_dim)
+            self.assertIsNotNone(
+                layers.dynamic_lstmp(
+                    input=fc_out, size=4 * hidden_dim, proj_size=proj_dim))
+        print(str(program))
+
     def test_sequence_softmax(self):
         program = Program()
         with program_guard(program):
...
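
The new test can also be exercised on its own; a minimal runner sketch follows (the import path is an assumption — adjust it to wherever test_layers.py lives in the checkout):

```python
import unittest

# Path assumption: run from the directory containing test_layers.py.
from test_layers import TestBook

suite = unittest.TestSuite([TestBook('test_dynamic_lstmp')])
unittest.TextTestRunner(verbosity=2).run(suite)
```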