未验证 提交 6f791260 编写于 作者: Y Yang yaming 提交者: GitHub

Merge pull request #6669 from pkuyym/fix-6581

Add python wrapper for lstm unit op.
...@@ -188,12 +188,6 @@ beam_search_decode ...@@ -188,12 +188,6 @@ beam_search_decode
:noindex: :noindex:
lstm
---------
.. autofunction:: paddle.v2.fluid.layers.lstm
:noindex:
lod_rank_table lod_rank_table
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.lod_rank_table .. autofunction:: paddle.v2.fluid.layers.lod_rank_table
...@@ -300,7 +294,14 @@ conv2d_transpose ...@@ -300,7 +294,14 @@ conv2d_transpose
.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose .. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
:noindex: :noindex:
sequence_expand sequence_expand
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.sequence_expand .. autofunction:: paddle.v2.fluid.layers.sequence_expand
:noindex: :noindex:
lstm_unit
---------
.. autofunction:: paddle.v2.fluid.layers.lstm_unit
:noindex:
...@@ -51,7 +51,10 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -51,7 +51,10 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
LstmUnitOpMaker(framework::OpProto* proto, LstmUnitOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "FC input before the non-linear activation."); AddInput("X",
"Lstm unit only applies non-linear activations, please make sure"
"that linear tranformation has already been applied to `X`. "
"Linear tranformation can be applied by adding a `fc` layer");
AddInput( AddInput(
"C_prev", "C_prev",
"The cell state tensor of last time-step in the Lstm Unit operator."); "The cell state tensor of last time-step in the Lstm Unit operator.");
......
...@@ -5,12 +5,15 @@ All layers just related to the neural network. ...@@ -5,12 +5,15 @@ All layers just related to the neural network.
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant from ..initializer import Normal, Constant
from ..framework import Variable from ..framework import Variable
from ..param_attr import ParamAttr
from tensor import concat
__all__ = [ __all__ = [
'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf', 'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy', 'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d', 'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand' 'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
'lstm_unit'
] ]
...@@ -761,7 +764,7 @@ def conv2d_transpose(input, ...@@ -761,7 +764,7 @@ def conv2d_transpose(input,
return out return out
def sequence_expand(x, y, main_program=None, startup_program=None): def sequence_expand(x, y):
"""Sequence Expand Layer. This layer will expand the input variable **x** """Sequence Expand Layer. This layer will expand the input variable **x**
according to LoD information of **y**. And the following examples will according to LoD information of **y**. And the following examples will
explain how sequence_expand works: explain how sequence_expand works:
...@@ -805,8 +808,6 @@ def sequence_expand(x, y, main_program=None, startup_program=None): ...@@ -805,8 +808,6 @@ def sequence_expand(x, y, main_program=None, startup_program=None):
Args: Args:
x (Variable): The input variable which is a Tensor or LoDTensor. x (Variable): The input variable which is a Tensor or LoDTensor.
y (Variable): The input variable which is a LoDTensor. y (Variable): The input variable which is a LoDTensor.
main_program (Program): The main program.
startup_program (Program): The startup program.
Returns: Returns:
Variable: The expanded variable which is a LoDTensor. Variable: The expanded variable which is a LoDTensor.
...@@ -826,3 +827,111 @@ def sequence_expand(x, y, main_program=None, startup_program=None): ...@@ -826,3 +827,111 @@ def sequence_expand(x, y, main_program=None, startup_program=None):
type='sequence_expand', inputs={'X': x, type='sequence_expand', inputs={'X': x,
'Y': y}, outputs={'Out': tmp}) 'Y': y}, outputs={'Out': tmp})
return tmp return tmp
def lstm_unit(x_t,
hidden_t_prev,
cell_t_prev,
forget_bias=0.0,
param_attr=None,
bias_attr=None):
"""Lstm unit layer. The equation of a lstm step is:
.. math::
i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c)
o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
h_t & = o_t tanh(c_t)
The inputs of lstm unit includes :math:`x_t`, :math:`h_{t-1}` and
:math:`c_{t-1}`. The implementation separates the linear transformation
and non-linear transformation apart. Here, we take :math:`i_t` as an
example. The linear transformation is applied by calling a `fc` layer and
the equation is:
.. math::
L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i
The non-linear transformation is applied by calling `lstm_unit_op` and the
equation is:
.. math::
i_t = \sigma(L_{i_t})
This layer has two outputs including :math:`h_t` and :math:`o_t`.
Args:
x_t (Variable): The input value of current step.
hidden_t_prev (Variable): The hidden value of lstm unit.
cell_t_prev (Variable): The cell value of lstm unit.
forget_bias (float): The forget bias of lstm unit.
param_attr (ParamAttr): The attributes of parameter weights, used to set
initializer, name etc.
bias_attr (ParamAttr): The attributes of bias weights, if not False,
bias weights will be created and be set to default value.
Returns:
tuple: The hidden value and cell value of lstm unit.
Raises:
ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\
not be 2 or the 1st dimensions of **x_t**, **hidden_t_prev** \
and **cell_t_prev** not be the same.
Examples:
.. code-block:: python
x_t = fluid.layers.fc(input=x_t_data, size=10)
prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
hidden_t_prev=prev_hidden,
cell_t_prev=prev_cell)
"""
helper = LayerHelper('lstm_unit', **locals())
if len(x_t.shape) != 2:
raise ValueError("Rank of x_t must be 2.")
if len(hidden_t_prev.shape) != 2:
raise ValueError("Rank of hidden_t_prev must be 2.")
if len(cell_t_prev.shape) != 2:
raise ValueError("Rank of cell_t_prev must be 2.")
if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[
0] != cell_t_prev.shape[0]:
raise ValueError("The 1s dimension of x_t, hidden_t_prev and "
"cell_t_prev must be the same.")
if bias_attr is None:
bias_attr = ParamAttr()
size = cell_t_prev.shape[1]
concat_out = concat(input=[x_t, hidden_t_prev], axis=1)
fc_out = fc(input=concat_out,
size=4 * size,
param_attr=param_attr,
bias_attr=bias_attr)
dtype = x_t.dtype
c = helper.create_tmp_variable(dtype)
h = helper.create_tmp_variable(dtype)
helper.append_op(
type='lstm_unit',
inputs={"X": fc_out,
"C_prev": cell_t_prev},
outputs={"C": c,
"H": h},
attrs={"forget_bias": forget_bias})
return h, c
...@@ -161,7 +161,7 @@ class TestBook(unittest.TestCase): ...@@ -161,7 +161,7 @@ class TestBook(unittest.TestCase):
x=dat, label=lbl)) x=dat, label=lbl))
print(str(program)) print(str(program))
def test_seq_expand(self): def test_sequence_expand(self):
program = Program() program = Program()
with program_guard(program): with program_guard(program):
x = layers.data(name='x', shape=[10], dtype='float32') x = layers.data(name='x', shape=[10], dtype='float32')
...@@ -170,6 +170,23 @@ class TestBook(unittest.TestCase): ...@@ -170,6 +170,23 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(layers.sequence_expand(x=x, y=y)) self.assertIsNotNone(layers.sequence_expand(x=x, y=y))
print(str(program)) print(str(program))
def test_lstm_unit(self):
program = Program()
with program_guard(program):
x_t_data = layers.data(
name='x_t_data', shape=[10, 10], dtype='float32')
x_t = layers.fc(input=x_t_data, size=10)
prev_hidden_data = layers.data(
name='prev_hidden_data', shape=[10, 20], dtype='float32')
prev_hidden = layers.fc(input=prev_hidden_data, size=20)
prev_cell_data = layers.data(
name='prev_cell', shape=[10, 30], dtype='float32')
prev_cell = layers.fc(input=prev_cell_data, size=30)
self.assertIsNotNone(
layers.lstm_unit(
x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell))
print(str(program))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册