diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index 004ee2d8c85ce7661886179570e693d7d61bc6d8..a7c8670f66cc7f319e41155211ead2d89126117f 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -307,6 +307,12 @@ sequence_expand
     :noindex:
 
 
+gru_unit
+--------
+.. autofunction:: paddle.v2.fluid.layers.gru_unit
+    :noindex:
+
+
 lstm_unit
 ---------
 .. autofunction:: paddle.v2.fluid.layers.lstm_unit
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 1c1c09dd28b7563cfb340370a495d7e7c66988cc..6883630ac6a28176258272996ed7d0e73652a40b 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -236,21 +236,47 @@ def gru_unit(input,
              activation='tanh',
              gate_activation='sigmoid'):
     """
-    GRUUnit Operator implements partial calculations of the GRU unit as following:
+    GRU unit layer. The equation of a gru step is:
 
-    $$
-    update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
-    reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
-    output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
-    output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
-    $$
+        .. math::
+            u_t & = actGate(xu_{t} + W_u h_{t-1} + b_u)
+
+            r_t & = actGate(xr_{t} + W_r h_{t-1} + b_r)
+
+            ch_t & = actNode(xc_t + W_c dot(r_t, h_{t-1}) + b_c)
+
+            h_t & = dot((1 - u_t), h_{t-1}) + dot(u_t, ch_t)
 
-    which is same as one time step of GRU Operator.
+    The inputs of the gru unit include :math:`z_t` and :math:`h_{t-1}`. In terms
+    of the equation above, :math:`z_t` is split into 3 parts -
+    :math:`xu_t`, :math:`xr_t` and :math:`xc_t`. This means that in order to
+    implement a full GRU unit operator for an input, a fully
+    connected layer has to be applied first, such that :math:`z_t = W_{fc}x_t`.
+
+    This layer has three outputs: :math:`h_t`, :math:`dot(r_t, h_{t-1})`
+    and the concatenation of :math:`u_t`, :math:`r_t` and :math:`ch_t`.
+
+    Args:
+        input (Variable): The fc transformed input value of the current step.
+        hidden (Variable): The hidden value of the gru unit from the previous step.
+        size (integer): The input dimension value.
+        weight (ParamAttr): The weight parameters for gru unit. Default: None
+        bias (ParamAttr): The bias parameters for gru unit. Default: None
+        activation (string): The activation type for cell (actNode). Default: 'tanh'
+        gate_activation (string): The activation type for gates (actGate). Default: 'sigmoid'
+
+    Returns:
+        tuple: The hidden value, reset-hidden value and gate values.
+
+    Examples:
+
+        .. code-block:: python
 
-    @note To implement the complete GRU unit, fully-connected operator must be
-    used before to feed xu, xr and xc as the Input of GRUUnit operator.
+            # assuming we have x_t_data and prev_hidden of size=10
+            x_t = fluid.layers.fc(input=x_t_data, size=30)
+            hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(
+                input=x_t, hidden=prev_hidden, size=30)
 
-    TODO(ChunweiYan) add more document here
     """
     activation_dict = dict(
         identity=0,
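For reviewers who want to try the new docstring example end to end, a minimal single-step sketch might look like the following. It is not part of the patch: the data-layer names and shapes are hypothetical, a hidden size of 10 is assumed (so the fc layer projects the step input to 3 * 10 = 30 values for gru_unit to split into :math:`xu_t`, :math:`xr_t` and :math:`xc_t`), and ``size`` is passed per the docstring's description as the fc-transformed input dimension.

.. code-block:: python

    import paddle.v2.fluid as fluid

    # hypothetical input variables; a hidden size of 10 is assumed
    x_t_data = fluid.layers.data(name='x_t_data', shape=[20], dtype='float32')
    prev_hidden = fluid.layers.data(
        name='prev_hidden', shape=[10], dtype='float32')

    # project the step input to 3 * hidden_size so gru_unit can split it
    # into the update-gate, reset-gate and candidate parts
    x_t = fluid.layers.fc(input=x_t_data, size=30)

    # one GRU step: returns the new hidden value, dot(r_t, h_{t-1}),
    # and the concatenated gate values
    hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(
        input=x_t, hidden=prev_hidden, size=30)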