Merge pull request #7157 from pkuyym/fix-7156

Doc fix and enhancement for lstm_unit python wrapper.

Merge pull request #7157 from pkuyym/fix-7156
Doc fix and enhancement for lstm_unit python wrapper.
89bbc4f6 · Yang yaming · GitHub · 19541468 · 60fecce4 · 89bbc4f6
显示空白变更内容
内联并排

Showing 74 additions and 65 deletions

python/paddle/v2/fluid/layers/nn.py python/paddle/v2/fluid/layers/nn.py +72 -63

python/paddle/v2/fluid/tests/test_layers.py python/paddle/v2/fluid/tests/test_layers.py +2 -2

未找到文件。
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -1168,25 +1168,26 @@ def lstm_unit(x_t,
        .. math::
-            i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
+            i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i)
-            f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
+            f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + b_f)
-            c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c)
+            c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t + W_{h_c}h_{t-1} + b_c)
-            o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
+            o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + b_o)
            h_t & = o_t tanh(c_t)
-    The inputs of lstm unit includes :math:`x_t`, :math:`h_{t-1}` and
+    The inputs of lstm unit include :math:`x_t`, :math:`h_{t-1}` and
-    :math:`c_{t-1}`. The implementation separates the linear transformation
+    :math:`c_{t-1}`. The 2nd dimensions of :math:`h_{t-1}` and :math:`c_{t-1}`
-    and non-linear transformation apart. Here, we take :math:`i_t` as an
+    should be the same. The implementation separates the linear transformation and
-    example. The linear transformation is applied by calling a `fc` layer and
+    non-linear transformation apart. Here, we take :math:`i_t` as an example.
-    the equation is:
+    The linear transformation is applied by calling a `fc` layer and the
+    equation is:
        .. math::
-            L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i
+            L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i
    The non-linear transformation is applied by calling `lstm_unit_op` and the
    equation is:
@@ -1198,9 +1199,12 @@ def lstm_unit(x_t,
    This layer has two outputs including :math:`h_t` and :math:`c_t`.
    Args:
-        x_t (Variable): The input value of current step.
+        x_t (Variable): The input value of current step, a 2-D tensor with shape
-        hidden_t_prev (Variable): The hidden value of lstm unit.
+            M x N, M for batch size and N for input size.
-        cell_t_prev (Variable): The cell value of lstm unit.
+        hidden_t_prev (Variable): The hidden value of lstm unit, a 2-D tensor
+            with shape M x S, M for batch size and S for size of lstm unit.
+        cell_t_prev (Variable): The cell value of lstm unit, a 2-D tensor with
+            shape M x S, M for batch size and S for size of lstm unit.
        forget_bias (float): The forget bias of lstm unit.
        param_attr (ParamAttr): The attributes of parameter weights, used to set
            initializer, name etc.
@@ -1213,14 +1217,15 @@ def lstm_unit(x_t,
    Raises:
        ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\
                not be 2 or the 1st dimensions of **x_t**, **hidden_t_prev** \
-                and **cell_t_prev** not be the same.
+                and **cell_t_prev** not be the same or the 2nd dimensions of \
+                **hidden_t_prev** and **cell_t_prev** not be the same.
    Examples:
        .. code-block:: python
             x_t = fluid.layers.fc(input=x_t_data, size=10)
-             prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
+             prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=30)
             prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
             hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
                                                    hidden_t_prev=prev_hidden,
@@ -1239,7 +1244,11 @@ def lstm_unit(x_t,
    if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[
            0] != cell_t_prev.shape[0]:
-        raise ValueError("The 1s dimension of x_t, hidden_t_prev and "
+        raise ValueError("The 1st dimensions of x_t, hidden_t_prev and "
+                         "cell_t_prev must be the same.")
+    if hidden_t_prev.shape[1] != cell_t_prev.shape[1]:
+        raise ValueError("The 2nd dimensions of hidden_t_prev and "
                         "cell_t_prev must be the same.")
    if bias_attr is None:

--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -177,8 +177,8 @@ class TestBook(unittest.TestCase):
                name='x_t_data', shape=[10, 10], dtype='float32')
            x_t = layers.fc(input=x_t_data, size=10)
            prev_hidden_data = layers.data(
-                name='prev_hidden_data', shape=[10, 20], dtype='float32')
+                name='prev_hidden_data', shape=[10, 30], dtype='float32')
-            prev_hidden = layers.fc(input=prev_hidden_data, size=20)
+            prev_hidden = layers.fc(input=prev_hidden_data, size=30)
            prev_cell_data = layers.data(
                name='prev_cell', shape=[10, 30], dtype='float32')
            prev_cell = layers.fc(input=prev_cell_data, size=30)