From 634faab1c06f9700297444789aa4336738a8100d Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Sun, 28 Jan 2018 00:53:15 -0800
Subject: [PATCH] Format doc & add unit test for dynamic_lstmp api

---
 doc/api/v2/fluid/layers.rst                 |  4 +-
 paddle/operators/CMakeLists.txt             |  1 +
 python/paddle/v2/fluid/layers/nn.py         | 58 ++++++++++++---------
 python/paddle/v2/fluid/tests/test_layers.py | 12 +++++
 4 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index 6c6af106db7..231ec2d4ba1 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -19,11 +19,11 @@ dynamic_lstm
     :noindex:

 dynamic_lstmp
-------------
+-------------
 .. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp
     :noindex:

- dynamic_gru
+dynamic_gru
 -----------
 .. autofunction:: paddle.v2.fluid.layers.dynamic_gru
     :noindex:
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 15f7cb6b560..48cf5816cce 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -147,6 +147,7 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
 op_library(sequence_conv_op DEPS context_project)
 op_library(sequence_pool_op DEPS sequence_pooling)
 op_library(lstm_op DEPS sequence2batch lstm_compute)
+op_library(lstmp_op DEPS sequence2batch lstm_compute)
 op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
 op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale math_function)
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index fa03a642917..c23cd733f17 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -257,7 +257,8 @@ def dynamic_lstm(input,
                  gate_activation='sigmoid',
                  cell_activation='tanh',
                  candidate_activation='tanh',
-                 dtype='float32'):
+                 dtype='float32',
+                 name=None):
     """
     **Dynamic LSTM Layer**

@@ -309,25 +310,25 @@ def dynamic_lstm(input,
                      (T X 4D), where T is the total time steps in this
                      mini-batch, D is the hidden size.
         size(int): 4 * hidden size.
-        param_attr(ParamAttr): The parameter attribute for the learnable
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
                                hidden-hidden weights.

-                               - The shape is (D x 4D), where D is the hidden
-                                 size.
                                - Weights = {:math:`W_{ch}, W_{ih}, \
                                  W_{fh}, W_{oh}`}
-        bias_attr(ParamAttr): The bias attribute for the learnable bias
+                               - The shape is (D x 4D), where D is the hidden
+                                 size.
+        bias_attr(ParamAttr|None): The bias attribute for the learnable bias
                               weights, which contains two parts, input-hidden
                               bias weights and peephole connections weights if
                               setting `use_peepholes` to `True`.

                               1. `use_peepholes = False`
-                                 - The shape is (1 x 4D).
                                  - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                                 - The shape is (1 x 4D).
                               2. `use_peepholes = True`
-                                 - The shape is (1 x 7D).
                                  - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
                                    W_{fc}, W_{oc}`}.
+                                 - The shape is (1 x 7D).
         use_peepholes(bool): Whether to enable diagonal/peephole connections,
                              default `True`.
         is_reverse(bool): Whether to compute reversed LSTM, default `False`.
@@ -340,6 +341,8 @@ def dynamic_lstm(input,
                               Choices = ["sigmoid", "tanh", "relu", "identity"],
                               default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
+        name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically.

     Returns:
         tuple: The hidden state, and cell state of LSTM. The shape of both \
@@ -354,6 +357,7 @@ def dynamic_lstm(input,
             forward, _ = fluid.layers.dynamic_lstm(
                 input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
     """
+
     helper = LayerHelper('lstm', **locals())
     size = size / 4
     weight = helper.create_parameter(
@@ -401,7 +405,8 @@ def dynamic_lstmp(input,
                   cell_activation='tanh',
                   candidate_activation='tanh',
                   proj_activation='tanh',
-                  dtype='float32'):
+                  dtype='float32',
+                  name=None):
     """
     **Dynamic LSTMP Layer**

@@ -416,19 +421,19 @@ def dynamic_lstmp(input,

     .. math::

-        i_t = \sigma(W_{ix}x_{t} + W_{ir}r_{t-1} + W_{ic}c_{t-1} + b_i) \\
+        i_t & = \sigma(W_{ix}x_{t} + W_{ir}r_{t-1} + W_{ic}c_{t-1} + b_i)

-        f_t = \sigma(W_{fx}x_{t} + W_{fr}r_{t-1} + W_{fc}c_{t-1} + b_f) \\
+        f_t & = \sigma(W_{fx}x_{t} + W_{fr}r_{t-1} + W_{fc}c_{t-1} + b_f)

-        \tilde{c_t} = act_g(W_{cx}x_t + W_{cr}r_{t-1} + b_c) \\
+        \\tilde{c_t} & = act_g(W_{cx}x_t + W_{cr}r_{t-1} + b_c)

-        o_t = \sigma(W_{ox}x_{t} + W_{or}r_{t-1} + W_{oc}c_t + b_o) \\
+        o_t & = \sigma(W_{ox}x_{t} + W_{or}r_{t-1} + W_{oc}c_t + b_o)

-        c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\
+        c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}

-        h_t = o_t \odot act_h(c_t) \\
+        h_t & = o_t \odot act_h(c_t)

-        r_t = \overline{act_h}(W_{rh}h_t)
+        r_t & = \overline{act_h}(W_{rh}h_t)

     where the :math:`W` terms denote weight matrices (e.g. :math:`W_{xi}` is
     the matrix of weights from the input gate to the input), :math:`W_{ic}`,
@@ -441,7 +446,7 @@ def dynamic_lstmp(input,
     vectors, respectively, all of which have the same size as the cell output
     activation vector :math:`h`. Here :math:`h` is usually called the hidden
     state and :math:`r` denotes its recurrent projection. And
-    :math:`\tilde{c_t}` is also called the candidate hidden state, whose
+    :math:`\\tilde{c_t}` is also called the candidate hidden state, whose
     computation is based on the current input and previous hidden state.

     The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
@@ -466,28 +471,28 @@ def dynamic_lstmp(input,
                      mini-batch, D is the hidden size.
         size(int): 4 * hidden size.
         proj_size(int): The size of projection output.
-        param_attr(ParamAttr): The parameter attribute for the learnable
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
                                hidden-hidden weight and projection weight.

+                               - Hidden-hidden weight = {:math:`W_{ch}, W_{ih}, \
+                                 W_{fh}, W_{oh}`}.
                                - The shape of hidden-hidden weight is (P x 4D),
                                  where P is the projection size and D the hidden
                                  size.
-                               - The shape of projection weight is (D x P).
-                               - Hidden-hidden weight = {:math:`W_{ch}, W_{ih}, \
-                                 W_{fh}, W_{oh}`}.
                                - Projection weight = {:math:`W_{rh}`}.
-        bias_attr(ParamAttr): The bias attribute for the learnable bias
+                               - The shape of projection weight is (D x P).
+        bias_attr(ParamAttr|None): The bias attribute for the learnable bias
                               weights, which contains two parts, input-hidden
                               bias weights and peephole connections weights if
                               setting `use_peepholes` to `True`.

                               1. `use_peepholes = False`
-                                 - The shape is (1 x 4D).
                                  - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                                 - The shape is (1 x 4D).
                               2. `use_peepholes = True`
-                                 - The shape is (1 x 7D).
                                  - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
                                    W_{fc}, W_{oc}`}.
+                                 - The shape is (1 x 7D).
         use_peepholes(bool): Whether to enable diagonal/peephole connections,
                              default `True`.
         is_reverse(bool): Whether to compute reversed LSTM, default `False`.
@@ -503,10 +508,12 @@ def dynamic_lstmp(input,
                               Choices = ["sigmoid", "tanh", "relu", "identity"],
                               default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
+        name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically.

     Returns:
-        tuple: The projection of hidden state, and cell state of LSTMP. The
-               shape of projection is (T x P), for the cell state which is
+        tuple: The projection of hidden state, and cell state of LSTMP. The \
+               shape of projection is (T x P), for the cell state which is \
                (T x D), and both LoD is the same with the `input`.

     Examples:
         .. code-block:: python

             hidden_dim, proj_dim = 512, 256
             fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
                                      act=None, bias_attr=None)
             proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
                                                      size=hidden_dim * 4,
                                                      proj_size=proj_dim,
                                                      use_peepholes=False)
     """
+
     helper = LayerHelper('lstmp', **locals())
     size = size / 4
     weight = helper.create_parameter(
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index 4e863625422..3f54e28defb 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -202,6 +202,18 @@ class TestBook(unittest.TestCase):
                     x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell))
             print(str(program))

+    def test_dynamic_lstmp(self):
+        program = Program()
+        with program_guard(program):
+            hidden_dim, proj_dim = 16, 8
+            seq_data = layers.data(
+                name='seq_data', shape=[10, 10], dtype='float32', lod_level=1)
+            fc_out = layers.fc(input=seq_data, size=4 * hidden_dim)
+            self.assertIsNotNone(
+                layers.dynamic_lstmp(
+                    input=fc_out, size=4 * hidden_dim, proj_size=proj_dim))
+            print(str(program))
+
     def test_sequence_softmax(self):
         program = Program()
         with program_guard(program):
--
GitLab