change interface and api spec for dynamic_gru test=develop

e641ffe7 · Qiao Longfei · 4c7be265 · e641ffe7 · e641ffe7 · e641ffe7
Showing with 46 addition and 8 deletion

paddle/fluid/API.spec paddle/fluid/API.spec +1 -1

paddle/fluid/operators/gru_unit_op.cc paddle/fluid/operators/gru_unit_op.cc +4 -1

python/paddle/fluid/layers/nn.py python/paddle/fluid/layers/nn.py +41 -6

未找到文件。
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -68,7 +68,7 @@ paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param
 paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
 paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
 paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None))
-paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None))
+paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False))
 paddle.fluid.layers.gru_unit ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False))
 paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.crf_decoding ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,))

--- a/paddle/fluid/operators/gru_unit_op.cc
+++ b/paddle/fluid/operators/gru_unit_op.cc
@@ -113,7 +113,10 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
        .InEnum({identity, sigmoid, tanh, relu});
    AddAttr<bool>("origin_mode",
                  "bool"
-                  "use origin mode in article https://arxiv.org/abs/1412.3555")
+                  "use origin mode in article <Learning Phrase Representations "
+                  "using RNN Encoder–Decoder\n"
+                  "for Statistical Machine "
+                  "Translation>(https://arxiv.org/pdf/1406.1078.pdf)")
        .SetDefault(false);
    AddComment(R"DOC(
 GRUUnit Operator implements partial calculations of the GRU unit as following:

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -864,12 +864,14 @@ def dynamic_gru(input,
                is_reverse=False,
                gate_activation='sigmoid',
                candidate_activation='tanh',
-                h_0=None):
+                h_0=None,
+                origin_mode=False):
    """
    **Gated Recurrent Unit (GRU) Layer**

-    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
-    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_ .
+    if origin_mode is False, then the equation of a gru step is from paper
+    `Empirical Evaluation of Gated Recurrent Neural Networks on Sequence
+    Modeling <https://arxiv.org/pdf/1412.3555.pdf>`_ .

    The formula is as follows:

@@ -883,6 +885,20 @@ def dynamic_gru(input,

        h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t}

+    if origin_mode is True, then the equation is from paper
+    `Learning Phrase Representations using RNN Encoder–Decoder for Statistical
+    Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_
+
+    .. math::
+
+        u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)
+
+        r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)
+
+        \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)
+
+        h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \\tilde{h_t}
+
    The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
    is the update gate and reset gate activation function and :math:`sigmoid`
    is usually used for it. :math:`act_c` is the activation function for
@@ -980,7 +996,8 @@ def dynamic_gru(input,
        attrs={
            'is_reverse': is_reverse,
            'gate_activation': gate_activation,
-            'activation': candidate_activation
+            'activation': candidate_activation,
+            'origin_mode': origin_mode
        })
    return hidden

@@ -994,7 +1011,11 @@ def gru_unit(input,
             gate_activation='sigmoid',
             origin_mode=False):
    """
-    GRU unit layer. The equation of a gru step is:
+    **GRU unit layer**
+
+    if origin_mode is True, then the equation of a gru step is from paper
+    `Learning Phrase Representations using RNN Encoder–Decoder for Statistical
+    Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_

        .. math::
            u_t & = actGate(xu_{t} + W_u h_{t-1} + b_u)
@@ -1003,7 +1024,21 @@ def gru_unit(input,

            m_t & = actNode(xm_t + W_c dot(r_t, h_{t-1}) + b_m)

-            h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1})
+            h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t)
+
+    if origin_mode is False, then the equation of a gru step is from paper
+    `Empirical Evaluation of Gated Recurrent Neural Networks on Sequence
+    Modeling <https://arxiv.org/pdf/1412.3555.pdf>`_
+
+        .. math::
+            u_t & = actGate(xu_{t} + W_u h_{t-1} + b_u)
+
+            r_t & = actGate(xr_{t} + W_r h_{t-1} + b_r)
+
+            m_t & = actNode(xm_t + W_c dot(r_t, h_{t-1}) + b_m)
+
+            h_t & = dot((1-u_t), h_{t-1}) + dot(u_t, m_t)
+

    The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms
    of the equation above, the :math:`z_t` is split into 3 parts -