Fix calculations in gru_unit_op

3e7fff41 · guosheng · 01d6ccb4 · 3e7fff41 · 3e7fff41 · 3e7fff41
3 changed files
--- a/paddle/operators/gru_unit_op.cc
+++ b/paddle/operators/gru_unit_op.cc
@@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
        .SetDefault(sigmoid)
        .InEnum({identity, sigmoid, tanh, relu});
    AddComment(R"DOC(
-GRUUnit Operator.
-
-This operator implements partial calculations of the GRU unit as follows:
+GRUUnit Operator implements partial calculations of the GRU unit as follows:

 $$
-update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r)  \\
-output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
-output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
+update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r)  \\
+output \ candidate: \tilde{h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, \tilde{h}_t)
 $$

-The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
+which is the same as one time step of the GRU Operator.
+
+@note To implement the complete GRU unit, a fully-connected operator must be
+applied beforehand to produce xu, xr and xc as the Input of the GRUUnit operator.

 )DOC");
  }
@@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
                   "ResetHiddenPrev");
    PADDLE_ENFORCE(ctx->HasInput("Hidden"),
                   "Input(%s) of GRUUnitGradOp should not be null.", "Hidden");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")),
-                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
-                   "Gate");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")),
-                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
-                   "ResetHiddenPrev");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
                   "Hidden");

--- a/paddle/operators/gru_unit_op.h
+++ b/paddle/operators/gru_unit_op.h
@@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
    auto c = g.slice(c_offsets, extents);  // output candidate

    // calculate final output
-    h.device(place) = u * (h_p - c) + c;
+    h.device(place) = u * (c - h_p) + h_p;
  }
 };

@@ -185,10 +185,10 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {

    // backward for unactivated update gate
    ActGradCompute(context.Attr<int>("gate_activation"), place, u, u,
-                   d_g.slice(u_offsets, extents), d_h * (h_p - c));
+                   d_g.slice(u_offsets, extents), d_h * (c - h_p));
    // backward for unactivated output candidate
    ActGradCompute(context.Attr<int>("activation"), place, c, c,
-                   d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u));
+                   d_g.slice(c_offsets, extents), d_h * u);
    // backward for reset_hidden_prev
    math::gemm<Place, T>(context.device_context(), false, true, batch_size,
                         frame_size, frame_size, 1,
@@ -210,7 +210,7 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
                         frame_size, gate_grad_data, frame_size * 3, 0,
                         weight_grad_data, frame_size * 2);
    // backward for hidden_prev
-    d_h_p.device(place) = d_r_h_p * r + d_h * u;
+    d_h_p.device(place) = d_r_h_p * r + d_h * (u.constant(T(1)) - u);
    math::gemm<Place, T>(context.device_context(), false, true, batch_size,
                         frame_size, frame_size * 2, 1, gate_grad_data,
                         frame_size * 3, weight_data, frame_size * 2, 1,

--- a/python/paddle/v2/fluid/tests/test_gru_unit_op.py
+++ b/python/paddle/v2/fluid/tests/test_gru_unit_op.py
@@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest):
        c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) +
                                                    g[:, frame_size * 2:])
        g = np.hstack((u_r, c))
-        h = u * h_p + (1 - u) * c
+        h = u * c + (1 - u) * h_p
        self.outputs = {
            'Gate': g.astype('float64'),
            'ResetHiddenPrev': r_h_p.astype('float64'),
@@ -93,8 +93,7 @@ class TestGRUUnitOp(OpTest):

    def test_check_grad(self):
        self.check_grad(
-            ['Input', 'HiddenPrev', 'Weight'],
-            ['Hidden', 'ResetHiddenPrev', 'Gate'],
+            ['Input', 'HiddenPrev', 'Weight'], ['Hidden'],
            max_relative_error=0.007)


@@ -104,7 +103,7 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
        frame_size = self.frame_size
        super(TestGRUUnitOpWithBias, self).set_inputs()
        self.inputs['Bias'] = np.random.uniform(
-            -0.1, 0.1, (1, frame_size * 3)).astype('float32')
+            -0.1, 0.1, (1, frame_size * 3)).astype('float64')
        self.attrs = {
            'activation': GRUActivationType.identity,
            'gate_activation': GRUActivationType.sigmoid
@@ -117,5 +116,4 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):


 if __name__ == '__main__':
-    exit(0)  # FIXME(yuyang18): This unittest is not pass. Fix it later
    unittest.main()