Commit faad8351 authored by G guosheng

Refine GRU Operator following review comments

Parent 23a631d4
@@ -61,8 +61,6 @@ class GRUOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("BatchResetHiddenPrev", {input_dims[0], frame_size});
ctx->SetOutputDim("BatchHidden", {input_dims[0], frame_size});
ctx->SetOutputDim("Hidden", {input_dims[0], frame_size});
-  // ctx->ShareLoD("Input", "Gate");
-  // ctx->ShareLoD("Input", "ResetHiddenPrev");
ctx->ShareLoD("Input", "Hidden");
}
};
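As a quick shape check for the output dims set above, here is a tiny sketch. The concrete numbers T = 9 and D = 4 are only illustrative assumptions; the operator infers both from its inputs.

```python
# Illustrative shapes only; T and D are assumed values, not taken from the op.
T, D = 9, 4               # T: total time steps in the mini-batch, D: frame_size (hidden size)

input_shape = (T, 3 * D)  # Input holds the xu, xr, xc projections side by side: (T x 3D)
hidden_shape = (T, D)     # Hidden, BatchHidden, BatchResetHiddenPrev: (T x D)

assert input_shape[1] == 3 * hidden_shape[1]
```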
@@ -72,7 +70,7 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(LoDTensor) The first input is a LodTensor, which support "
"(LoDTensor) The first input is a LodTensor, which supports "
"variable-time length input sequence. The underlying tensor in "
"this LoDTenosr is a matrix with shape (T X 3D), where, T is the "
"total time steps in this mini-batch, D is the hidden size.");
@@ -132,14 +130,17 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
"whether to compute reversed GRU.")
.SetDefault(false);
AddComment(R"DOC(
-GRUOp implements part calculations of the GRU as following:
+GRU Operator implements part of the calculations of the complete GRU as follows:
\f[
-update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
-output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
-output: h_t = dot((1-u_t), hidden_prev) + dot(u_t, {h}_t)
+update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
+output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
\f]
-The rest of GRU can be completed by using FCOp's output as the input of GRUOp.
+@note To implement the complete GRU, a fully-connected operator must be
+applied beforehand to feed xu, xr and xc as the Input of the GRU operator.
)DOC");
}
};
......
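To make the equations in the operator comment concrete, here is a minimal NumPy sketch of a single GRU step. It only illustrates the documented math and is not the operator's kernel: sigmoid and tanh stand in for actGate and actNode, and the argument names simply mirror the symbols in the comment.

```python
import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def gru_step(xu, xr, xc, h_prev, W_u, W_r, W_c, b_u, b_r, b_c):
    """One GRU step following the equations in the operator comment.

    xu, xr, xc are the input projections (rows of the (T x 3D) Input,
    produced beforehand by a fully-connected layer); h_prev is the
    previous hidden state of size D.
    """
    u = sigmoid(xu + W_u.dot(h_prev) + b_u)            # update gate
    r = sigmoid(xr + W_r.dot(h_prev) + b_r)            # reset gate
    h_cand = np.tanh(xc + W_c.dot(r * h_prev) + b_c)   # candidate hidden state
    return (1.0 - u) * h_prev + u * h_cand             # new hidden state h_t
```

As the @note above says, xu, xr and xc are expected to come from a fully-connected operator applied to the raw input before this operator runs.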
@@ -19,28 +19,6 @@ namespace paddle {
namespace operators {
namespace math {
-// typedef enum {
-//   HL_ACTIVATION_SIGMOID = 0,
-//   HL_ACTIVATION_RELU = 1,
-//   HL_ACTIVATION_TANH = 2,
-//   HL_ACTIVATION_LINEAR = 3,
-//   HL_ACTIVATION_END
-// } activation_mode_t;
-
-// inline activation_mode_t ActiveType(const std::string &type) {
-//   if (type == "sigmoid") {
-//     return HL_ACTIVATION_SIGMOID;
-//   } else if (type == "relu") {
-//     return HL_ACTIVATION_RELU;
-//   } else if (type == "tanh") {
-//     return HL_ACTIVATION_TANH;
-//   } else if (type == "linear" || type == "") {
-//     return HL_ACTIVATION_LINEAR;
-//   } else {
-//     PADDLE_THROW("Do not support activation type.");
-//   }
-// }
template <typename T>
struct hl_gru_value {
T *gateWeight;
......
@@ -2,31 +2,7 @@ import unittest
import numpy as np
import math
from op_test import OpTest
-SIGMOID_THRESHOLD_MIN = -40.0
-SIGMOID_THRESHOLD_MAX = 13.0
-EXP_MAX_INPUT = 40.0
-
-def identity(x):
-    return x
-
-def sigmoid(x):
-    y = np.copy(x)
-    y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
-    y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
-    return 1. / (1. + np.exp(-y))
-
-def tanh(x):
-    y = -2. * x
-    y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
-    return (2. / (1. + np.exp(y))) - 1.
-
-def relu(x):
-    return np.maximum(x, 0)
+from test_lstm_op import identity, sigmoid, tanh, relu
class TestGRUOp(OpTest):
@@ -108,7 +84,7 @@ class TestGRUOp(OpTest):
return batch_gate, batch_reset_hidden_prev, hidden
def set_data(self):
-        lod = [[0, 2, 6, 9]]
+        lod = [[0, 2, 6, self.batch_size]]
self.idx_in_seq_list = self.seq_to_batch(lod, self.is_reverse)
batch_size = self.batch_size
frame_size = self.frame_size
......
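For reference, the LoD used in set_data can be read as sequence start offsets into the flat (T x 3D) input. A small sketch, assuming batch_size is 9 so that the LoD equals the literal [[0, 2, 6, 9]] used before this change:

```python
# Offsets-to-lengths reading of one LoD level; [0, 2, 6, 9] is an assumed
# value matching the lod the test used before switching to self.batch_size.
lod = [[0, 2, 6, 9]]
offsets = lod[0]

seq_lengths = [offsets[i + 1] - offsets[i] for i in range(len(offsets) - 1)]
print(seq_lengths)  # [2, 4, 3] -> three variable-length sequences

# Rows of the flat (T x 3D) input matrix that belong to each sequence.
seq_rows = [list(range(offsets[i], offsets[i + 1])) for i in range(len(offsets) - 1)]
print(seq_rows)  # [[0, 1], [2, 3, 4, 5], [6, 7, 8]]
```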