From faad835166659eba5a05b8e005b7d49206016ccb Mon Sep 17 00:00:00 2001
From: guosheng
Date: Fri, 3 Nov 2017 16:43:35 +0800
Subject: [PATCH] Refine GRU Operator by following comments

---
 paddle/operators/gru_op.cc                        | 19 +++++++------
 paddle/operators/math/gru_compute.h               | 22 ---------------
 .../paddle/v2/framework/tests/test_gru_op.py      | 28 ++-----------------
 3 files changed, 12 insertions(+), 57 deletions(-)

diff --git a/paddle/operators/gru_op.cc b/paddle/operators/gru_op.cc
index d4e4c8a3225..5aa03f8916a 100644
--- a/paddle/operators/gru_op.cc
+++ b/paddle/operators/gru_op.cc
@@ -61,8 +61,6 @@ class GRUOp : public framework::OperatorWithKernel {
     ctx->SetOutputDim("BatchResetHiddenPrev", {input_dims[0], frame_size});
     ctx->SetOutputDim("BatchHidden", {input_dims[0], frame_size});
     ctx->SetOutputDim("Hidden", {input_dims[0], frame_size});
-    // ctx->ShareLoD("Input", "Gate");
-    // ctx->ShareLoD("Input", "ResetHiddenPrev");
     ctx->ShareLoD("Input", "Hidden");
   }
 };
@@ -72,7 +70,7 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
   GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Input",
-             "(LoDTensor) The first input is a LodTensor, which support "
+             "(LoDTensor) The first input is a LodTensor, which supports "
              "variable-time length input sequence. The underlying tensor in "
              "this LoDTenosr is a matrix with shape (T X 3D), where, T is the "
              "total time steps in this mini-batch, D is the hidden size.");
@@ -132,14 +130,17 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
              "whether to compute reversed GRU.")
         .SetDefault(false);
     AddComment(R"DOC(
-GRUOp implements part calculations of the GRU as following:
+GRU Operator implements part of the calculations of the complete GRU as follows:
+
 \f[
-update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
-output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
-output: h_t = dot((1-u_t), hidden_prev) + dot(u_t, {h}_t)
+update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
+output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
 \f]
-The rest of GRU can be completed by using FCOp's output as the input of GRUOp.
+
+@note To implement the complete GRU, a fully-connected operator must be applied
+beforehand to feed xu, xr and xc as the Input of the GRU operator.
)DOC"); } }; diff --git a/paddle/operators/math/gru_compute.h b/paddle/operators/math/gru_compute.h index 45ce48658aa..4e0a7779da8 100644 --- a/paddle/operators/math/gru_compute.h +++ b/paddle/operators/math/gru_compute.h @@ -19,28 +19,6 @@ namespace paddle { namespace operators { namespace math { -// typedef enum { -// HL_ACTIVATION_SIGMOID = 0, -// HL_ACTIVATION_RELU = 1, -// HL_ACTIVATION_TANH = 2, -// HL_ACTIVATION_LINEAR = 3, -// HL_ACTIVATION_END -// } activation_mode_t; - -// inline activation_mode_t ActiveType(const std::string &type) { -// if (type == "sigmoid") { -// return HL_ACTIVATION_SIGMOID; -// } else if (type == "relu") { -// return HL_ACTIVATION_RELU; -// } else if (type == "tanh") { -// return HL_ACTIVATION_TANH; -// } else if (type == "linear" || type == "") { -// return HL_ACTIVATION_LINEAR; -// } else { -// PADDLE_THROW("Do not support activation type."); -// } -// } - template struct hl_gru_value { T *gateWeight; diff --git a/python/paddle/v2/framework/tests/test_gru_op.py b/python/paddle/v2/framework/tests/test_gru_op.py index 1848fb34919..b2474cff94c 100644 --- a/python/paddle/v2/framework/tests/test_gru_op.py +++ b/python/paddle/v2/framework/tests/test_gru_op.py @@ -2,31 +2,7 @@ import unittest import numpy as np import math from op_test import OpTest - -SIGMOID_THRESHOLD_MIN = -40.0 -SIGMOID_THRESHOLD_MAX = 13.0 -EXP_MAX_INPUT = 40.0 - - -def identity(x): - return x - - -def sigmoid(x): - y = np.copy(x) - y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN - y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX - return 1. / (1. + np.exp(-y)) - - -def tanh(x): - y = -2. * x - y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT - return (2. / (1. + np.exp(y))) - 1. - - -def relu(x): - return np.maximum(x, 0) +from test_lstm_op import identity, sigmoid, tanh, relu class TestGRUOp(OpTest): @@ -108,7 +84,7 @@ class TestGRUOp(OpTest): return batch_gate, batch_reset_hidden_prev, hidden def set_data(self): - lod = [[0, 2, 6, 9]] + lod = [[0, 2, 6, self.batch_size]] self.idx_in_seq_list = self.seq_to_batch(lod, self.is_reverse) batch_size = self.batch_size frame_size = self.frame_size -- GitLab