From faad835166659eba5a05b8e005b7d49206016ccb Mon Sep 17 00:00:00 2001
From: guosheng
Date: Fri, 3 Nov 2017 16:43:35 +0800
Subject: [PATCH] Refine GRU Operator by following comments

---
 paddle/operators/gru_op.cc                        | 19 +++++++------
 paddle/operators/math/gru_compute.h               | 22 ---------------
 .../paddle/v2/framework/tests/test_gru_op.py      | 28 ++-----------------
 3 files changed, 12 insertions(+), 57 deletions(-)

diff --git a/paddle/operators/gru_op.cc b/paddle/operators/gru_op.cc
index d4e4c8a3225..5aa03f8916a 100644
--- a/paddle/operators/gru_op.cc
+++ b/paddle/operators/gru_op.cc
@@ -61,8 +61,6 @@ class GRUOp : public framework::OperatorWithKernel {
     ctx->SetOutputDim("BatchResetHiddenPrev", {input_dims[0], frame_size});
     ctx->SetOutputDim("BatchHidden", {input_dims[0], frame_size});
     ctx->SetOutputDim("Hidden", {input_dims[0], frame_size});
-    // ctx->ShareLoD("Input", "Gate");
-    // ctx->ShareLoD("Input", "ResetHiddenPrev");
     ctx->ShareLoD("Input", "Hidden");
   }
 };
@@ -72,7 +70,7 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
   GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Input",
-             "(LoDTensor) The first input is a LodTensor, which support "
+             "(LoDTensor) The first input is a LodTensor, which supports "
              "variable-time length input sequence. The underlying tensor in "
              "this LoDTenosr is a matrix with shape (T X 3D), where, T is the "
              "total time steps in this mini-batch, D is the hidden size.");
@@ -132,14 +130,17 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
              "whether to compute reversed GRU.")
         .SetDefault(false);
     AddComment(R"DOC(
-GRUOp implements part calculations of the GRU as following:
+GRU Operator implements part of the calculations of the complete GRU as follows:
+
 \f[
-update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
-output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
-output: h_t = dot((1-u_t), hidden_prev) + dot(u_t, {h}_t)
+update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
+output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
 \f]
-The rest of GRU can be completed by using FCOp's output as the input of GRUOp.
+
+@note To implement the complete GRU, a fully-connected operator must be applied
+beforehand to feed xu, xr and xc as the Input of the GRU operator.
)DOC"); } }; diff --git a/paddle/operators/math/gru_compute.h b/paddle/operators/math/gru_compute.h index 45ce48658aa..4e0a7779da8 100644 --- a/paddle/operators/math/gru_compute.h +++ b/paddle/operators/math/gru_compute.h @@ -19,28 +19,6 @@ namespace paddle { namespace operators { namespace math { -// typedef enum { -// HL_ACTIVATION_SIGMOID = 0, -// HL_ACTIVATION_RELU = 1, -// HL_ACTIVATION_TANH = 2, -// HL_ACTIVATION_LINEAR = 3, -// HL_ACTIVATION_END -// } activation_mode_t; - -// inline activation_mode_t ActiveType(const std::string &type) { -// if (type == "sigmoid") { -// return HL_ACTIVATION_SIGMOID; -// } else if (type == "relu") { -// return HL_ACTIVATION_RELU; -// } else if (type == "tanh") { -// return HL_ACTIVATION_TANH; -// } else if (type == "linear" || type == "") { -// return HL_ACTIVATION_LINEAR; -// } else { -// PADDLE_THROW("Do not support activation type."); -// } -// } - template struct hl_gru_value { T *gateWeight; diff --git a/python/paddle/v2/framework/tests/test_gru_op.py b/python/paddle/v2/framework/tests/test_gru_op.py index 1848fb34919..b2474cff94c 100644 --- a/python/paddle/v2/framework/tests/test_gru_op.py +++ b/python/paddle/v2/framework/tests/test_gru_op.py @@ -2,31 +2,7 @@ import unittest import numpy as np import math from op_test import OpTest - -SIGMOID_THRESHOLD_MIN = -40.0 -SIGMOID_THRESHOLD_MAX = 13.0 -EXP_MAX_INPUT = 40.0 - - -def identity(x): - return x - - -def sigmoid(x): - y = np.copy(x) - y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN - y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX - return 1. / (1. + np.exp(-y)) - - -def tanh(x): - y = -2. * x - y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT - return (2. / (1. + np.exp(y))) - 1. - - -def relu(x): - return np.maximum(x, 0) +from test_lstm_op import identity, sigmoid, tanh, relu class TestGRUOp(OpTest): @@ -108,7 +84,7 @@ class TestGRUOp(OpTest): return batch_gate, batch_reset_hidden_prev, hidden def set_data(self): - lod = [[0, 2, 6, 9]] + lod = [[0, 2, 6, self.batch_size]] self.idx_in_seq_list = self.seq_to_batch(lod, self.is_reverse) batch_size = self.batch_size frame_size = self.frame_size -- GitLab