Commit faad8351 authored by G guosheng

Refine GRU Operator following review comments

Parent 23a631d4
@@ -61,8 +61,6 @@ class GRUOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("BatchResetHiddenPrev", {input_dims[0], frame_size});
ctx->SetOutputDim("BatchHidden", {input_dims[0], frame_size});
ctx->SetOutputDim("Hidden", {input_dims[0], frame_size});
-  // ctx->ShareLoD("Input", "Gate");
-  // ctx->ShareLoD("Input", "ResetHiddenPrev");
ctx->ShareLoD("Input", "Hidden");
}
};
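As a quick shape check for the output dims set above, here is a tiny sketch. The concrete numbers T = 9 and D = 4 are only illustrative assumptions; the operator infers both from its inputs.

```python
# Illustrative shapes only; T and D are assumed values, not taken from the op.
T, D = 9, 4               # T: total time steps in the mini-batch, D: frame_size (hidden size)

input_shape = (T, 3 * D)  # Input holds the xu, xr, xc projections side by side: (T x 3D)
hidden_shape = (T, D)     # Hidden, BatchHidden, BatchResetHiddenPrev: (T x D)

assert input_shape[1] == 3 * hidden_shape[1]
```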
@@ -72,7 +70,7 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(LoDTensor) The first input is a LodTensor, which support "
"(LoDTensor) The first input is a LodTensor, which supports "
"variable-time length input sequence. The underlying tensor in "
"this LoDTenosr is a matrix with shape (T X 3D), where, T is the "
"total time steps in this mini-batch, D is the hidden size.");
@@ -132,14 +130,17 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
"whether to compute reversed GRU.")
.SetDefault(false);
AddComment(R"DOC(
-GRUOp implements part calculations of the GRU as following:
+GRU Operator implements part of the calculations of the complete GRU as follows:
\f[
-update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
-output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
-output: h_t = dot((1-u_t), hidden_prev) + dot(u_t, {h}_t)
+update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
+output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
\f]
-The rest of GRU can be completed by using FCOp's output as the input of GRUOp.
+@note To implement the complete GRU, a fully-connected operator must be
+applied beforehand to feed xu, xr and xc as the Input of the GRU operator.
)DOC");
}
};
......
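To make the equations in the operator comment concrete, here is a minimal NumPy sketch of a single GRU step. It only illustrates the documented math and is not the operator's kernel: sigmoid and tanh stand in for actGate and actNode, and the argument names simply mirror the symbols in the comment.

```python
import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def gru_step(xu, xr, xc, h_prev, W_u, W_r, W_c, b_u, b_r, b_c):
    """One GRU step following the equations in the operator comment.

    xu, xr, xc are the input projections (rows of the (T x 3D) Input,
    produced beforehand by a fully-connected layer); h_prev is the
    previous hidden state of size D.
    """
    u = sigmoid(xu + W_u.dot(h_prev) + b_u)            # update gate
    r = sigmoid(xr + W_r.dot(h_prev) + b_r)            # reset gate
    h_cand = np.tanh(xc + W_c.dot(r * h_prev) + b_c)   # candidate hidden state
    return (1.0 - u) * h_prev + u * h_cand             # new hidden state h_t
```

As the @note above says, xu, xr and xc are expected to come from a fully-connected operator applied to the raw input before this operator runs.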
@@ -19,28 +19,6 @@ namespace paddle {
namespace operators {
namespace math {
-// typedef enum {
-//   HL_ACTIVATION_SIGMOID = 0,
-//   HL_ACTIVATION_RELU = 1,
-//   HL_ACTIVATION_TANH = 2,
-//   HL_ACTIVATION_LINEAR = 3,
-//   HL_ACTIVATION_END
-// } activation_mode_t;
-
-// inline activation_mode_t ActiveType(const std::string &type) {
-//   if (type == "sigmoid") {
-//     return HL_ACTIVATION_SIGMOID;
-//   } else if (type == "relu") {
-//     return HL_ACTIVATION_RELU;
-//   } else if (type == "tanh") {
-//     return HL_ACTIVATION_TANH;
-//   } else if (type == "linear" || type == "") {
-//     return HL_ACTIVATION_LINEAR;
-//   } else {
-//     PADDLE_THROW("Do not support activation type.");
-//   }
-// }
template <typename T>
struct hl_gru_value {
T *gateWeight;
......
@@ -2,31 +2,7 @@ import unittest
import numpy as np
import math
from op_test import OpTest
-SIGMOID_THRESHOLD_MIN = -40.0
-SIGMOID_THRESHOLD_MAX = 13.0
-EXP_MAX_INPUT = 40.0
-
-def identity(x):
-    return x
-
-def sigmoid(x):
-    y = np.copy(x)
-    y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
-    y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
-    return 1. / (1. + np.exp(-y))
-
-def tanh(x):
-    y = -2. * x
-    y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
-    return (2. / (1. + np.exp(y))) - 1.
-
-def relu(x):
-    return np.maximum(x, 0)
+from test_lstm_op import identity, sigmoid, tanh, relu
class TestGRUOp(OpTest):
@@ -108,7 +84,7 @@ class TestGRUOp(OpTest):
return batch_gate, batch_reset_hidden_prev, hidden
def set_data(self):
-        lod = [[0, 2, 6, 9]]
+        lod = [[0, 2, 6, self.batch_size]]
self.idx_in_seq_list = self.seq_to_batch(lod, self.is_reverse)
batch_size = self.batch_size
frame_size = self.frame_size
......
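For reference, the LoD used in set_data can be read as sequence start offsets into the flat (T x 3D) input. A small sketch, assuming batch_size is 9 so that the LoD equals the literal [[0, 2, 6, 9]] used before this change:

```python
# Offsets-to-lengths reading of one LoD level; [0, 2, 6, 9] is an assumed
# value matching the lod the test used before switching to self.batch_size.
lod = [[0, 2, 6, 9]]
offsets = lod[0]

seq_lengths = [offsets[i + 1] - offsets[i] for i in range(len(offsets) - 1)]
print(seq_lengths)  # [2, 4, 3] -> three variable-length sequences

# Rows of the flat (T x 3D) input matrix that belong to each sequence.
seq_rows = [list(range(offsets[i], offsets[i + 1])) for i in range(len(offsets) - 1)]
print(seq_rows)  # [[0, 1], [2, 3, 4, 5], [6, 7, 8]]
```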