From a4d54b83d402b12ecd7643fbd13050898a9fa9e2 Mon Sep 17 00:00:00 2001
From: guosheng
Date: Wed, 1 Nov 2017 00:50:56 +0800
Subject: [PATCH] Make GRU Operator adapt to the latest code

---
 paddle/operators/gru_op.cc                   | 66 ++++++++++---------
 .../paddle/v2/framework/tests/test_gru_op.py |  6 +-
 2 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/paddle/operators/gru_op.cc b/paddle/operators/gru_op.cc
index e80e170fb9..d4e4c8a322 100644
--- a/paddle/operators/gru_op.cc
+++ b/paddle/operators/gru_op.cc
@@ -43,14 +43,12 @@ class GRUOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(
         weight_dims[1], frame_size * 3,
         "The shape of Weight matrix must be [frame_size, frame_size * 3].");
-    auto h0 = Input("H0");
-    if (h0 != framework::kEmptyVarName) {
+    if (ctx->HasInput("H0")) {
       auto h0_dims = ctx->GetInputDim("H0");
       PADDLE_ENFORCE_EQ(h0_dims[1], frame_size,
                         "The width of H0 must be equal to frame_size.");
     }
-    auto bias = Input("Bias");
-    if (bias != framework::kEmptyVarName) {
+    if (ctx->HasInput("Bias")) {
       auto bias_dims = ctx->GetInputDim("Bias");
       int bias_height = bias_dims[0];
       int bias_width = bias_dims[1];
@@ -74,42 +72,52 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
   GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Input",
-             "(LoDTensor) the first input is a LodTensor, which support "
+             "(LoDTensor) The first input is a LoDTensor, which supports "
              "variable-time length input sequence. The underlying tensor in "
              "this LoDTenosr is a matrix with shape (T X 3D), where, T is the "
             "total time steps in this mini-batch, D is the hidden size.");
     AddInput("H0",
-             "(Tensor, optional) the initial hidden state is an optional "
+             "(Tensor, optional) The initial hidden state is an optional "
              "input. This is a tensor with shape (N x D), where N is the "
-             "batch size, D is the hidden size.");
+             "batch size, D is the hidden size.")
+        .AsDispensable();
     AddInput(
         "Weight",
-        "(Tensor) Weight matrix with shape [hidden_size, hidden_size * 3]. "
-        "The elements continuous in memory can be divided into two parts. "
-        "The first part are weights of the update gate and reset gate "
-        "with shape [hidden_size, hidden_size * 2], and the second part are "
-        "weights of output candidate with shape [hidden_size, hidden_size]");
+        "(Tensor) The learnable hidden-hidden weight matrix with shape "
+        "(D x 3D), where D is the hidden size. The elements contiguous in "
+        "memory can be divided into two parts: the first part contains the "
+        "weights of the update gate and reset gate with shape (D x 2D), and "
+        "the second part contains the weights of the output candidate with "
+        "shape (D x D).");
     AddInput("Bias",
-             "(Tensor) Bias vector with shape [1, hidden_size * 3] concating "
-             "bias of the update gate, reset gate and output candidate.");
+             "(Tensor, optional) Bias vector with shape (1 x 3D) "
+             "concatenating the bias of the update gate, the reset gate and "
+             "the output candidate.")
+        .AsDispensable();
     AddOutput("BatchGate",
-              "(LoDTensor) the update gata, reset gate and output candidate "
-              "lod tensor of GRU operator. "
-              "The shape and lod is the same with the `Input`.")
+              "(LoDTensor) To compute with batches, sequence data will be "
+              "reorganized into several successive batches, each containing "
+              "data from the same time step. The LoDTensor BatchGate contains "
+              "the update gate, reset gate and output candidate values "
+              "organized in batches. The LoD size is 2. "
+              "The first LoD contains the batch offsets and the second LoD "
+              "contains the indexes in the raw sequence data.")
               .AsIntermediate();
     AddOutput(
         "BatchResetHiddenPrev",
-        "(LoDTensor) the reseted hidden state lod tensor of GRU operator. "
-        "The shape and lod is the same with the `Input`.")
+        "(LoDTensor) The reset hidden state LoDTensor organized in batches. "
+        "This LoDTensor is a matrix with shape (T X D) and has the same LoD "
+        "as `BatchGate`.")
         .AsIntermediate();
     AddOutput(
         "BatchHidden",
-        "(LoDTensor) the reseted hidden state lod tensor of GRU operator. "
-        "The shape and lod is the same with the `Input`.")
+        "(LoDTensor) The hidden state LoDTensor organized in batches. "
+        "This LoDTensor is a matrix with shape (T X D) and has the same LoD "
+        "as `BatchGate`.")
         .AsIntermediate();
-    AddOutput("Hidden",
-              "(LoDTensor) the hidden state lod tensor of GRU operator. "
-              "The shape and lod is the same with the `Input`.");
+    AddOutput(
+        "Hidden",
+        "(LoDTensor) The hidden state LoDTensor organized in sequences. "
+        "This LoDTensor is a matrix with shape (T X D) and has the same LoD "
+        "as `BatchGate`.");
     AddAttr<std::string>("activation",
                          "(string, default tanh) "
                          "The activation type used for output candidate {h}_t.")
@@ -124,14 +132,14 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
                          "whether to compute reversed GRU.")
         .SetDefault(false);
     AddComment(R"DOC(
-GRUOp implements part calculations of the GRU unit as following:
+GRUOp implements part of the calculations of the GRU as follows:
 \f[
 update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
 reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
 output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
 output: h_t = dot((1-u_t), hidden_prev) + dot(u_t, {h}_t)
 \f]
-The rest of GRU unit can be completed by using FCOp's output as the input of GRUOp.
+The rest of the GRU can be completed by using FCOp's output as the input of GRUOp.
)DOC"); } }; @@ -170,8 +178,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_width, frame_size * 3, "The shape of Weight matrix must be [frame_size, frame_size * 3]."); - auto h0 = Input("H0"); - if (h0 != framework::kEmptyVarName) { + if (ctx->HasInput("H0")) { auto h0_dims = ctx->GetInputDim("H0"); PADDLE_ENFORCE_EQ(h0_dims[1], frame_size, "The width of H0 must be equal to frame_size."); @@ -179,8 +186,7 @@ class GRUGradOp : public framework::OperatorWithKernel { if (ctx->HasOutput(h0_grad_name)) ctx->SetOutputDim(h0_grad_name, h0_dims); } - auto bias = Input("Bias"); - if (bias != framework::kEmptyVarName) { + if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); int bias_height = bias_dims[0]; int bias_width = bias_dims[1]; diff --git a/python/paddle/v2/framework/tests/test_gru_op.py b/python/paddle/v2/framework/tests/test_gru_op.py index e4cd126427..1c8bbabf12 100644 --- a/python/paddle/v2/framework/tests/test_gru_op.py +++ b/python/paddle/v2/framework/tests/test_gru_op.py @@ -62,7 +62,7 @@ class TestGRUOp(OpTest): return idx_in_seq_list def gru_step(self, x, h_p, w, b): - print x.shape, h_p.shape, w.shape, b.shape + # print x.shape, h_p.shape, w.shape, b.shape batch_size = x.shape[0] frame_size = w.shape[0] g = x + np.tile(b, (batch_size, 1)) @@ -96,7 +96,7 @@ class TestGRUOp(OpTest): num_batch = len(idx_in_seq_list) end_idx = 0 for batch_idx in range(num_batch): - print idx_in_seq_list[batch_idx] + # print idx_in_seq_list[batch_idx] x = input[idx_in_seq_list[batch_idx]] g, r_h_p, h = self.gru_step(x, h_p, w, b) if batch_idx < (num_batch - 1): @@ -112,7 +112,7 @@ class TestGRUOp(OpTest): def set_data(self): lod = [[0, 2, 6, 9]] #[[0, 1, 2, 3]] self.idx_in_seq_list = self.seq_to_batch(lod, self.is_reverse) - print self.idx_in_seq_list + # print self.idx_in_seq_list batch_size = self.batch_size frame_size = self.frame_size input = np.random.rand(batch_size, frame_size * 3).astype('float64') -- GitLab