From 3e7fff4188b0279c0db5bc4b16858b5880cea6f8 Mon Sep 17 00:00:00 2001
From: guosheng
Date: Mon, 20 Nov 2017 12:34:42 +0800
Subject: [PATCH] Fix calculations in gru_unit_op

---
 paddle/operators/gru_unit_op.cc               | 23 ++++++++-----------
 paddle/operators/gru_unit_op.h                |  8 +++----
 .../paddle/v2/fluid/tests/test_gru_unit_op.py |  8 +++----
 3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/paddle/operators/gru_unit_op.cc b/paddle/operators/gru_unit_op.cc
index 89c027ff1e..877c969103 100644
--- a/paddle/operators/gru_unit_op.cc
+++ b/paddle/operators/gru_unit_op.cc
@@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(sigmoid)
         .InEnum({identity, sigmoid, tanh, relu});
     AddComment(R"DOC(
-GRUUnit Operator.
-
-This operator implements partial calculations of the GRU unit as follows:
+GRUUnit Operator implements partial calculations of the GRU unit as follows:
 
 $$
-update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
-output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
-output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
+update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
+output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
 $$
 
-The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
+which is the same as one time step of the GRU operator.
+
+@note To implement the complete GRU unit, a fully-connected operator must be
+used beforehand to feed xu, xr and xc as the Input of the GRUUnit operator.
 
 )DOC");
   }
@@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
                    "ResetHiddenPrev");
     PADDLE_ENFORCE(ctx->HasInput("Hidden"),
                    "Input(%s) of GRUUnitGradOp should not be null.", "Hidden");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")),
-                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
-                   "Gate");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")),
-                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
-                   "ResetHiddenPrev");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
                    "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
                    "Hidden");
diff --git a/paddle/operators/gru_unit_op.h b/paddle/operators/gru_unit_op.h
index c53e7d9827..81818b0a0a 100644
--- a/paddle/operators/gru_unit_op.h
+++ b/paddle/operators/gru_unit_op.h
@@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel {
     auto c = g.slice(c_offsets, extents);  // output candidate
 
     // calculate final output
-    h.device(place) = u * (h_p - c) + c;
+    h.device(place) = u * (c - h_p) + h_p;
   }
 };
 
@@ -185,10 +185,10 @@ class GRUUnitGradKernel : public framework::OpKernel {
 
     // backward for unactivated update gate
     ActGradCompute(context.Attr("gate_activation"), place, u, u,
-                   d_g.slice(u_offsets, extents), d_h * (h_p - c));
+                   d_g.slice(u_offsets, extents), d_h * (c - h_p));
     // backward for unactivated output candidate
     ActGradCompute(context.Attr("activation"), place, c, c,
-                   d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u));
+                   d_g.slice(c_offsets, extents), d_h * u);
     // backward for reset_hidden_prev
     math::gemm(context.device_context(), false, true, batch_size,
                frame_size, frame_size, 1,
@@ -210,7 +210,7 @@ class GRUUnitGradKernel : public framework::OpKernel {
                frame_size, gate_grad_data, frame_size * 3, 0,
                weight_grad_data, frame_size * 2);
     // backward for hidden_prev
-    d_h_p.device(place) = d_r_h_p * r + d_h * u;
+    d_h_p.device(place) = d_r_h_p * r + d_h * (u.constant(T(1)) - u);
     math::gemm(context.device_context(), false, true, batch_size, frame_size,
                frame_size * 2, 1, gate_grad_data, frame_size * 3,
                weight_data, frame_size * 2, 1,
diff --git a/python/paddle/v2/fluid/tests/test_gru_unit_op.py b/python/paddle/v2/fluid/tests/test_gru_unit_op.py
index f356f6e9ec..beedcf7f42 100644
--- a/python/paddle/v2/fluid/tests/test_gru_unit_op.py
+++ b/python/paddle/v2/fluid/tests/test_gru_unit_op.py
@@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest):
         c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) +
                                                     g[:, frame_size * 2:])
         g = np.hstack((u_r, c))
-        h = u * h_p + (1 - u) * c
+        h = u * c + (1 - u) * h_p
         self.outputs = {
             'Gate': g.astype('float64'),
             'ResetHiddenPrev': r_h_p.astype('float64'),
@@ -93,8 +93,7 @@ class TestGRUUnitOp(OpTest):
 
     def test_check_grad(self):
         self.check_grad(
-            ['Input', 'HiddenPrev', 'Weight'],
-            ['Hidden', 'ResetHiddenPrev', 'Gate'],
+            ['Input', 'HiddenPrev', 'Weight'], ['Hidden'],
             max_relative_error=0.007)
 
 
@@ -104,7 +103,7 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
         frame_size = self.frame_size
         super(TestGRUUnitOpWithBias, self).set_inputs()
         self.inputs['Bias'] = np.random.uniform(
-            -0.1, 0.1, (1, frame_size * 3)).astype('float32')
+            -0.1, 0.1, (1, frame_size * 3)).astype('float64')
         self.attrs = {
             'activation': GRUActivationType.identity,
             'gate_activation': GRUActivationType.sigmoid
@@ -117,5 +116,4 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
 
 
 if __name__ == '__main__':
-    exit(0)  # FIXME(yuyang18): This unittest is not pass. Fix it later
     unittest.main()
-- 
GitLab
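
For reference, a minimal NumPy sketch of one forward GRU-unit step with the corrected blend, mirroring the updated reference computation in test_gru_unit_op.py above. It is a sketch only: the helper name gru_unit_forward and the separate w_u_r / w_c arguments are illustrative (the operator packs gate and candidate weights into a single Weight tensor), and sigmoid / tanh are assumed for the gate and candidate activations.

import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def gru_unit_forward(x, h_prev, w_u_r, w_c, bias=None):
    # x:      (batch, frame_size * 3), pre-projected xu, xr, xc (e.g. from an FC layer)
    # h_prev: (batch, frame_size), previous hidden state
    # w_u_r:  (frame_size, frame_size * 2), recurrent weights of the update/reset gates
    # w_c:    (frame_size, frame_size), recurrent weights of the output candidate
    # bias:   optional (1, frame_size * 3)
    frame_size = h_prev.shape[1]
    g = x if bias is None else x + bias
    # update and reset gates (gate_activation assumed to be sigmoid)
    u_r = sigmoid(np.dot(h_prev, w_u_r) + g[:, :frame_size * 2])
    u = u_r[:, :frame_size]                # update gate u_t
    r = u_r[:, frame_size:frame_size * 2]  # reset gate r_t
    # output candidate (activation assumed to be tanh)
    c = np.tanh(np.dot(r * h_prev, w_c) + g[:, frame_size * 2:])
    # corrected output: h_t = u_t * c_t + (1 - u_t) * h_{t-1}
    # (the old code effectively computed u_t * h_{t-1} + (1 - u_t) * c_t)
    return u * c + (1 - u) * h_prev


if __name__ == '__main__':
    rng = np.random.RandomState(0)
    batch, frame = 4, 5
    x = rng.uniform(-0.1, 0.1, (batch, frame * 3))
    h_prev = rng.uniform(-0.1, 0.1, (batch, frame))
    w_u_r = rng.uniform(-0.1, 0.1, (frame, frame * 2))
    w_c = rng.uniform(-0.1, 0.1, (frame, frame))
    print(gru_unit_forward(x, h_prev, w_u_r, w_c).shape)  # -> (4, 5)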