提交 3e7fff41 编写于 作者: G guosheng

Fix calculations in gru_unit_op

上级 01d6ccb4
...@@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(sigmoid) .SetDefault(sigmoid)
.InEnum({identity, sigmoid, tanh, relu}); .InEnum({identity, sigmoid, tanh, relu});
AddComment(R"DOC( AddComment(R"DOC(
GRUUnit Operator. GRUUnit Operator implements partial calculations of the GRU unit as following:
This operator implements partial calculations of the GRU unit as follows:
$$ $$
update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\ update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\ reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\ output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev}) output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
$$ $$
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp. which is the same as one time step of the GRU operator.
@note To implement the complete GRU unit, a fully-connected operator must be
used beforehand to feed xu, xr and xc as the Input of the GRUUnit operator.
)DOC"); )DOC");
} }
...@@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { ...@@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
"ResetHiddenPrev"); "ResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("Hidden"), PADDLE_ENFORCE(ctx->HasInput("Hidden"),
"Input(%s) of GRUUnitGradOp should not be null.", "Hidden"); "Input(%s) of GRUUnitGradOp should not be null.", "Hidden");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")),
"Input(%s@GRAD) of GRUUnitGradOp should not be null.",
"Gate");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")),
"Input(%s@GRAD) of GRUUnitGradOp should not be null.",
"ResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
"Input(%s@GRAD) of GRUUnitGradOp should not be null.", "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
"Hidden"); "Hidden");
......
...@@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel<T> { ...@@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
auto c = g.slice(c_offsets, extents); // output candidate auto c = g.slice(c_offsets, extents); // output candidate
// calculate final output // calculate final output
h.device(place) = u * (h_p - c) + c; h.device(place) = u * (c - h_p) + h_p;
} }
}; };
...@@ -185,10 +185,10 @@ class GRUUnitGradKernel : public framework::OpKernel<T> { ...@@ -185,10 +185,10 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
// backward for unactivated update gate // backward for unactivated update gate
ActGradCompute(context.Attr<int>("gate_activation"), place, u, u, ActGradCompute(context.Attr<int>("gate_activation"), place, u, u,
d_g.slice(u_offsets, extents), d_h * (h_p - c)); d_g.slice(u_offsets, extents), d_h * (c - h_p));
// backward for unactivated output candidate // backward for unactivated output candidate
ActGradCompute(context.Attr<int>("activation"), place, c, c, ActGradCompute(context.Attr<int>("activation"), place, c, c,
d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u)); d_g.slice(c_offsets, extents), d_h * u);
// backward for reset_hidden_prev // backward for reset_hidden_prev
math::gemm<Place, T>(context.device_context(), false, true, batch_size, math::gemm<Place, T>(context.device_context(), false, true, batch_size,
frame_size, frame_size, 1, frame_size, frame_size, 1,
...@@ -210,7 +210,7 @@ class GRUUnitGradKernel : public framework::OpKernel<T> { ...@@ -210,7 +210,7 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
frame_size, gate_grad_data, frame_size * 3, 0, frame_size, gate_grad_data, frame_size * 3, 0,
weight_grad_data, frame_size * 2); weight_grad_data, frame_size * 2);
// backward for hidden_prev // backward for hidden_prev
d_h_p.device(place) = d_r_h_p * r + d_h * u; d_h_p.device(place) = d_r_h_p * r + d_h * (u.constant(T(1)) - u);
math::gemm<Place, T>(context.device_context(), false, true, batch_size, math::gemm<Place, T>(context.device_context(), false, true, batch_size,
frame_size, frame_size * 2, 1, gate_grad_data, frame_size, frame_size * 2, 1, gate_grad_data,
frame_size * 3, weight_data, frame_size * 2, 1, frame_size * 3, weight_data, frame_size * 2, 1,
......
...@@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest): ...@@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest):
c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) + c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) +
g[:, frame_size * 2:]) g[:, frame_size * 2:])
g = np.hstack((u_r, c)) g = np.hstack((u_r, c))
h = u * h_p + (1 - u) * c h = u * c + (1 - u) * h_p
self.outputs = { self.outputs = {
'Gate': g.astype('float64'), 'Gate': g.astype('float64'),
'ResetHiddenPrev': r_h_p.astype('float64'), 'ResetHiddenPrev': r_h_p.astype('float64'),
...@@ -93,8 +93,7 @@ class TestGRUUnitOp(OpTest): ...@@ -93,8 +93,7 @@ class TestGRUUnitOp(OpTest):
def test_check_grad(self): def test_check_grad(self):
self.check_grad( self.check_grad(
['Input', 'HiddenPrev', 'Weight'], ['Input', 'HiddenPrev', 'Weight'], ['Hidden'],
['Hidden', 'ResetHiddenPrev', 'Gate'],
max_relative_error=0.007) max_relative_error=0.007)
...@@ -104,7 +103,7 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp): ...@@ -104,7 +103,7 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
frame_size = self.frame_size frame_size = self.frame_size
super(TestGRUUnitOpWithBias, self).set_inputs() super(TestGRUUnitOpWithBias, self).set_inputs()
self.inputs['Bias'] = np.random.uniform( self.inputs['Bias'] = np.random.uniform(
-0.1, 0.1, (1, frame_size * 3)).astype('float32') -0.1, 0.1, (1, frame_size * 3)).astype('float64')
self.attrs = { self.attrs = {
'activation': GRUActivationType.identity, 'activation': GRUActivationType.identity,
'gate_activation': GRUActivationType.sigmoid 'gate_activation': GRUActivationType.sigmoid
...@@ -117,5 +116,4 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp): ...@@ -117,5 +116,4 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
if __name__ == '__main__': if __name__ == '__main__':
exit(0) # FIXME(yuyang18): This unittest is not pass. Fix it later
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册