提交 0bc5a122 编写于 作者: G guosheng

Refine gru_unit_op by optional bias

上级 1cabdb87
......@@ -31,8 +31,6 @@ class GRUUnitOp : public framework::OperatorWithKernel {
"Input(%s) of GRUUnitOp should not be null.", "HiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of GRUUnitOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasInput("Bias"),
"Input(%s) of GRUUnitOp should not be null.", "Bias");
PADDLE_ENFORCE(ctx->HasOutput("Gate"),
"Output(%s) of GRUUnitOp should not be null.", "Gate");
PADDLE_ENFORCE(ctx->HasOutput("ResetHiddenPrev"),
......@@ -43,14 +41,11 @@ class GRUUnitOp : public framework::OperatorWithKernel {
auto input_dims = ctx->GetInputDim("Input");
auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev");
auto weight_dims = ctx->GetInputDim("Weight");
auto bias_dims = ctx->GetInputDim("Bias");
int batch_size = input_dims[0];
int input_size = input_dims[1];
int frame_size = hidden_prev_dims[1];
int weight_height = weight_dims[0];
int weight_width = weight_dims[1];
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(
input_size, frame_size * 3,
"The input_size must be 3 times of frame_size in GRUUnitOp.");
......@@ -60,10 +55,16 @@ class GRUUnitOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(
weight_width, frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
auto bias = Input("Bias");
if (bias != framework::kEmptyVarName) {
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
}
ctx->SetOutputDim("Gate", {batch_size, frame_size * 3});
ctx->SetOutputDim("ResetHiddenPrev", {batch_size, frame_size});
ctx->SetOutputDim("Hidden", {batch_size, frame_size});
......@@ -139,8 +140,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
"HiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of GRUUnitGradOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasInput("Bias"),
"Input(%s) of GRUUnitGradOp should not be null.", "Bias");
PADDLE_ENFORCE(ctx->HasInput("Gate"),
"Input(%s) of GRUUnitGradOp should not be null.", "Gate");
PADDLE_ENFORCE(ctx->HasInput("ResetHiddenPrev"),
......@@ -160,14 +159,11 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
auto input_dims = ctx->GetInputDim("Input");
auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev");
auto weight_dims = ctx->GetInputDim("Weight");
auto bias_dims = ctx->GetInputDim("Bias");
// int batch_size = input_dims[0];
int input_size = input_dims[1];
int frame_size = hidden_prev_dims[1];
int weight_height = weight_dims[0];
int weight_width = weight_dims[1];
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(
input_size, frame_size * 3,
"The input_size must be 3 times of frame_size in GRUUnitOp.");
......@@ -177,10 +173,19 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(
weight_width, frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
auto bias = Input("Bias");
if (bias != framework::kEmptyVarName) {
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
auto bias_grad_name = framework::GradVarName("Bias");
if (ctx->HasOutput(bias_grad_name))
ctx->SetOutputDim(bias_grad_name, bias_dims);
}
auto input_grad_name = framework::GradVarName("Input");
if (ctx->HasOutput(input_grad_name))
ctx->SetOutputDim(input_grad_name, input_dims);
......@@ -190,9 +195,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
auto weight_grad_name = framework::GradVarName("Weight");
if (ctx->HasOutput(weight_grad_name))
ctx->SetOutputDim(weight_grad_name, weight_dims);
auto bias_grad_name = framework::GradVarName("Bias");
if (ctx->HasOutput(bias_grad_name))
ctx->SetOutputDim(bias_grad_name, bias_dims);
}
};
......
......@@ -64,16 +64,20 @@ class GRUUnitKernel : public framework::OpKernel<T> {
auto x = EigenMatrix<T>::From(*input);
auto h_p = EigenMatrix<T>::From(*hidden_prev);
auto b = EigenMatrix<T>::From(*bias);
auto g = EigenMatrix<T>::From(*gate);
auto r_h_p = EigenMatrix<T>::From(*reset_hidden_prev);
auto h = EigenMatrix<T>::From(*hidden);
auto place = context.GetEigenDevice<Place>();
// calculate unactivated gate outputs
g.device(place) = x +
b.reshape(Eigen::array<int, 2>({{1, frame_size * 3}}))
.broadcast(Eigen::array<int, 2>({{batch_size, 1}}));
if (bias) {
auto b = EigenMatrix<T>::From(*bias);
g.device(place) = x +
b.reshape(Eigen::array<int, 2>({{1, frame_size * 3}}))
.broadcast(Eigen::array<int, 2>({{batch_size, 1}}));
} else {
g.device(place) = x;
}
const T* hidden_prev_data = hidden_prev->data<T>();
const T* weight_data = weight->data<T>();
T* gate_data = gate->data<T>();
......@@ -145,7 +149,6 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
input_grad->mutable_data<T>(context.GetPlace());
hidden_prev_grad->mutable_data<T>(context.GetPlace());
weight_grad->mutable_data<T>(context.GetPlace());
bias_grad->mutable_data<T>(context.GetPlace());
Tensor gate_grad;
gate_grad.mutable_data<T>(input->dims(), context.GetPlace());
Tensor reset_hidden_prev_grad;
......@@ -168,7 +171,6 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
auto d_h = EigenMatrix<T>::From(*hidden_grad);
auto d_x = EigenMatrix<T>::From(*input_grad);
auto d_h_p = EigenMatrix<T>::From(*hidden_prev_grad);
auto d_b = EigenMatrix<T>::From(*bias_grad);
auto d_g = EigenMatrix<T>::From(gate_grad);
auto d_r_h_p = EigenMatrix<T>::From(reset_hidden_prev_grad);
auto place = context.GetEigenDevice<Place>();
......@@ -216,7 +218,11 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
// backward for input
d_x.device(place) = d_g;
// backward for bias
d_b.device(place) = d_g.sum(Eigen::array<int, 1>({{0}}));
if (bias_grad) {
bias_grad->mutable_data<T>(context.GetPlace());
auto d_b = EigenMatrix<T>::From(*bias_grad);
d_b.device(place) = d_g.sum(Eigen::array<int, 1>({{0}}));
}
}
};
......
......@@ -28,6 +28,8 @@ def relu(x):
class TestGRUUnitOp(OpTest):
batch_size = 3
frame_size = 5
activate = {
GRUActivationType.identity: identity,
GRUActivationType.sigmoid: sigmoid,
......@@ -35,9 +37,9 @@ class TestGRUUnitOp(OpTest):
GRUActivationType.relu: relu,
}
def setUp(self):
batch_size = 3
frame_size = 5
def set_inputs(self):
batch_size = self.batch_size
frame_size = self.frame_size
self.op_type = 'gru_unit'
self.inputs = {
'Input': np.random.uniform(
......@@ -47,18 +49,21 @@ class TestGRUUnitOp(OpTest):
'Weight': np.random.uniform(
-1. / math.sqrt(frame_size), 1. / math.sqrt(frame_size),
(frame_size, frame_size * 3)).astype('float32'),
'Bias': np.random.uniform(-0.1, 0.1,
(1, frame_size * 3)).astype('float32')
}
self.attrs = {
'activation': GRUActivationType.tanh,
'gate_activation': GRUActivationType.sigmoid
}
def set_outputs(self):
# GRU calculations
batch_size = self.batch_size
frame_size = self.frame_size
x = self.inputs['Input']
h_p = self.inputs['HiddenPrev']
w = self.inputs['Weight']
b = self.inputs['Bias']
b = self.inputs['Bias'] if self.inputs.has_key('Bias') else np.zeros(
(1, frame_size * 3))
g = x + np.tile(b, (batch_size, 1))
w_u_r = w.flatten()[:frame_size * frame_size * 2].reshape(
(frame_size, frame_size * 2))
......@@ -73,12 +78,33 @@ class TestGRUUnitOp(OpTest):
g[:, frame_size * 2:])
g = np.hstack((u_r, c))
h = u * h_p + (1 - u) * c
self.outputs = {'Gate': g, 'ResetHiddenPrev': r_h_p, 'Hidden': h}
def setUp(self):
self.set_inputs()
self.set_outputs()
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(
['Input', 'HiddenPrev', 'Weight'], ['Hidden'],
max_relative_error=0.007)
class TestGRUUnitOpWithBias(TestGRUUnitOp):
def set_inputs(self):
batch_size = self.batch_size
frame_size = self.frame_size
super(TestGRUUnitOpWithBias, self).set_inputs()
self.inputs['Bias'] = np.random.uniform(
-0.1, 0.1, (1, frame_size * 3)).astype('float32')
self.attrs = {
'activation': GRUActivationType.identity,
'gate_activation': GRUActivationType.sigmoid
}
def test_check_grad(self):
self.check_grad(
['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册