Commit 4c19f9f4 authored by chengduoZH

fix backward

Parent 6246be29
......@@ -27,6 +27,10 @@ class SequenceProjectOp : public framework::OperatorWithKernel {
"Input(X) of SequenceProjectOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SequenceProjectOp should not be null.");
// PaddingData must not be empty.
PADDLE_ENFORCE(
ctx->HasInput("PaddingData"),
"Input(PaddingData) of SequenceProjectOp should not be null.");
auto in_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE(in_dims.size() == 2, "Input(X) should be a 2-D tensor.");
......@@ -35,9 +39,6 @@ class SequenceProjectOp : public framework::OperatorWithKernel {
int context_start = ctx->Attrs().Get<int>("context_start");
if (padding_trainable) {
PADDLE_ENFORCE(
ctx->HasInput("PaddingData"),
"Output(PaddingData) of SequenceProjectOp should not be null.");
framework::DDim padding_dim = ctx->GetInputDim("PaddingData");
int up_pad = std::max(0, -context_start);
int down_pad = std::max(0, context_start + context_length - 1);
......@@ -71,17 +72,15 @@ class SequenceProjectGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Gradient of Out should not be null.");
PADDLE_ENFORCE(ctx->HasInput("X"), "The input X should not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Gradient of input(X@GRAD) should not be null.");
if (ctx->Attrs().Get<bool>("padding_trainable")) {
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("PaddingData")),
"Output(PaddingData@GRAD) of SequenceProjectGradOp should "
"not be null.");
if (ctx->Attrs().Get<bool>("padding_trainable") &&
ctx->HasOutput(framework::GradVarName("PaddingData"))) {
auto padding_dims = ctx->GetInputDim("PaddingData");
ctx->SetOutputDim(framework::GradVarName("PaddingData"), padding_dims);
}
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
if (ctx->HasOutput(framework::GradVarName("X"))) {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
}
};
......
......@@ -39,7 +39,6 @@ class SequenceProjectKernel : public framework::OpKernel<T> {
auto* out = context.Output<LoDTensor>("Out");
out->mutable_data<T>(context.GetPlace());
// Needs discussion: is it necessary to set zeros here?
// Because if padding_trainable is false, the padding data should be zeros.
auto temp = framework::EigenVector<T>::Flatten(*out);
temp.device(context.GetEigenDevice<Place>()) =
......@@ -176,12 +175,9 @@ class SequenceProjectGradKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& context) const override {
auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
auto* padding_data_g =
context.Output<LoDTensor>(framework::GradVarName("PaddingData"));
auto* in = context.Input<LoDTensor>("X");
in_g->mutable_data<T>(context.GetPlace());
if (in_g) {
math::SetConstant<Place, T> functor;
functor(context.device_context(), in_g, 0);
}
auto place = context.GetEigenDevice<Place>();
int context_start = context.Attr<int>("context_start");
......@@ -193,49 +189,87 @@ class SequenceProjectGradKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ(in->lod().size(), 1UL,
"Only support one level sequence now.");
auto lod_g_level_0 = in->lod()[0];
int64_t input_width = in_g->dims()[1];
int64_t input_width = in->dims()[1];
int64_t output_width = out_g->dims()[1];
int64_t padding_width = 0;
PADDLE_ENFORCE(input_width * context_length == output_width,
"The width of Out should equal input_width * context_length.");
LoDTensor* padding_data_g = nullptr;
if (padding_trainable) {
padding_data_g =
context.Output<LoDTensor>(framework::GradVarName("PaddingData"));
padding_data_g->mutable_data<T>(context.GetPlace());
PADDLE_ENFORCE_EQ(padding_data_g->dims().size(), 2UL,
"Only support one level sequence now.");
padding_width = padding_data_g->dims()[1];
PADDLE_ENFORCE(padding_width == input_width,
"Input size and pooling size should be consistent.");
math::SetConstant<Place, T> functor;
functor(context.device_context(), padding_data_g, 0);
}
int up_pad = std::max(0, -context_start);
int down_pad = std::max(0, context_start + context_length - 1);
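// Note: up_pad/down_pad count the context rows that fall before the start /
// after the end of each sequence; those rows are supplied by PaddingData.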
int sequence_height, sequence_width;
int input_row_begin, input_row_end;
sequence_width = static_cast<int>(in->dims()[1]);
paddle::operators::math::Col2ImFunctor<
paddle::operators::math::ColFormat::kOCF, Place, float>
col2im_ocf;
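// Col2Im (OCF format) scatters the column-shaped gradient of Out back into
// the corresponding rows of the input gradient, reversing the forward Im2Col.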
for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
input_row_begin = (context_start > 0)
? static_cast<int>(lod_g_level_0[i]) + context_start
: static_cast<int>(lod_g_level_0[i]);
input_row_end = static_cast<int>(lod_g_level_0[i + 1]);
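// Gradient w.r.t. Input(X): computed only when X@GRAD is requested.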
if (in_g) {
in_g->mutable_data<T>(context.GetPlace());
math::SetConstant<Place, T> functor;
functor(context.device_context(), in_g, 0);
Tensor out_g_t = out_g->Slice(static_cast<int>(lod_g_level_0[i]),
static_cast<int>(lod_g_level_0[i + 1]));
for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
input_row_begin =
(context_start > 0)
? static_cast<int>(lod_g_level_0[i]) + context_start
: static_cast<int>(lod_g_level_0[i]);
input_row_end = static_cast<int>(lod_g_level_0[i + 1]);
sequence_height = static_cast<int>(out_g_t.dims()[0]);
sequence_width = static_cast<int>(in_g->dims()[1]);
Tensor out_g_t = out_g->Slice(static_cast<int>(lod_g_level_0[i]),
static_cast<int>(lod_g_level_0[i + 1]));
sequence_height = static_cast<int>(out_g_t.dims()[0]);
if (input_row_begin < input_row_end) {
Tensor in_t = in_g->Slice(input_row_begin, input_row_end);
std::vector<int64_t> output_shape(
{sequence_height, 1, 1, context_length,
sequence_width}); // output_height, output_width,
// input_channels, filter_height, filter_width
out_g_t.Resize(framework::make_ddim(output_shape));
std::vector<int64_t> input_shape(
{1, input_row_end - input_row_begin,
sequence_width}); // input_channels, input_height, input_width
in_t.Resize(framework::make_ddim(input_shape));
col2im_ocf(context.device_context(), in_t, out_g_t,
/*stride_height*/ context_stride, /*stride_width*/ 0,
up_pad, down_pad);
}
out_g_t.Resize(framework::make_ddim(
{sequence_height, context_length * sequence_width}));
}
}
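// Gradient w.r.t. PaddingData: handled in a separate pass, and only when
// PaddingData@GRAD is requested.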
if (padding_trainable && padding_data_g) {
padding_data_g->mutable_data<T>(context.GetPlace());
PADDLE_ENFORCE_EQ(padding_data_g->dims().size(), 2UL,
"PaddingData should be a 2-D tensor.");
padding_width = padding_data_g->dims()[1];
PADDLE_ENFORCE(padding_width == input_width,
"The width of PaddingData should equal the width of Input(X).");
math::SetConstant<Place, T> functor;
functor(context.device_context(), padding_data_g, 0);
for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
input_row_begin =
(context_start > 0)
? static_cast<int>(lod_g_level_0[i]) + context_start
: static_cast<int>(lod_g_level_0[i]);
input_row_end = static_cast<int>(lod_g_level_0[i + 1]);
Tensor out_g_t = out_g->Slice(static_cast<int>(lod_g_level_0[i]),
static_cast<int>(lod_g_level_0[i + 1]));
sequence_height = static_cast<int>(out_g_t.dims()[0]);
if (padding_trainable) {
// Accumulate the gradient contribution from the trainable padding data.
out_g_t.Resize(framework::make_ddim(
{sequence_height * context_length, sequence_width}));
......@@ -287,29 +321,9 @@ class SequenceProjectGradKernel : public framework::OpKernel<T> {
w_sub_e.device(place) = w_sub_e + out_t_sub_e;
}
}
out_g_t.Resize(framework::make_ddim(
{sequence_height, context_length * sequence_width}));
}
if (in_g && input_row_begin < input_row_end) {
Tensor in_t = in_g->Slice(input_row_begin, input_row_end);
std::vector<int64_t> output_shape(
{sequence_height, 1, 1, context_length,
sequence_width}); // output_height, output_width,
// input_channels, filter_height, filter_width
out_g_t.Resize(framework::make_ddim(output_shape));
std::vector<int64_t> input_shape(
{1, input_row_end - input_row_begin,
sequence_width}); // input_channels, input_height, input_width
in_t.Resize(framework::make_ddim(input_shape));
col2im_ocf(context.device_context(), in_t, out_g_t,
/*stride_height*/ context_stride, /*stride_width*/ 0, up_pad,
down_pad);
}
out_g_t.Resize(framework::make_ddim(
{sequence_height, context_length * sequence_width}));
}
}
};
......
......@@ -15,8 +15,6 @@ class TestSeqProject(OpTest):
self.begin_pad = np.max([0, -self.context_start])
self.end_pad = np.max([0, self.context_start + self.context_length - 1])
self.total_pad = self.begin_pad + self.end_pad
# w = np.array(range(self.total_pad * self.input_size[1]))
# w.shape = self.total_pad, self.input_size[1]
w = np.random.uniform(
0.1, 1, [self.total_pad, self.input_size[1]]).astype('float32')
self.inputs = {
......@@ -73,6 +71,27 @@ class TestSeqProject(OpTest):
out[out_begin:out_end, j * self.input_size[1]:(j + 1) *
self.input_size[1]] += in_sub
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(
set(['X', 'PaddingData']), 'Out', max_relative_error=0.05)
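# The two tests below re-run the gradient check with one of the inputs
# excluded via no_grad_set.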
def test_check_grad_no_filter(self):
self.check_grad(
['X'],
'Out',
max_relative_error=0.05,
no_grad_set=set(['PaddingData']))
def test_check_grad_no_input(self):
self.check_grad(
['PaddingData'],
'Out',
max_relative_error=0.05,
no_grad_set=set(['X']))
def init_test_case(self):
self.op_type = "sequence_project"
self.input_row = 11
......@@ -84,29 +103,8 @@ class TestSeqProject(OpTest):
self.input_size = [self.input_row, 23]
self.lod = [[0, 4, 5, 8, self.input_row]]
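# Level-0 LoD offsets: four sequences of lengths 4, 1, 3 and input_row - 8.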
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(
set(['X', 'PaddingData']), 'Out', max_relative_error=0.05)
# def test_check_grad_no_filter(self):
# self.check_grad(
# ['X'],
# 'Out',
# max_relative_error=0.05,
# no_grad_set=set(['PaddingData']))
#
# def test_check_grad_no_input(self):
# self.check_grad(
# ['PaddingData'],
# 'Out',
# max_relative_error=0.05,
# no_grad_set=set(['X']))
class TestSeqProjectCases(TestSeqProject):
class TestSeqProjectCase1(TestSeqProject):
def init_test_case(self):
self.op_type = "sequence_project"
self.input_row = 25
......