diff --git a/paddle/operators/sequence_project_op.cc b/paddle/operators/sequence_project_op.cc
index 8baae0f1d8b3ba12eeba02aa00049a92dd443da2..800d0b6563d8c90c9ff99f537b630426f021245d 100644
--- a/paddle/operators/sequence_project_op.cc
+++ b/paddle/operators/sequence_project_op.cc
@@ -27,6 +27,10 @@ class SequenceProjectOp : public framework::OperatorWithKernel {
                    "Input(X) of SequenceProjectOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of SequenceProjectOp should not be null.");
+    // PaddingData must not be empty.
+    PADDLE_ENFORCE(
+        ctx->HasInput("PaddingData"),
+        "Input(PaddingData) of SequenceProjectOp should not be null.");
     auto in_dims = ctx->GetInputDim("X");
     PADDLE_ENFORCE(in_dims.size() == 2, "Input(X) should be 2-D tensor.");
@@ -35,9 +39,6 @@ class SequenceProjectOp : public framework::OperatorWithKernel {
     int context_start = ctx->Attrs().Get<int>("context_start");
 
     if (padding_trainable) {
-      PADDLE_ENFORCE(
-          ctx->HasInput("PaddingData"),
-          "Output(PaddingData) of SequenceProjectOp should not be null.");
       framework::DDim padding_dim = ctx->GetInputDim("PaddingData");
       int up_pad = std::max(0, -context_start);
       int down_pad = std::max(0, context_start + context_length - 1);
@@ -71,17 +72,15 @@ class SequenceProjectGradOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                    "Gradient of Out should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("X"), "The input X should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
-                   "Gradient of input(X@GRAD) should not be null.");
-    if (ctx->Attrs().Get<bool>("padding_trainable")) {
-      PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("PaddingData")),
-                     "Output(PaddingData@GRAD) of SequenceProjectGradOp should "
-                     "not be null.");
+    if (ctx->Attrs().Get<bool>("padding_trainable") &&
+        ctx->HasOutput(framework::GradVarName("PaddingData"))) {
       auto padding_dims = ctx->GetInputDim("PaddingData");
       ctx->SetOutputDim(framework::GradVarName("PaddingData"), padding_dims);
     }
-    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
+    if (ctx->HasOutput(framework::GradVarName("X"))) {
+      ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
+    }
   }
 };
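Note on the InferShape changes above: PaddingData is now always checked as an input of the forward op, and both gradient outputs become optional, so SetOutputDim only runs for gradients that are actually requested. A small sketch of the shape bookkeeping behind these checks, with illustrative values (none of the names below come from the operator itself):

```python
# Illustrative values; the real op reads these from its attributes and inputs.
context_start, context_length = -1, 3
input_height, input_width = 11, 23          # dims of X

up_pad = max(0, -context_start)             # padding rows above each sequence
down_pad = max(0, context_start + context_length - 1)  # rows below

# PaddingData supplies one trainable row per padded position.
padding_data_shape = (up_pad + down_pad, input_width)

# Out holds context_length side-by-side copies of the (shifted) input rows.
out_shape = (input_height, context_length * input_width)

print(padding_data_shape, out_shape)        # (2, 23) (11, 69)
```

With context_start = -1 and context_length = 3, one padding row is consumed above and one below each sequence, which is why the test below builds its PaddingData input with total_pad = begin_pad + end_pad rows.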
diff --git a/paddle/operators/sequence_project_op.h b/paddle/operators/sequence_project_op.h
index b31768b55833de8e90ff36739b9aee6c17398bbf..77c5e853851f7164e0e3b4d72bafbe0a67af5c92 100644
--- a/paddle/operators/sequence_project_op.h
+++ b/paddle/operators/sequence_project_op.h
@@ -39,7 +39,6 @@ class SequenceProjectKernel : public framework::OpKernel<T> {
     auto* out = context.Output<LoDTensor>("Out");
     out->mutable_data<T>(context.GetPlace());
 
-    // need discuss, is it necessary to set zeros ?
     // Because if padding_trainable is false, padding data should be zeros.
     auto temp = framework::EigenVector<T>::Flatten(*out);
     temp.device(context.GetEigenDevice<Place>()) =
@@ -176,12 +175,9 @@ class SequenceProjectGradKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& context) const override {
     auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
     auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
+    auto* padding_data_g =
+        context.Output<LoDTensor>(framework::GradVarName("PaddingData"));
     auto* in = context.Input<LoDTensor>("X");
-    in_g->mutable_data<T>(context.GetPlace());
-    if (in_g) {
-      math::SetConstant<Place, float> functor;
-      functor(context.device_context(), in_g, 0);
-    }
     auto place = context.GetEigenDevice<Place>();
 
     int context_start = context.Attr<int>("context_start");
@@ -193,49 +189,87 @@ class SequenceProjectGradKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(in->lod().size(), 1UL,
                       "Only support one level sequence now.");
     auto lod_g_level_0 = in->lod()[0];
-    int64_t input_width = in_g->dims()[1];
+
+    int64_t input_width = in->dims()[1];
     int64_t output_width = out_g->dims()[1];
     int64_t padding_width = 0;
+
     PADDLE_ENFORCE(input_width * context_length == output_width,
                    "Input size and pooling size should be consistent.");
 
-    LoDTensor* padding_data_g = nullptr;
-    if (padding_trainable) {
-      padding_data_g =
-          context.Output<LoDTensor>(framework::GradVarName("PaddingData"));
-      padding_data_g->mutable_data<T>(context.GetPlace());
-      PADDLE_ENFORCE_EQ(padding_data_g->dims().size(), 2UL,
-                        "Only support one level sequence now.");
-      padding_width = padding_data_g->dims()[1];
-      PADDLE_ENFORCE(padding_width == input_width,
-                     "Input size and pooling size should be consistent.");
-      math::SetConstant<Place, float> functor;
-      functor(context.device_context(), padding_data_g, 0);
-    }
-
     int up_pad = std::max(0, -context_start);
     int down_pad = std::max(0, context_start + context_length - 1);
     int sequence_height, sequence_width;
     int input_row_begin, input_row_end;
 
+    sequence_width = static_cast<int>(in->dims()[1]);
+
     paddle::operators::math::Col2ImFunctor<
         paddle::operators::math::ColFormat::kOCF, Place, float>
         col2im_ocf;
 
-    for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
-      input_row_begin = (context_start > 0)
-                            ? static_cast<int>(lod_g_level_0[i]) + context_start
-                            : static_cast<int>(lod_g_level_0[i]);
-      input_row_end = static_cast<int>(lod_g_level_0[i + 1]);
+    if (in_g) {
+      in_g->mutable_data<T>(context.GetPlace());
+      math::SetConstant<Place, float> functor;
+      functor(context.device_context(), in_g, 0);
+
-      Tensor out_g_t = out_g->Slice(static_cast<int>(lod_g_level_0[i]),
-                                    static_cast<int>(lod_g_level_0[i + 1]));
+      for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
+        input_row_begin =
+            (context_start > 0)
+                ? static_cast<int>(lod_g_level_0[i]) + context_start
+                : static_cast<int>(lod_g_level_0[i]);
+        input_row_end = static_cast<int>(lod_g_level_0[i + 1]);
 
-      sequence_height = static_cast<int>(out_g_t.dims()[0]);
-      sequence_width = static_cast<int>(in_g->dims()[1]);
+        Tensor out_g_t = out_g->Slice(static_cast<int>(lod_g_level_0[i]),
+                                      static_cast<int>(lod_g_level_0[i + 1]));
+
+        sequence_height = static_cast<int>(out_g_t.dims()[0]);
+
+        if (input_row_begin < input_row_end) {
+          Tensor in_t = in_g->Slice(input_row_begin, input_row_end);
+
+          std::vector<int64_t> output_shape(
+              {sequence_height, 1, 1, context_length,
+               sequence_width});  // output_height, output_width,
+                                  // input_channels, filter_height, filter_width
+          out_g_t.Resize(framework::make_ddim(output_shape));
+
+          std::vector<int64_t> input_shape(
+              {1, input_row_end - input_row_begin,
+               sequence_width});  // input_channels, input_height, input_width
+          in_t.Resize(framework::make_ddim(input_shape));
+
+          col2im_ocf(context.device_context(), in_t, out_g_t,
+                     /*stride_height*/ context_stride, /*stride_width*/ 0,
+                     up_pad, down_pad);
+        }
+        out_g_t.Resize(framework::make_ddim(
+            {sequence_height, context_length * sequence_width}));
+      }
+    }
+
+    if (padding_trainable && padding_data_g) {
+      padding_data_g->mutable_data<T>(context.GetPlace());
+      PADDLE_ENFORCE_EQ(padding_data_g->dims().size(), 2UL,
+                        "PaddingData should be a 2-D tensor.");
+      padding_width = padding_data_g->dims()[1];
+      PADDLE_ENFORCE(padding_width == input_width,
+                     "Input size and pooling size should be consistent.");
+      math::SetConstant<Place, float> functor;
+      functor(context.device_context(), padding_data_g, 0);
+
+      for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
+        input_row_begin =
+            (context_start > 0)
+                ? static_cast<int>(lod_g_level_0[i]) + context_start
+                : static_cast<int>(lod_g_level_0[i]);
+        input_row_end = static_cast<int>(lod_g_level_0[i + 1]);
+
+        Tensor out_g_t = out_g->Slice(static_cast<int>(lod_g_level_0[i]),
+                                      static_cast<int>(lod_g_level_0[i + 1]));
+
+        sequence_height = static_cast<int>(out_g_t.dims()[0]);
-
-    if (padding_trainable) {
-      // add up trainable data
         out_g_t.Resize(framework::make_ddim(
             {sequence_height * context_length, sequence_width}));
@@ -287,29 +321,9 @@ class SequenceProjectGradKernel : public framework::OpKernel<T> {
           w_sub_e.device(place) = w_sub_e + out_t_sub_e;
         }
       }
+        out_g_t.Resize(framework::make_ddim(
+            {sequence_height, context_length * sequence_width}));
       }
-
-      if (in_g && input_row_begin < input_row_end) {
-        Tensor in_t = in_g->Slice(input_row_begin, input_row_end);
-
-        std::vector<int64_t> output_shape(
-            {sequence_height, 1, 1, context_length,
-             sequence_width});  // output_height, output_width,
-                                // input_channels, filter_height, filter_width
-        out_g_t.Resize(framework::make_ddim(output_shape));
-
-        std::vector<int64_t> input_shape(
-            {1, input_row_end - input_row_begin,
-             sequence_width});  // input_channels, input_height, input_width
-        in_t.Resize(framework::make_ddim(input_shape));
-
-        col2im_ocf(context.device_context(), in_t, out_g_t,
-                   /*stride_height*/ context_stride, /*stride_width*/ 0, up_pad,
-                   down_pad);
-      }
-
-      out_g_t.Resize(framework::make_ddim(
-          {sequence_height, context_length * sequence_width}));
     }
   }
 };
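Note on the kernel changes above: the backward pass is now split into two independently guarded blocks, one that scatters Out@GRAD back into X@GRAD through col2im_ocf and one that accumulates into PaddingData@GRAD the rows of Out@GRAD that multiplied the trainable padding. For orientation, here is a numpy sketch of the forward context projection these blocks invert, for a single sequence; the padding-row indexing is a plausible reading of the op, not code taken from it:

```python
import numpy as np

def seq_project_forward(x, w, context_start, context_length):
    """Project one sequence: x is (rows, width); w holds the trainable
    padding rows, shaped (up_pad + down_pad, width).
    Returns (rows, context_length * width)."""
    rows, width = x.shape
    up_pad = max(0, -context_start)
    out = np.zeros((rows, context_length * width), dtype=x.dtype)
    for i in range(rows):                  # output row
        for j in range(context_length):    # context offset within the row
            src = i + context_start + j    # input row feeding this slot
            if 0 <= src < rows:
                patch = x[src]
            elif src < 0:                  # above the sequence: up-pad row
                patch = w[src + up_pad]
            else:                          # below the sequence: down-pad row
                patch = w[up_pad + (src - rows)]
            out[i, j * width:(j + 1) * width] = patch
    return out

x = np.random.rand(4, 3).astype('float32')
w = np.random.rand(2, 3).astype('float32')   # one up-pad and one down-pad row
print(seq_project_forward(x, w, context_start=-1, context_length=3).shape)  # (4, 9)
```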
diff --git a/python/paddle/v2/framework/tests/test_seq_project.py b/python/paddle/v2/framework/tests/test_seq_project.py
index c783aff51628c21b4d97f80848b6f76f4c0e1664..2bbdadbc8f0fbe995baa058a02e29ee6165a073d 100644
--- a/python/paddle/v2/framework/tests/test_seq_project.py
+++ b/python/paddle/v2/framework/tests/test_seq_project.py
@@ -15,8 +15,6 @@ class TestSeqProject(OpTest):
         self.begin_pad = np.max([0, -self.context_start])
         self.end_pad = np.max([0, self.context_start + self.context_length - 1])
         self.total_pad = self.begin_pad + self.end_pad
-        # w = np.array(range(self.total_pad * self.input_size[1]))
-        # w.shape = self.total_pad, self.input_size[1]
         w = np.random.uniform(
             0.1, 1, [self.total_pad, self.input_size[1]]).astype('float32')
         self.inputs = {
@@ -73,6 +71,27 @@ class TestSeqProject(OpTest):
                 out[out_begin:out_end, j * self.input_size[1]:(j + 1) *
                     self.input_size[1]] += in_sub
 
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(
+            set(['X', 'PaddingData']), 'Out', max_relative_error=0.05)
+
+    def test_check_grad_no_filter(self):
+        self.check_grad(
+            ['X'],
+            'Out',
+            max_relative_error=0.05,
+            no_grad_set=set(['PaddingData']))
+
+    def test_check_grad_no_input(self):
+        self.check_grad(
+            ['PaddingData'],
+            'Out',
+            max_relative_error=0.05,
+            no_grad_set=set(['X']))
+
     def init_test_case(self):
         self.op_type = "sequence_project"
         self.input_row = 11
@@ -84,29 +103,8 @@ class TestSeqProject(OpTest):
         self.input_size = [self.input_row, 23]
         self.lod = [[0, 4, 5, 8, self.input_row]]
 
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(
-            set(['X', 'PaddingData']), 'Out', max_relative_error=0.05)
-
-    # def test_check_grad_no_filter(self):
-    #     self.check_grad(
-    #         ['X'],
-    #         'Out',
-    #         max_relative_error=0.05,
-    #         no_grad_set=set(['PaddingData']))
-    #
-    # def test_check_grad_no_input(self):
-    #     self.check_grad(
-    #         ['PaddingData'],
-    #         'Out',
-    #         max_relative_error=0.05,
-    #         no_grad_set=set(['X']))
-
-class TestSeqProjectCases(TestSeqProject):
+
+class TestSeqProjectCase1(TestSeqProject):
     def init_test_case(self):
         self.op_type = "sequence_project"
         self.input_row = 25
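Note on the test changes above: the no_filter / no_input gradient checks can be re-enabled now that X@GRAD and PaddingData@GRAD are optional; no_grad_set simply excludes one variable from the numeric-vs-analytic comparison. As a reminder of what that comparison does, a minimal finite-difference sketch (the function f, delta, and the 0.05 tolerance are illustrative here; OpTest's internals may differ):

```python
import numpy as np

def numeric_grad(f, x, delta=0.005):
    """Central finite-difference gradient of a scalar function f at x."""
    grad = np.zeros_like(x)
    x_flat, grad_flat = x.reshape(-1), grad.reshape(-1)
    for i in range(x_flat.size):
        orig = x_flat[i]
        x_flat[i] = orig + delta
        f_hi = f(x)
        x_flat[i] = orig - delta
        f_lo = f(x)
        x_flat[i] = orig
        grad_flat[i] = (f_hi - f_lo) / (2 * delta)
    return grad

# Toy check: f(x) = sum(x ** 2) has analytic gradient 2 * x.
x = np.random.rand(3, 4)
analytic = 2 * x
numeric = numeric_grad(lambda v: float((v ** 2).sum()), x)
rel_err = np.abs(analytic - numeric) / np.maximum(np.abs(numeric), 1e-3)
assert rel_err.max() < 0.05
```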