diff --git a/paddle/operators/math/sequence_project.h b/paddle/operators/math/sequence_project.h
index 53b61ce16c99f27bb0612f5c6df4ff1c1276356a..3d8b5a2f397b560cc2eecbb861b2a39b69e9d5fa 100644
--- a/paddle/operators/math/sequence_project.h
+++ b/paddle/operators/math/sequence_project.h
@@ -90,108 +90,143 @@ template <typename Place, typename T>
 class SequenceProjectFunctor {
  public:
   void operator()(const platform::DeviceContext& context,
-                  const framework::LoDTensor* in,
-                  const framework::LoDTensor* padding_data,
-                  framework::LoDTensor* col, bool padding_trainable,
+                  framework::LoDTensor& in, framework::LoDTensor& padding_data,
+                  framework::LoDTensor& col, bool padding_trainable,
                   int context_start, int context_length, int context_stride,
-                  int up_pad, int down_pad) {
-    auto lod_level_0 = in->lod()[0];
+                  int up_pad, int down_pad, bool gradient, bool input_grad,
+                  bool pad_grad) {
+    auto lod_level_0 = in.lod()[0];
 
     paddle::operators::math::Im2ColFunctor<
         paddle::operators::math::ColFormat::kOCF, Place, float>
         im2col_ocf;
+    paddle::operators::math::Col2ImFunctor<
+        paddle::operators::math::ColFormat::kOCF, Place, float>
+        col2im_ocf;
 
     int input_row_begin, input_row_end;
     int sequence_height, sequence_width;
-    sequence_width = in->dims()[1];
-
-    for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
-      input_row_begin = (context_start > 0)
-                            ? static_cast<int>(lod_level_0[i]) + context_start
-                            : static_cast<int>(lod_level_0[i]);
-      input_row_end = static_cast<int>(lod_level_0[i + 1]);
-
-      framework::Tensor out_t =
-          col->Slice(static_cast<int>(lod_level_0[i]),
-                     static_cast<int>(lod_level_0[i + 1]));
-
-      sequence_height = static_cast<int>(out_t.dims()[0]);
-
-      if (input_row_begin < input_row_end) {
-        framework::Tensor in_t = in->Slice(input_row_begin, input_row_end);
-
-        std::vector<int64_t> output_shape(
-            {sequence_height, 1, 1, context_length,
-             sequence_width});  // output_height, output_width,
-                                // input_channels, filter_height, filter_width
-
-        out_t.Resize(framework::make_ddim(output_shape));
-
-        std::vector<int64_t> input_shape(
-            {1, input_row_end - input_row_begin,
-             sequence_width});  // input_channels, input_height, input_width
-        in_t.Resize(framework::make_ddim(input_shape));
-
-        im2col_ocf(context, in_t, out_t,
-                   /*stride_height*/ context_stride, /*stride_width*/ 1, up_pad,
-                   down_pad, 0, 0);
+    sequence_width = in.dims()[1];
+    input_grad = gradient && input_grad;
+    pad_grad = gradient && pad_grad;
+
+    if (!gradient || input_grad) {
+      for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+        input_row_begin = (context_start > 0)
+                              ? static_cast<int>(lod_level_0[i]) + context_start
+                              : static_cast<int>(lod_level_0[i]);
+        input_row_end = static_cast<int>(lod_level_0[i + 1]);
+
+        framework::Tensor out_t =
+            col.Slice(static_cast<int>(lod_level_0[i]),
+                      static_cast<int>(lod_level_0[i + 1]));
+
+        sequence_height = static_cast<int>(out_t.dims()[0]);
+
+        if (input_row_begin < input_row_end) {
+          framework::Tensor in_t = in.Slice(input_row_begin, input_row_end);
+
+          std::vector<int64_t> output_shape(
+              {sequence_height, 1, 1, context_length,
+               sequence_width});  // output_height, output_width,
+                                  // input_channels, filter_height, filter_width
+
+          out_t.Resize(framework::make_ddim(output_shape));
+
+          std::vector<int64_t> input_shape(
+              {1, input_row_end - input_row_begin,
+               sequence_width});  // input_channels, input_height, input_width
+          in_t.Resize(framework::make_ddim(input_shape));
+
+          if (gradient) {
+            col2im_ocf(context, in_t, out_t,
+                       /*stride_height*/ context_stride, /*stride_width*/ 1,
+                       up_pad, down_pad, 0, 0);
+          } else {
+            im2col_ocf(context, in_t, out_t,
+                       /*stride_height*/ context_stride, /*stride_width*/ 1,
+                       up_pad, down_pad, 0, 0);
+          }
+          out_t.Resize(framework::make_ddim(
+              {sequence_height, context_length * sequence_width}));
+        }
       }
-
+    }
+    if (!gradient || pad_grad) {
       if (padding_trainable) {
-        // add up trainable data
-        out_t.Resize(framework::make_ddim(
-            {sequence_height * context_length, sequence_width}));
-
-        if (up_pad > 0) {  // add up pad
-          int padding_rows = std::min(
-              up_pad, static_cast<int>(lod_level_0[i + 1] - lod_level_0[i]));
-
-          for (int k = 0; k < padding_rows; ++k) {
-            int padding_size =
-                k + context_length < up_pad ? context_length : up_pad - k;
-            framework::Tensor out_t_sub = out_t.Slice(
-                k * context_length, k * context_length + padding_size);
-            framework::Tensor w_sub = padding_data->Slice(k, k + padding_size);
-            // in this block, using EigenVector<T>::Flatten is ok too.
-            auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
-            auto w_sub_e = EigenMatrix<T>::From(w_sub);
-            out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
+        for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+          framework::Tensor out_t =
+              col.Slice(static_cast<int>(lod_level_0[i]),
+                        static_cast<int>(lod_level_0[i + 1]));
+
+          sequence_height = static_cast<int>(out_t.dims()[0]);
+
+          // add up trainable data
+          out_t.Resize(framework::make_ddim(
+              {sequence_height * context_length, sequence_width}));
+
+          if (up_pad > 0) {  // add up pad
+            int padding_rows = std::min(
+                up_pad, static_cast<int>(lod_level_0[i + 1] - lod_level_0[i]));
+
+            for (int k = 0; k < padding_rows; ++k) {
+              int padding_size =
+                  k + context_length < up_pad ? context_length : up_pad - k;
+              framework::Tensor out_t_sub = out_t.Slice(
+                  k * context_length, k * context_length + padding_size);
+              framework::Tensor w_sub = padding_data.Slice(k, k + padding_size);
+              // in this block, using EigenVector<T>::Flatten is ok too.
+              auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
+              auto w_sub_e = EigenMatrix<T>::From(w_sub);
+              if (gradient) {
+                w_sub_e.device(*context.GetEigenDevice<Place>()) =
+                    w_sub_e + out_t_sub_e;
+              } else {
+                out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
+              }
+            }
           }
-        }
-        if (down_pad > 0) {  // add down pad
-          int down_pad_begin_row =
-              std::max(0,
-                       (sequence_height - context_start - context_length) + 1) +
-              1;
-          int padding_begin = std::max(0, context_start - sequence_height);
-          int padding_size =
-              sequence_height - context_start >= context_length
-                  ? 1
-                  : context_length - (sequence_height - context_start);
-          if (context_start >= sequence_height) padding_size = context_length;
-          int padding_idx = padding_begin;
-          for (int t = 0; t + down_pad_begin_row <= sequence_height;
-               ++t, ++padding_size) {
+          if (down_pad > 0) {  // add down pad
+            int down_pad_begin_row =
+                std::max(
+                    0, (sequence_height - context_start - context_length) + 1) +
+                1;
+            int padding_begin = std::max(0, context_start - sequence_height);
+            int padding_size =
+                sequence_height - context_start >= context_length
+                    ? 1
+                    : context_length - (sequence_height - context_start);
             if (context_start >= sequence_height) padding_size = context_length;
-            if (padding_size > context_length) {
-              padding_size = context_length;
-              padding_idx++;
+            int padding_idx = padding_begin;
+            for (int t = 0; t + down_pad_begin_row <= sequence_height;
+                 ++t, ++padding_size) {
+              if (context_start >= sequence_height)
+                padding_size = context_length;
+              if (padding_size > context_length) {
+                padding_size = context_length;
+                padding_idx++;
+              }
+              if (padding_begin > 0 || sequence_height == context_start)
+                padding_idx = padding_begin + t;
+              framework::Tensor out_t_sub = out_t.Slice(
+                  (down_pad_begin_row + t) * context_length - padding_size,
+                  (down_pad_begin_row + t) * context_length);
+              framework::Tensor w_sub = padding_data.Slice(
+                  up_pad + padding_idx, up_pad + padding_idx + padding_size);
+              auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
+              auto w_sub_e = EigenMatrix<T>::From(w_sub);
+              if (gradient) {
+                w_sub_e.device(*context.GetEigenDevice<Place>()) =
+                    w_sub_e + out_t_sub_e;
+              } else {
+                out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
+              }
             }
-            if (padding_begin > 0 || sequence_height == context_start)
-              padding_idx = padding_begin + t;
-            framework::Tensor out_t_sub = out_t.Slice(
-                (down_pad_begin_row + t) * context_length - padding_size,
-                (down_pad_begin_row + t) * context_length);
-            framework::Tensor w_sub = padding_data->Slice(
-                up_pad + padding_idx, up_pad + padding_idx + padding_size);
-            auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
-            auto w_sub_e = EigenMatrix<T>::From(w_sub);
-            out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
           }
+          out_t.Resize(framework::make_ddim(
+              {sequence_height, context_length * sequence_width}));
         }
       }
-      out_t.Resize(framework::make_ddim(
-          {sequence_height, context_length * sequence_width}));
     }
   }
 };
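For reference, the refactored `SequenceProjectFunctor` above folds the forward projection and both backward passes into one entry point, dispatched by the three trailing booleans. A minimal sketch of the resulting call patterns, mirroring the three call sites in `sequence_conv_op.h` below (`ctx`, `in`, `in_g`, `pad`, `pad_g`, and `col` stand in for the kernel-local variables; this is an illustration, not part of the patch):

```cpp
// Forward: im2col-project the input rows into `col` and copy the trainable
// padding weights into the boundary rows of `col`.
seq_project_functor(ctx, in, pad, col, padding_trainable, context_start,
                    context_length, context_stride, up_pad, down_pad,
                    /*gradient=*/false, /*input_grad=*/false,
                    /*pad_grad=*/false);

// Backward w.r.t. the input: col2im-scatter `col` back into the input
// gradient; the padding branch is skipped.
seq_project_functor(ctx, in_g, pad_g, col, padding_trainable, context_start,
                    context_length, context_stride, up_pad, down_pad,
                    /*gradient=*/true, /*input_grad=*/true,
                    /*pad_grad=*/false);

// Backward w.r.t. the trainable padding: accumulate the boundary rows of
// `col` into the padding gradient; the im2col/col2im branch is skipped.
seq_project_functor(ctx, in, pad_g, col, padding_trainable, context_start,
                    context_length, context_stride, up_pad, down_pad,
                    /*gradient=*/true, /*input_grad=*/false,
                    /*pad_grad=*/true);
```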
diff --git a/paddle/operators/sequence_conv_op.h b/paddle/operators/sequence_conv_op.h
index 4735fa4a5fa249e28892afd46960c2a81ab42f07..3525bb752b416db974d3bf8ea77e1b00b3c6e600 100644
--- a/paddle/operators/sequence_conv_op.h
+++ b/paddle/operators/sequence_conv_op.h
@@ -39,6 +39,7 @@ class SequenceConvKernel : public framework::OpKernel<T> {
     auto filter = *context.Input<Tensor>("Filter");
 
     out->mutable_data<T>(context.GetPlace());
+    // out->set_lod(in->lod());
 
     int context_start = context.Attr<int>("context_start");
     int context_length = context.Attr<int>("context_length");
@@ -71,10 +72,12 @@ class SequenceConvKernel : public framework::OpKernel<T> {
 
     paddle::operators::math::SequenceProjectFunctor<Place, T>
         seq_project_functor;
+    LoDTensor* input = const_cast<LoDTensor*>(in);
+    LoDTensor* pad_data = const_cast<LoDTensor*>(padding_data);
 
-    seq_project_functor(context.device_context(), in, padding_data, &col,
+    seq_project_functor(context.device_context(), *input, *pad_data, col,
                         padding_trainable, context_start, context_length,
-                        context_stride, up_pad, down_pad);
+                        context_stride, up_pad, down_pad, false, false, false);
 
     filter.Resize(framework::make_ddim({context_length * sequence_width, 1}));
     math::matmul<Place, T>(context.device_context(), col, false, filter, false,
@@ -95,8 +98,6 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
     auto* in = context.Input<LoDTensor>("X");
     auto* filter = context.Input<Tensor>("Filter");
 
-    auto place = context.GetEigenDevice<Place>();
-
     int context_start = context.Attr<int>("context_start");
     int context_length = context.Attr<int>("context_length");
     int context_stride = context.Attr<int>("context_stride");
@@ -109,10 +110,7 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
     int up_pad = std::max(0, -context_start);
     int down_pad = std::max(0, context_start + context_length - 1);
 
-    int sequence_height, sequence_width;
-    int input_row_begin, input_row_end;
-
-    sequence_width = static_cast<int>(in->dims()[1]);
+    int sequence_width = static_cast<int>(in->dims()[1]);
 
     // use col_shape in the im2col calculation
     framework::DDim col_shape = {in->dims()[0],
@@ -129,50 +127,19 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
       math::matmul<Place, T>(context.device_context(), *out_g, false, *filter,
                              true, T(1.0), &col, T(1.0));
     }
+    paddle::operators::math::SequenceProjectFunctor<Place, T>
+        seq_project_functor;
 
     if (in_g) {
       in_g->mutable_data<T>(context.GetPlace());
+      in_g->set_lod(in->lod());
 
       math::SetConstant<Place, T> functor;
       functor(context.device_context(), in_g, 0);
 
-      paddle::operators::math::Col2ImFunctor<
-          paddle::operators::math::ColFormat::kOCF, Place, float>
-          col2im_ocf;
-
-      for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
-        input_row_begin =
-            (context_start > 0)
-                ? static_cast<int>(lod_g_level_0[i]) + context_start
-                : static_cast<int>(lod_g_level_0[i]);
-        input_row_end = static_cast<int>(lod_g_level_0[i + 1]);
-
-        Tensor col_t = col.Slice(static_cast<int>(lod_g_level_0[i]),
-                                 static_cast<int>(lod_g_level_0[i + 1]));
-
-        sequence_height = static_cast<int>(col_t.dims()[0]);
-
-        if (input_row_begin < input_row_end) {
-          Tensor in_t = in_g->Slice(input_row_begin, input_row_end);
-
-          std::vector<int64_t> output_shape(
-              {sequence_height, 1, 1, context_length,
-               sequence_width});  // output_height, output_width,
-                                  // input_channels, filter_height, filter_width
-          col_t.Resize(framework::make_ddim(output_shape));
-
-          std::vector<int64_t> input_shape(
-              {1, input_row_end - input_row_begin,
-               sequence_width});  // input_channels, input_height, input_width
-          in_t.Resize(framework::make_ddim(input_shape));
-
-          col2im_ocf(context.device_context(), in_t, col_t,
-                     /*stride_height*/ context_stride, /*stride_width*/ 1,
-                     up_pad, down_pad, 0, 0);
-        }
-        col_t.Resize(framework::make_ddim(
-            {sequence_height, context_length * sequence_width}));
-      }
+      seq_project_functor(context.device_context(), *in_g, *padding_data_g, col,
+                          padding_trainable, context_start, context_length,
+                          context_stride, up_pad, down_pad, true, true, false);
     }
 
     if (padding_trainable && padding_data_g) {
@@ -181,66 +148,10 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
       math::SetConstant<Place, T> functor;
       functor(context.device_context(), padding_data_g, 0);
 
-      for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
-        Tensor col_t = col.Slice(static_cast<int>(lod_g_level_0[i]),
-                                 static_cast<int>(lod_g_level_0[i + 1]));
-
-        sequence_height = static_cast<int>(col_t.dims()[0]);
-
-        col_t.Resize(framework::make_ddim(
-            {sequence_height * context_length, sequence_width}));
-
-        if (up_pad > 0) {  // add up pad
-          int padding_rows = std::min(
-              up_pad,
-              static_cast<int>(lod_g_level_0[i + 1] - lod_g_level_0[i]));
-
-          for (int k = 0; k < padding_rows; ++k) {
-            int padding_size =
-                k + context_length < up_pad ? context_length : up_pad - k;
-            Tensor out_t_sub = col_t.Slice(k * context_length,
-                                           k * context_length + padding_size);
-            Tensor w_sub = padding_data_g->Slice(k, k + padding_size);
-            // in this block, using EigenVector<T>::Flatten is ok too.
-            auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
-            auto w_sub_e = EigenMatrix<T>::From(w_sub);
-            w_sub_e.device(place) = w_sub_e + out_t_sub_e;
-          }
-        }
-        if (down_pad > 0) {  // add down pad
-          int down_pad_begin_row =
-              std::max(0,
-                       (sequence_height - context_start - context_length) + 1) +
-              1;
-          int padding_begin = std::max(0, context_start - sequence_height);
-          int padding_size =
-              sequence_height - context_start >= context_length
-                  ? 1
-                  : context_length - (sequence_height - context_start);
-          if (context_start >= sequence_height) padding_size = context_length;
-          int padding_idx = padding_begin;
-          for (int t = 0; t + down_pad_begin_row <= sequence_height;
-               ++t, ++padding_size) {
-            if (context_start >= sequence_height) padding_size = context_length;
-            if (padding_size > context_length) {
-              padding_size = context_length;
-              padding_idx++;
-            }
-            if (padding_begin > 0 || sequence_height == context_start)
-              padding_idx = padding_begin + t;
-            Tensor out_t_sub = col_t.Slice(
-                (down_pad_begin_row + t) * context_length - padding_size,
-                (down_pad_begin_row + t) * context_length);
-            Tensor w_sub = padding_data_g->Slice(
-                up_pad + padding_idx, up_pad + padding_idx + padding_size);
-            auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
-            auto w_sub_e = EigenMatrix<T>::From(w_sub);
-            w_sub_e.device(place) = w_sub_e + out_t_sub_e;
-          }
-        }
-        col_t.Resize(framework::make_ddim(
-            {sequence_height, context_length * sequence_width}));
-      }
+      LoDTensor* input = const_cast<LoDTensor*>(in);
+      seq_project_functor(context.device_context(), *input, *padding_data_g,
+                          col, padding_trainable, context_start, context_length,
+                          context_stride, up_pad, down_pad, true, false, true);
     }
 
     if (filter_g) {
@@ -259,12 +170,13 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
 
       sequence_width = static_cast<int>(in->dims()[1]);
 
-      paddle::operators::math::SequenceProjectFunctor<Place, T>
-          seq_project_functor;
+      LoDTensor* input = const_cast<LoDTensor*>(in);
+      LoDTensor* pad_data = const_cast<LoDTensor*>(padding_data);
 
-      seq_project_functor(context.device_context(), in, padding_data, &col,
+      seq_project_functor(context.device_context(), *input, *pad_data, col,
                           padding_trainable, context_start, context_length,
-                          context_stride, up_pad, down_pad);
+                          context_stride, up_pad, down_pad, false, false,
+                          false);
 
       filter_grad_.Resize(
           framework::make_ddim({context_length * sequence_width, 1}));
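To make the padding arithmetic concrete: `up_pad`/`down_pad` are derived from `context_start`/`context_length` exactly as in the kernels above, and `col` is laid out as `{input_rows, context_length * sequence_width}` (see `col_shape` in the grad kernel). A self-contained sketch of that arithmetic, with illustrative values and no Paddle dependencies:

```cpp
#include <algorithm>
#include <cstdio>

int main() {
  // Illustrative configuration: a 3-step context window starting one step
  // before the current position, over a batch with 10 input rows of width 4.
  int context_start = -1, context_length = 3;
  int sequence_width = 4;
  int input_rows = 10;

  // Same formulas as in SequenceConv(Grad)Kernel: the number of trainable
  // padding rows needed above the first and below the last sequence step.
  int up_pad = std::max(0, -context_start);
  int down_pad = std::max(0, context_start + context_length - 1);

  std::printf("up_pad = %d, down_pad = %d\n", up_pad, down_pad);  // 1 and 1
  std::printf("col shape = {%d, %d}\n", input_rows,
              context_length * sequence_width);  // {10, 12}
  return 0;
}
```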