diff --git a/paddle/operators/math/context_project.h b/paddle/operators/math/context_project.h index b7466d206e700ff88a974ef1f8eea9c160d31497..7d9cdab2cf397e7181ce0d0df091c653c22d8cc9 100644 --- a/paddle/operators/math/context_project.h +++ b/paddle/operators/math/context_project.h @@ -31,6 +31,7 @@ using EigenMatrix = framework::EigenMatrix; * a sequence. The i-th row of the output is the concatenation of * context_length rows of the input. The context_length rows are the * consecutive rows from the i+shift_start row. + * ContextProjectGradFunctor is the inverse process of ContextProjectFunctor. * \param in Input data. * \param Shape The shape of Input data, @@ -85,16 +86,126 @@ template class ContextProjectFunctor { public: void operator()(const platform::DeviceContext& context, - framework::LoDTensor& in, framework::Tensor& padding_data, - framework::Tensor& col, bool padding_trainable, - int context_start, int context_length, int context_stride, - int up_pad, int down_pad, bool gradient, bool input_grad, - bool pad_grad) { + const framework::LoDTensor& in, + const framework::Tensor& padding_data, framework::Tensor& col, + bool padding_trainable, int context_start, int context_length, + int context_stride, int up_pad, int down_pad) { auto lod_level_0 = in.lod()[0]; paddle::operators::math::Im2ColFunctor< paddle::operators::math::ColFormat::kOCF, Place, float> im2col_ocf; + + int input_row_begin, input_row_end; + int sequence_height, sequence_width; + sequence_width = in.dims()[1]; + + for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { + input_row_begin = (context_start > 0) + ? static_cast(lod_level_0[i]) + context_start + : static_cast(lod_level_0[i]); + input_row_end = static_cast(lod_level_0[i + 1]); + + framework::Tensor out_t = col.Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); + + sequence_height = static_cast(out_t.dims()[0]); + + if (input_row_begin < input_row_end) { + framework::Tensor in_t = in.Slice(input_row_begin, input_row_end); + + std::vector output_shape( + {sequence_height, 1, 1, context_length, + sequence_width}); // output_height, output_width, + // input_channels, filter_height, filter_width + + out_t.Resize(framework::make_ddim(output_shape)); + + std::vector input_shape( + {1, input_row_end - input_row_begin, + sequence_width}); // input_channels, input_height, input_width + in_t.Resize(framework::make_ddim(input_shape)); + + im2col_ocf(context, in_t, out_t, + /*stride_height*/ context_stride, /*stride_width*/ 1, up_pad, + down_pad, 0, 0); + out_t.Resize({sequence_height, context_length * sequence_width}); + } + } + if (padding_trainable) { + for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { + framework::Tensor out_t = + col.Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); + + sequence_height = static_cast(out_t.dims()[0]); + + // add up trainable data + out_t.Resize({sequence_height * context_length, sequence_width}); + + if (up_pad > 0) { // add up pad + int padding_rows = std::min( + up_pad, static_cast(lod_level_0[i + 1] - lod_level_0[i])); + + for (int k = 0; k < padding_rows; ++k) { + int padding_size = + k + context_length < up_pad ? context_length : up_pad - k; + framework::Tensor out_t_sub = out_t.Slice( + k * context_length, k * context_length + padding_size); + framework::Tensor w_sub = padding_data.Slice(k, k + padding_size); + // in this block, using EigenVector::Flatten is ok too. + auto out_t_sub_e = EigenMatrix::From(out_t_sub); + auto w_sub_e = EigenMatrix::From(w_sub); + out_t_sub_e.device(*context.GetEigenDevice()) = w_sub_e; + } + } + if (down_pad > 0) { // add down pad + int down_pad_begin_row = + std::max(0, + (sequence_height - context_start - context_length) + 1) + + 1; + int padding_begin = std::max(0, context_start - sequence_height); + int padding_size = + sequence_height - context_start >= context_length + ? 1 + : context_length - (sequence_height - context_start); + if (context_start >= sequence_height) padding_size = context_length; + int padding_idx = padding_begin; + for (int t = 0; t + down_pad_begin_row <= sequence_height; + ++t, ++padding_size) { + if (context_start >= sequence_height) padding_size = context_length; + if (padding_size > context_length) { + padding_size = context_length; + padding_idx++; + } + if (padding_begin > 0 || sequence_height == context_start) + padding_idx = padding_begin + t; + framework::Tensor out_t_sub = out_t.Slice( + (down_pad_begin_row + t) * context_length - padding_size, + (down_pad_begin_row + t) * context_length); + framework::Tensor w_sub = padding_data.Slice( + up_pad + padding_idx, up_pad + padding_idx + padding_size); + auto out_t_sub_e = EigenMatrix::From(out_t_sub); + auto w_sub_e = EigenMatrix::From(w_sub); + out_t_sub_e.device(*context.GetEigenDevice()) = w_sub_e; + } + } + out_t.Resize({sequence_height, context_length * sequence_width}); + } + } + } +}; + +template +class ContextProjectGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + framework::LoDTensor& in, framework::Tensor& padding_data, + framework::Tensor& col, bool padding_trainable, + int context_start, int context_length, int context_stride, + int up_pad, int down_pad, bool input_grad, bool pad_grad) { + auto lod_level_0 = in.lod()[0]; + paddle::operators::math::Col2ImFunctor< paddle::operators::math::ColFormat::kOCF, Place, float> col2im_ocf; @@ -102,10 +213,8 @@ class ContextProjectFunctor { int input_row_begin, input_row_end; int sequence_height, sequence_width; sequence_width = in.dims()[1]; - input_grad = gradient && input_grad; - pad_grad = gradient && pad_grad; - if (!gradient || input_grad) { + if (input_grad) { for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { input_row_begin = (context_start > 0) ? static_cast(lod_level_0[i]) + context_start @@ -133,20 +242,14 @@ class ContextProjectFunctor { sequence_width}); // input_channels, input_height, input_width in_t.Resize(framework::make_ddim(input_shape)); - if (gradient) { - col2im_ocf(context, in_t, out_t, - /*stride_height*/ context_stride, /*stride_width*/ 1, - up_pad, down_pad, 0, 0); - } else { - im2col_ocf(context, in_t, out_t, - /*stride_height*/ context_stride, /*stride_width*/ 1, - up_pad, down_pad, 0, 0); - } + col2im_ocf(context, in_t, out_t, + /*stride_height*/ context_stride, /*stride_width*/ 1, + up_pad, down_pad, 0, 0); out_t.Resize({sequence_height, context_length * sequence_width}); } } } - if (!gradient || pad_grad) { + if (pad_grad) { if (padding_trainable) { for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { framework::Tensor out_t = @@ -154,11 +257,9 @@ class ContextProjectFunctor { static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); - - // add up trainable data out_t.Resize({sequence_height * context_length, sequence_width}); - if (up_pad > 0) { // add up pad + if (up_pad > 0) { int padding_rows = std::min( up_pad, static_cast(lod_level_0[i + 1] - lod_level_0[i])); @@ -171,15 +272,11 @@ class ContextProjectFunctor { // in this block, using EigenVector::Flatten is ok too. auto out_t_sub_e = EigenMatrix::From(out_t_sub); auto w_sub_e = EigenMatrix::From(w_sub); - if (gradient) { - w_sub_e.device(*context.GetEigenDevice()) = - w_sub_e + out_t_sub_e; - } else { - out_t_sub_e.device(*context.GetEigenDevice()) = w_sub_e; - } + w_sub_e.device(*context.GetEigenDevice()) = + w_sub_e + out_t_sub_e; } } - if (down_pad > 0) { // add down pad + if (down_pad > 0) { int down_pad_begin_row = std::max( 0, (sequence_height - context_start - context_length) + 1) + @@ -208,12 +305,8 @@ class ContextProjectFunctor { up_pad + padding_idx, up_pad + padding_idx + padding_size); auto out_t_sub_e = EigenMatrix::From(out_t_sub); auto w_sub_e = EigenMatrix::From(w_sub); - if (gradient) { - w_sub_e.device(*context.GetEigenDevice()) = - w_sub_e + out_t_sub_e; - } else { - out_t_sub_e.device(*context.GetEigenDevice()) = w_sub_e; - } + w_sub_e.device(*context.GetEigenDevice()) = + w_sub_e + out_t_sub_e; } } out_t.Resize({sequence_height, context_length * sequence_width}); diff --git a/paddle/operators/sequence_conv_op.h b/paddle/operators/sequence_conv_op.h index c502601b3800e3ed45680cd51b2c9af8e0761a16..5727238c0d49cdbf6a9cfa988d3280323fa4ebb3 100644 --- a/paddle/operators/sequence_conv_op.h +++ b/paddle/operators/sequence_conv_op.h @@ -65,12 +65,10 @@ class SequenceConvKernel : public framework::OpKernel { paddle::operators::math::ContextProjectFunctor seq_project_functor; - LoDTensor* input = const_cast(in); - Tensor* pad_data = const_cast(padding_data); - seq_project_functor(context.device_context(), *input, *pad_data, col, + seq_project_functor(context.device_context(), *in, *padding_data, col, padding_trainable, context_start, context_length, - context_stride, up_pad, down_pad, false, false, false); + context_stride, up_pad, down_pad); math::matmul(context.device_context(), col, false, filter, false, static_cast(1.0), out, static_cast(0.0)); @@ -117,15 +115,18 @@ class SequenceConvGradKernel : public framework::OpKernel { } paddle::operators::math::ContextProjectFunctor seq_project_functor; + paddle::operators::math::ContextProjectGradFunctor + seq_project_grad_functor; if (in_g) { in_g->mutable_data(context.GetPlace()); in_g->set_lod(in->lod()); set_zero(context.device_context(), in_g, static_cast(0)); - seq_project_functor(context.device_context(), *in_g, *padding_data_g, col, - padding_trainable, context_start, context_length, - context_stride, up_pad, down_pad, true, true, false); + seq_project_grad_functor(context.device_context(), *in_g, *padding_data_g, + col, padding_trainable, context_start, + context_length, context_stride, up_pad, down_pad, + true, false); } if (padding_trainable && padding_data_g) { @@ -133,9 +134,10 @@ class SequenceConvGradKernel : public framework::OpKernel { set_zero(context.device_context(), padding_data_g, static_cast(0)); LoDTensor* input = const_cast(in); - seq_project_functor(context.device_context(), *input, *padding_data_g, - col, padding_trainable, context_start, context_length, - context_stride, up_pad, down_pad, true, false, true); + seq_project_grad_functor(context.device_context(), *input, + *padding_data_g, col, padding_trainable, + context_start, context_length, context_stride, + up_pad, down_pad, false, true); } if (filter_g) { @@ -150,15 +152,9 @@ class SequenceConvGradKernel : public framework::OpKernel { padding_data = context.Input("PaddingData"); } - sequence_width = static_cast(in->dims()[1]); - - LoDTensor* input = const_cast(in); - Tensor* pad_data = const_cast(padding_data); - - seq_project_functor(context.device_context(), *input, *pad_data, col, + seq_project_functor(context.device_context(), *in, *padding_data, col, padding_trainable, context_start, context_length, - context_stride, up_pad, down_pad, false, false, - false); + context_stride, up_pad, down_pad); math::matmul(context.device_context(), col, true, out_grad, false, T(1.0), &filter_grad, T(1.0));