From b9edcc4a1b4f2c12e878169b21abcb4b4aab3fae Mon Sep 17 00:00:00 2001
From: chengduoZH
Date: Fri, 27 Oct 2017 11:12:15 +0800
Subject: [PATCH] sss

---
 paddle/operators/math/context_project.h | 161 +++++++++++++++++++-----
 paddle/operators/sequence_conv_op.h     |  32 +++--
 2 files changed, 141 insertions(+), 52 deletions(-)

diff --git a/paddle/operators/math/context_project.h b/paddle/operators/math/context_project.h
index b7466d206..7d9cdab2c 100644
--- a/paddle/operators/math/context_project.h
+++ b/paddle/operators/math/context_project.h
@@ -31,6 +31,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
  * a sequence. The i-th row of the output is the concatenation of
  * context_length rows of the input. The context_length rows are the
  * consecutive rows from the i+shift_start row.
+ * ContextProjectGradFunctor is the inverse process of ContextProjectFunctor.
  *
  * \param in Input data.
  * \param Shape The shape of Input data,
@@ -85,16 +86,126 @@ template <typename Place, typename T>
 class ContextProjectFunctor {
  public:
   void operator()(const platform::DeviceContext& context,
-                  framework::LoDTensor& in, framework::Tensor& padding_data,
-                  framework::Tensor& col, bool padding_trainable,
-                  int context_start, int context_length, int context_stride,
-                  int up_pad, int down_pad, bool gradient, bool input_grad,
-                  bool pad_grad) {
+                  const framework::LoDTensor& in,
+                  const framework::Tensor& padding_data, framework::Tensor& col,
+                  bool padding_trainable, int context_start, int context_length,
+                  int context_stride, int up_pad, int down_pad) {
     auto lod_level_0 = in.lod()[0];
     paddle::operators::math::Im2ColFunctor<
         paddle::operators::math::ColFormat::kOCF, Place, float>
         im2col_ocf;
+
+    int input_row_begin, input_row_end;
+    int sequence_height, sequence_width;
+    sequence_width = in.dims()[1];
+
+    for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+      input_row_begin = (context_start > 0)
+                            ? static_cast<int>(lod_level_0[i]) + context_start
+                            : static_cast<int>(lod_level_0[i]);
+      input_row_end = static_cast<int>(lod_level_0[i + 1]);
+
+      framework::Tensor out_t = col.Slice(static_cast<int>(lod_level_0[i]),
+                                          static_cast<int>(lod_level_0[i + 1]));
+
+      sequence_height = static_cast<int>(out_t.dims()[0]);
+
+      if (input_row_begin < input_row_end) {
+        framework::Tensor in_t = in.Slice(input_row_begin, input_row_end);
+
+        std::vector<int64_t> output_shape(
+            {sequence_height, 1, 1, context_length,
+             sequence_width});  // output_height, output_width,
+                                // input_channels, filter_height, filter_width
+
+        out_t.Resize(framework::make_ddim(output_shape));
+
+        std::vector<int64_t> input_shape(
+            {1, input_row_end - input_row_begin,
+             sequence_width});  // input_channels, input_height, input_width
+        in_t.Resize(framework::make_ddim(input_shape));
+
+        im2col_ocf(context, in_t, out_t,
+                   /*stride_height*/ context_stride, /*stride_width*/ 1, up_pad,
+                   down_pad, 0, 0);
+        out_t.Resize({sequence_height, context_length * sequence_width});
+      }
+    }
+    if (padding_trainable) {
+      for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+        framework::Tensor out_t =
+            col.Slice(static_cast<int>(lod_level_0[i]),
+                      static_cast<int>(lod_level_0[i + 1]));
+
+        sequence_height = static_cast<int>(out_t.dims()[0]);
+
+        // add up trainable data
+        out_t.Resize({sequence_height * context_length, sequence_width});
+
+        if (up_pad > 0) {  // add up pad
+          int padding_rows = std::min(
+              up_pad, static_cast<int>(lod_level_0[i + 1] - lod_level_0[i]));
+
+          for (int k = 0; k < padding_rows; ++k) {
+            int padding_size =
+                k + context_length < up_pad ? context_length : up_pad - k;
+            framework::Tensor out_t_sub = out_t.Slice(
+                k * context_length, k * context_length + padding_size);
+            framework::Tensor w_sub = padding_data.Slice(k, k + padding_size);
+            // in this block, using EigenVector<T>::Flatten is ok too.
+            auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
+            auto w_sub_e = EigenMatrix<T>::From(w_sub);
+            out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
+          }
+        }
+        if (down_pad > 0) {  // add down pad
+          int down_pad_begin_row =
+              std::max(0,
+                       (sequence_height - context_start - context_length) + 1) +
+              1;
+          int padding_begin = std::max(0, context_start - sequence_height);
+          int padding_size =
+              sequence_height - context_start >= context_length
+                  ? 1
+                  : context_length - (sequence_height - context_start);
+          if (context_start >= sequence_height) padding_size = context_length;
+          int padding_idx = padding_begin;
+          for (int t = 0; t + down_pad_begin_row <= sequence_height;
+               ++t, ++padding_size) {
+            if (context_start >= sequence_height) padding_size = context_length;
+            if (padding_size > context_length) {
+              padding_size = context_length;
+              padding_idx++;
+            }
+            if (padding_begin > 0 || sequence_height == context_start)
+              padding_idx = padding_begin + t;
+            framework::Tensor out_t_sub = out_t.Slice(
+                (down_pad_begin_row + t) * context_length - padding_size,
+                (down_pad_begin_row + t) * context_length);
+            framework::Tensor w_sub = padding_data.Slice(
+                up_pad + padding_idx, up_pad + padding_idx + padding_size);
+            auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
+            auto w_sub_e = EigenMatrix<T>::From(w_sub);
+            out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
+          }
+        }
+        out_t.Resize({sequence_height, context_length * sequence_width});
+      }
+    }
+  }
+};
+
+template <typename Place, typename T>
+class ContextProjectGradFunctor {
+ public:
+  void operator()(const platform::DeviceContext& context,
+                  framework::LoDTensor& in, framework::Tensor& padding_data,
+                  framework::Tensor& col, bool padding_trainable,
+                  int context_start, int context_length, int context_stride,
+                  int up_pad, int down_pad, bool input_grad, bool pad_grad) {
+    auto lod_level_0 = in.lod()[0];
+
     paddle::operators::math::Col2ImFunctor<
         paddle::operators::math::ColFormat::kOCF, Place, float>
         col2im_ocf;
@@ -102,10 +213,8 @@ class ContextProjectFunctor {
     int input_row_begin, input_row_end;
     int sequence_height, sequence_width;
     sequence_width = in.dims()[1];
-    input_grad = gradient && input_grad;
-    pad_grad = gradient && pad_grad;

-    if (!gradient || input_grad) {
+    if (input_grad) {
       for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
         input_row_begin = (context_start > 0)
                               ? static_cast<int>(lod_level_0[i]) + context_start
                               : static_cast<int>(lod_level_0[i]);
         input_row_end = static_cast<int>(lod_level_0[i + 1]);
@@ -133,20 +242,14 @@ class ContextProjectFunctor {
              sequence_width});  // input_channels, input_height, input_width
         in_t.Resize(framework::make_ddim(input_shape));

-        if (gradient) {
-          col2im_ocf(context, in_t, out_t,
-                     /*stride_height*/ context_stride, /*stride_width*/ 1,
-                     up_pad, down_pad, 0, 0);
-        } else {
-          im2col_ocf(context, in_t, out_t,
-                     /*stride_height*/ context_stride, /*stride_width*/ 1,
-                     up_pad, down_pad, 0, 0);
-        }
+        col2im_ocf(context, in_t, out_t,
+                   /*stride_height*/ context_stride, /*stride_width*/ 1,
+                   up_pad, down_pad, 0, 0);
         out_t.Resize({sequence_height, context_length * sequence_width});
       }
     }
   }
-    if (!gradient || pad_grad) {
+    if (pad_grad) {
       if (padding_trainable) {
         for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
           framework::Tensor out_t =
@@ -154,11 +257,9 @@ class ContextProjectFunctor {
                         static_cast<int>(lod_level_0[i + 1]));

           sequence_height = static_cast<int>(out_t.dims()[0]);
-
-          // add up trainable data
           out_t.Resize({sequence_height * context_length, sequence_width});

-          if (up_pad > 0) {  // add up pad
+          if (up_pad > 0) {
             int padding_rows = std::min(
                 up_pad, static_cast<int>(lod_level_0[i + 1] - lod_level_0[i]));

@@ -171,15 +272,11 @@ class ContextProjectFunctor {
               // in this block, using EigenVector<T>::Flatten is ok too.
               auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
               auto w_sub_e = EigenMatrix<T>::From(w_sub);
-              if (gradient) {
-                w_sub_e.device(*context.GetEigenDevice<Place>()) =
-                    w_sub_e + out_t_sub_e;
-              } else {
-                out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
-              }
+              w_sub_e.device(*context.GetEigenDevice<Place>()) =
+                  w_sub_e + out_t_sub_e;
             }
           }
-          if (down_pad > 0) {  // add down pad
+          if (down_pad > 0) {
             int down_pad_begin_row = std::max(
                 0,
                 (sequence_height - context_start - context_length) + 1) +
@@ -208,12 +305,8 @@ class ContextProjectFunctor {
                   up_pad + padding_idx, up_pad + padding_idx + padding_size);
               auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
               auto w_sub_e = EigenMatrix<T>::From(w_sub);
-              if (gradient) {
-                w_sub_e.device(*context.GetEigenDevice<Place>()) =
-                    w_sub_e + out_t_sub_e;
-              } else {
-                out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
-              }
+              w_sub_e.device(*context.GetEigenDevice<Place>()) =
+                  w_sub_e + out_t_sub_e;
             }
           }
           out_t.Resize({sequence_height, context_length * sequence_width});
diff --git a/paddle/operators/sequence_conv_op.h b/paddle/operators/sequence_conv_op.h
index c502601b3..5727238c0 100644
--- a/paddle/operators/sequence_conv_op.h
+++ b/paddle/operators/sequence_conv_op.h
@@ -65,12 +65,10 @@ class SequenceConvKernel : public framework::OpKernel<T> {

     paddle::operators::math::ContextProjectFunctor<Place, T>
         seq_project_functor;

-    LoDTensor* input = const_cast<LoDTensor*>(in);
-    Tensor* pad_data = const_cast<Tensor*>(padding_data);
-    seq_project_functor(context.device_context(), *input, *pad_data, col,
+    seq_project_functor(context.device_context(), *in, *padding_data, col,
                         padding_trainable, context_start, context_length,
-                        context_stride, up_pad, down_pad, false, false, false);
+                        context_stride, up_pad, down_pad);

     math::matmul<Place, T>(context.device_context(), col, false, filter, false,
                            static_cast<T>(1.0), out, static_cast<T>(0.0));
@@ -117,15 +115,18 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
     }

     paddle::operators::math::ContextProjectFunctor<Place, T>
         seq_project_functor;
+    paddle::operators::math::ContextProjectGradFunctor<Place, T>
+        seq_project_grad_functor;

     if (in_g) {
       in_g->mutable_data<T>(context.GetPlace());
       in_g->set_lod(in->lod());
       set_zero(context.device_context(), in_g, static_cast<T>(0));
-      seq_project_functor(context.device_context(), *in_g, *padding_data_g, col,
-                          padding_trainable, context_start, context_length,
-                          context_stride, up_pad, down_pad, true, true, false);
+      seq_project_grad_functor(context.device_context(), *in_g, *padding_data_g,
+                               col, padding_trainable, context_start,
+                               context_length, context_stride, up_pad, down_pad,
+                               true, false);
     }

     if (padding_trainable && padding_data_g) {
@@ -133,9 +134,10 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
       set_zero(context.device_context(), padding_data_g, static_cast<T>(0));

       LoDTensor* input = const_cast<LoDTensor*>(in);
-      seq_project_functor(context.device_context(), *input, *padding_data_g,
-                          col, padding_trainable, context_start, context_length,
-                          context_stride, up_pad, down_pad, true, false, true);
+      seq_project_grad_functor(context.device_context(), *input,
+                               *padding_data_g, col, padding_trainable,
+                               context_start, context_length, context_stride,
+                               up_pad, down_pad, false, true);
     }

     if (filter_g) {
@@ -150,15 +152,9 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
         padding_data = context.Input<Tensor>("PaddingData");
       }

-      sequence_width = static_cast<int>(in->dims()[1]);
-
-      LoDTensor* input = const_cast<LoDTensor*>(in);
-      Tensor* pad_data = const_cast<Tensor*>(padding_data);
-
-      seq_project_functor(context.device_context(), *input, *pad_data, col,
+      seq_project_functor(context.device_context(), *in, *padding_data, col,
                           padding_trainable, context_start, context_length,
-                          context_stride, up_pad, down_pad, false, false,
-                          false);
+                          context_stride, up_pad, down_pad);

       math::matmul<Place, T>(context.device_context(), col, true, out_grad,
                              false, T(1.0), &filter_grad, T(1.0));
--
GitLab
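Reviewer note (not part of the patch, placed after the signature delimiter so `git am` ignores it): the standalone sketch below illustrates the mapping that ContextProjectFunctor builds with im2col, namely that output row i is the concatenation of context_length consecutive input rows starting at i + context_start. It assumes a single sequence and zero (non-trainable) padding; the function name ContextProject and its parameter layout are hypothetical and only mirror the operator's attributes, whereas the real functor works on LoD batches, trainable padding, and device placement.

// Minimal illustrative sketch of the context projection (gather) step.
#include <iostream>
#include <vector>

// Concatenate `context_length` consecutive input rows (starting at
// i + context_start) into output row i; rows outside [0, rows) stay zero,
// which corresponds to padding_trainable == false.
std::vector<float> ContextProject(const std::vector<float>& in, int rows,
                                  int width, int context_start,
                                  int context_length) {
  std::vector<float> out(rows * context_length * width, 0.f);
  for (int i = 0; i < rows; ++i) {
    for (int c = 0; c < context_length; ++c) {
      int src = i + context_start + c;
      if (src < 0 || src >= rows) continue;  // zero padding
      for (int w = 0; w < width; ++w) {
        out[(i * context_length + c) * width + w] = in[src * width + w];
      }
    }
  }
  return out;
}

int main() {
  // One sequence of 4 time steps with 2 features each.
  std::vector<float> in = {1, 2, 3, 4, 5, 6, 7, 8};
  auto col = ContextProject(in, /*rows=*/4, /*width=*/2,
                            /*context_start=*/-1, /*context_length=*/3);
  // Each output row holds context_length * width = 6 values.
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 6; ++j) std::cout << col[i * 6 + j] << ' ';
    std::cout << '\n';
  }
  return 0;
}

ContextProjectGradFunctor performs the reverse of this gather (col2im plus accumulation into the padding weights), which is why the patch splits the old single functor with a `gradient` flag into separate forward and backward functors.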