diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index e381545d272829d0c60a0992f35337b4b5c78a10..2560c0a5aace83b111dcf02b3349335cfdc78274 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -7,7 +7,7 @@ if(WITH_GPU)
     nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator)
     nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context)
     nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context)
-    nv_library(sequence_project SRCS sequence_project.cc sequence_project.cu DEPS device_context math_function)
+    nv_library(sequence_project SRCS sequence_project.cc sequence_project.cu DEPS device_context)
 else()
     cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator)
     cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function)
@@ -15,7 +15,7 @@ else()
     cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator)
     cc_library(pooling SRCS pooling.cc DEPS device_context)
     cc_library(vol2col SRCS vol2col.cc DEPS device_context)
-    cc_library(sequence_project SRCS sequence_project.cc DEPS device_context math_function)
+    cc_library(sequence_project SRCS sequence_project.cc DEPS device_context)
 endif()
 
 cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
diff --git a/paddle/operators/math/sequence_project.h b/paddle/operators/math/sequence_project.h
index 64a27d885dd6e733cb2194f3d0ed83f864a0b710..a2ab86f790df313b04866b259883d716a4ce5502 100644
--- a/paddle/operators/math/sequence_project.h
+++ b/paddle/operators/math/sequence_project.h
@@ -18,7 +18,6 @@ limitations under the License. */
 #include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/tensor.h"
 #include "paddle/operators/math/im2col.h"
-#include "paddle/operators/math/math_function.h"
 
 namespace paddle {
 namespace operators {
@@ -32,37 +31,59 @@ template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
 /*
- * \brief Converts the feature data of four dimensions(CDHW) into a colData of
- * seven dimensions in the Vol2ColFunctor calculation,
- * And in the Col2VolFunctor calculation, it is reversed.
+ * \brief SequenceProject projects features of context_length time-steps of
+ * each instance.
  *
- * \param volData   Vol data.
- * \param volShape  The shape of volData,
- *                  [input_channels, input_depth, input_height, input_width].
- * \param colData   Column data.
- * \param colShape  The shape of colData.
+ * \param in           Input data.
+ *                     The shape of in is
+ *                     [minibatch, number_of_input_features].
+ *                     It is a float LoDTensor.
  *
- * The shape of colData is:
- * [input_channels, filter_depth, filter_height, filter_width, output_depth,
- * output_height, output_width]
- * So, it is easy to reshape into a convolution matrix for convolution
- * calculation based on matrix multiplication.
- * The shape of convolution matrix is [height, width], where the height is equal
- * input_channels * filter_depth * filter_height * filter_width, and the width
- * is equal output_depth * output_height * output_width.
+ * \param padding_data Padding data.
+ *                     The shape of padding_data is
+ *                     [up_pad + down_pad, number_of_input_features].
+ *                     It is a float LoDTensor.
  *
- * Reshape:
- *     shape of colData           shape of convolution matrix
- *     [input_channels,
- *      filter_depth,
- *      filter_height,
- *      filter_width,      ======>      [height, width]
- *      output_depth,
- *      output_height,
- *      output_width]
+ * \param col          Col data.
+ *                     The shape of col is
+ *                     [minibatch, 1].
+ *                     It is a float LoDTensor.
+ *
+ * For a mini-batch of 2 variable-length sentences, containing 3 and 1
+ * time-steps respectively:
+ *
+ * Assume input (X) is a [4, M, N] float LoDTensor, and X->lod()[0] = [0, 3,
+ * 4].
+ * Besides, for the sake of simplicity, we assume M=1 and N=2.
+ *
+ * X = [[a1, a2;
+ *       b1, b2;
+ *       c1, c2]
+ *      [d1, d2]]
+ *
+ * That is to say, input (X) has 4 words and the dimension of each word
+ * representation is 2.
+ *
+ * - Case1:
+ *   If context_start is -1, padding_trainable is false (zeros are used for
+ *   padding instead of learned weights), and context_length is 3, the
+ *   output (Out) is:
+ *
+ *   Out = [[0,  0,  a1, a2, b1, b2;
+ *           a1, a2, b1, b2, c1, c2;
+ *           b1, b2, c1, c2, 0,  0 ]
+ *          [0,  0,  d1, d2, 0,  0 ]]
+ *
+ * - Case2:
+ *   If context_start is -1, padding_trainable is true (learned weights are
+ *   used for padding), and context_length is 3, the output (Out) is:
+ *
+ *   Out = [[w1, w2, a1, a2, b1, b2;
+ *           a1, a2, b1, b2, c1, c2;
+ *           b1, b2, c1, c2, w3, w4]
+ *          [w1, w2, d1, d2, w3, w4]]
  *
- * \note The caller needs to ensure that volShape.inputChannels is equal to
- *       colShape.inputChannels.
 */
 
 template <typename Place, typename T>
@@ -96,14 +117,16 @@ class SequenceProjectFunctor {
       sequence_height = static_cast<int>(out_t.dims()[0]);
 
-      std::vector<int64_t> output_shape(
-          {sequence_height, 1, 1, context_length,
-           sequence_width});  // output_height, output_width,
-                              // input_channels, filter_height, filter_width
-      out_t.Resize(framework::make_ddim(output_shape));
-
       if (input_row_begin < input_row_end) {
         framework::Tensor in_t = in->Slice(input_row_begin, input_row_end);
+
+        std::vector<int64_t> output_shape(
+            {sequence_height, 1, 1, context_length,
+             sequence_width});  // output_height, output_width,
+                                // input_channels, filter_height, filter_width
+
+        out_t.Resize(framework::make_ddim(output_shape));
+
         std::vector<int64_t> input_shape(
             {1, input_row_end - input_row_begin,
              sequence_width});  // input_channels, input_height, input_width
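The Case1/Case2 tables in the comment above are concrete enough to check against a runnable model. Below is a minimal standalone sketch of the projection the comment describes, not Paddle's actual `SequenceProjectFunctor`; the function name, the plain `std::vector` buffers, and the padding-row layout (up-pad rows first, then down-pad rows) are all assumptions made for illustration:

```cpp
#include <cstdio>
#include <vector>

// x:   flattened row-major input, lod.back() rows of n features each.
// lod: level-0 offsets, e.g. {0, 3, 4} for sequences of 3 and 1 time-steps.
// pad: rows of learned padding weights, up-pad rows first, then down-pad
//      rows; only read when trainable is true (Case2).
std::vector<float> SequenceProject(const std::vector<float>& x,
                                   const std::vector<int>& lod, int n,
                                   int context_start, int context_length,
                                   bool trainable,
                                   const std::vector<float>& pad) {
  int rows = lod.back();
  int up_pad = context_start < 0 ? -context_start : 0;
  std::vector<float> out(rows * context_length * n, 0.f);  // zeros = Case1 pad
  for (size_t s = 0; s + 1 < lod.size(); ++s) {
    for (int t = lod[s]; t < lod[s + 1]; ++t) {    // each time-step
      for (int c = 0; c < context_length; ++c) {   // each context slot
        int src = t + context_start + c;           // row this slot reads from
        float* dst = &out[(t * context_length + c) * n];
        if (src >= lod[s] && src < lod[s + 1]) {   // inside this sequence
          for (int j = 0; j < n; ++j) dst[j] = x[src * n + j];
        } else if (trainable) {                    // Case2: learned weights
          int p = src < lod[s] ? lod[s] - src - 1 : up_pad + src - lod[s + 1];
          for (int j = 0; j < n; ++j) dst[j] = pad[p * n + j];
        }                                          // else: keep zeros (Case1)
      }
    }
  }
  return out;
}

int main() {
  // X from the comment: a=(1,2), b=(3,4), c=(5,6), d=(7,8), so N=2.
  std::vector<float> x = {1, 2, 3, 4, 5, 6, 7, 8};
  std::vector<int> lod = {0, 3, 4};
  std::vector<float> out =
      SequenceProject(x, lod, /*n=*/2, /*context_start=*/-1,
                      /*context_length=*/3, /*trainable=*/false, {});
  for (int t = 0; t < 4; ++t) {  // prints the Case1 matrix, one step per line
    for (int j = 0; j < 6; ++j) std::printf("%g ", out[t * 6 + j]);
    std::printf("\n");
  }
  return 0;
}
```

Compiled and run as-is, this prints the Case1 matrix row by row; passing `trainable=true` with a two-row `pad` reproduces Case2.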
diff --git a/paddle/operators/sequence_conv_op.cc b/paddle/operators/sequence_conv_op.cc
index 1fc23302dcb329ea94aeec4da00f5752ac43a712..d286d334a24c6ee516a530c73a092d45fdbb38ed 100644
--- a/paddle/operators/sequence_conv_op.cc
+++ b/paddle/operators/sequence_conv_op.cc
@@ -135,39 +135,18 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker {
         .GreaterThan(0);
 
     AddComment(R"DOC(
-    SequenceConvOp projects features of context_length time-steps of each instance.
-
-    For a mini-batch of 2 variable lengths sentences, containing 3, and 1 time-steps:
-
-    Assumed input (X) is a [4, M, N] float LoDTensor, and X->lod()[0] = [0, 3, 4].
-    Besides, for the sake of simplicity, we assume M=1 and N=2.
-
-    X = [[a1, a2;
-          b1, b2;
-          c1, c2]
-         [d1, d2]]
-
-    This is to say that input (X) has 4 words and the dimension of each word
-    representation is 2.
-
-    - Case1:
-      If context_start is -1 and padding_trainable is false, we use zero to pad instead of learned weight to pad,
-      and the context_lenth is 3, the output (Out) is:
-
-      Out =[[0,  0,  a1, a2, b1, b2;
-             a1, a2, b1, b2, c1, c2;
-             b1, b2, c1, c2, 0,  0 ]
-            [0,  0,  d1, d2, 0,  0 ]]
-
-    - Case2:
-      If context_start is -1 and padding_trainable is true, we use learned weight to pad,
-      and the context_lenth is 3, the output (Out) is:
-
-      Out = [[w1, w2, a1, a2, b1, b2;
-              a1, a2, b1, b2, c1, c2;
-              b1, b2, c1, c2, w3, w4]
-             [w1, w2, d1, d2, w3, w4]]
-
+    SequenceConvOp performs a convolution operation on the features of
+    context_length time-steps of each instance.
+    The convolution operation calculates the output based on the input,
+    the filter, and the strides and paddings parameters. The size of each
+    dimension of these parameters is checked during infer-shape.
+
+Example:
+  Input:
+    X shape: (minibatch, number_of_input_features)
+    Filter shape: (context_length, number_of_input_features)
+  Output:
+    Out shape: (minibatch, 1)
 )DOC");
   }
 };
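The Example block is the op's contract in shape terms: each output row is the dot product of one projected input row with the flattened filter. A rough sketch of that shape arithmetic, using plain arrays rather than Paddle tensors (all names and values here are illustrative only):

```cpp
#include <cstdio>
#include <vector>

int main() {
  const int minibatch = 4, context_length = 3, n = 2;  // n = input features
  // Projected input: one row per time-step, each row the concatenation of
  // context_length windows of n features -> (minibatch, context_length * n).
  std::vector<float> col(minibatch * context_length * n, 1.f);
  // Filter (context_length, n), flattened row-major to width context_length*n.
  std::vector<float> filter(context_length * n, 0.5f);
  std::vector<float> out(minibatch, 0.f);  // Out shape: (minibatch, 1)
  for (int i = 0; i < minibatch; ++i)
    for (int k = 0; k < context_length * n; ++k)
      out[i] += col[i * context_length * n + k] * filter[k];
  for (float v : out) std::printf("%g\n", v);  // each row: 6 * 1.0 * 0.5 = 3
  return 0;
}
```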
diff --git a/paddle/operators/sequence_conv_op.h b/paddle/operators/sequence_conv_op.h
index a8bda2f046d0e7e4f92df0fceb28409dc3c6bf31..b6ae12f6bb0d4f6650db29b9a358ac43b0052945 100644
--- a/paddle/operators/sequence_conv_op.h
+++ b/paddle/operators/sequence_conv_op.h
@@ -182,12 +182,6 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
       functor(context.device_context(), padding_data_g, 0);
 
       for (int i = 0; i < static_cast<int>(lod_g_level_0.size()) - 1; ++i) {
-        input_row_begin =
-            (context_start > 0)
-                ? static_cast<int>(lod_g_level_0[i]) + context_start
-                : static_cast<int>(lod_g_level_0[i]);
-        input_row_end = static_cast<int>(lod_g_level_0[i + 1]);
-
         Tensor col_t = col.Slice(static_cast<int>(lod_g_level_0[i]),
                                  static_cast<int>(lod_g_level_0[i + 1]));
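The deleted lines computed `input_row_begin`/`input_row_end`, the per-sequence input range shifted by `context_start` when it is positive; slicing `col` only needs the raw level-0 LoD offsets, so presumably that computation now happens where the input gradient itself is sliced. A simplified sketch of the loop pattern that remains, with plain vectors standing in for the LoD bookkeeping (the `lod` values are illustrative):

```cpp
#include <cstdio>
#include <vector>

int main() {
  // Level-0 LoD offsets: two sequences covering rows [0, 3) and [3, 4).
  std::vector<int> lod = {0, 3, 4};
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    int begin = lod[i], end = lod[i + 1];
    // Stands in for: Tensor col_t = col.Slice(begin, end);
    std::printf("sequence %zu: rows [%d, %d), %d time-steps\n", i, begin, end,
                end - begin);
  }
  return 0;
}
```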