diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 1247daafc57714394858f161e609acdd0b941493..e4a2f5765a10cbeb24b71b5cea7677172c058875 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -110,7 +110,7 @@ Vector<size_t> repeat_lod(Vector<size_t> data, Vector<size_t> starts,
   size_t p = 0, start = 0, end = 0;
   if (is_first == true) {
     for (size_t i = 0; i < times.size(); ++i) {
-      result.push_back(data.back() + times[i] * (data[i + 1] - data[i]));
+      result.push_back(result.back() + times[i] * (data[i + 1] - data[i]));
     }
   } else {
     for (size_t i = 0; i < times.size(); ++i) {
diff --git a/paddle/operators/seq_expand_op.cc b/paddle/operators/seq_expand_op.cc
index 63b17a10f59d069d654e73d11666fad68d742ef2..59d713548930e4e80e0db0225bfabca4da35f95a 100644
--- a/paddle/operators/seq_expand_op.cc
+++ b/paddle/operators/seq_expand_op.cc
@@ -60,7 +60,8 @@ As an example:
 
 Given:
 
-X = [1, 2 , 3]
+X.data = [1, 2 , 3, 4]
+X.lod = [[0, 3, 4], [0, 1, 3, 4]]
 
 and
 
@@ -69,8 +70,8 @@ repeat = 2
 
 then we get
 
-Out.data = [1, 1, 2, 2, 3, 3]
-Out.lod = [[0, 2, 4, 6]]
+Out.data = [1, 2, 3, 1, 2, 3, 4, 4]
+Out.lod = [[0, 6, 8], [0, 3, 6, 7, 8], [0, 1, 3, 4, 6, 7, 8]]
 
 )DOC");
   }
@@ -83,6 +84,7 @@ class SeqExpandOpGrad : public framework::OperatorWithKernel {
  protected:
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("Out"), "Input(Out) should not be null");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                    "Input(Out@GRAD) should not be null");
     auto x_dims = ctx->GetInputDim("X");
@@ -93,30 +95,12 @@ class SeqExpandOpGrad : public framework::OperatorWithKernel {
   }
 };
 
-class SeqExpandOpGradMaker : public framework::SingleGradOpDescMaker {
- public:
-  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
-
- protected:
-  std::unique_ptr<framework::OpDescBind> Apply() const override {
-    auto* bind = new framework::OpDescBind();
-    bind->SetInput("X", Input("X"));
-    bind->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
-    bind->SetOutput(framework::GradVarName("X"), InputGrad("X"));
-    bind->SetAttrMap(Attrs());
-    bind->SetType("seq_expand_grad");
-    return std::unique_ptr<framework::OpDescBind>(bind);
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-
-REGISTER_OPERATOR(seq_expand, ops::SeqExpandOp, ops::SeqExpandOpMaker,
-                  ops::SeqExpandOpGradMaker);
-REGISTER_OPERATOR(seq_expand_grad, ops::SeqExpandOpGrad);
+REGISTER_OP(seq_expand, ops::SeqExpandOp, ops::SeqExpandOpMaker,
+            seq_expand_grad, ops::SeqExpandOpGrad);
 REGISTER_OP_CPU_KERNEL(seq_expand,
                        ops::SeqExpandKernel<paddle::platform::CPUPlace, float>);
 REGISTER_OP_CPU_KERNEL(
diff --git a/paddle/operators/seq_expand_op.h b/paddle/operators/seq_expand_op.h
index 221393f9093848c2aa9e68433fe0b039baaf2d0b..8b7bda54c05d101a448911461d59d7501c92ec1f 100644
--- a/paddle/operators/seq_expand_op.h
+++ b/paddle/operators/seq_expand_op.h
@@ -16,6 +16,7 @@
 
 #include "paddle/framework/op_registry.h"
 #include "paddle/memory/memcpy.h"
+#include "unsupported/Eigen/CXX11/Tensor"
 
 namespace paddle {
 namespace operators {
@@ -93,9 +94,29 @@ template <typename Place, typename T>
 class SeqExpandGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
-    auto* d_x = context.Output<Tensor>(framework::GradVarName("X"));
-    d_x->mutable_data<T>(context.GetPlace());
+    auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
+    auto* x = context.Input<LoDTensor>("X");
+    auto* out = context.Input<LoDTensor>("Out");
+    auto out_lod = out->lod();
+    d_x->set_lod(x->lod());
+    const T* d_out_data = d_out->data<T>();
+    auto d_out_dims = d_out->dims();
+    T* d_x_data = d_x->mutable_data<T>(context.GetPlace());
+    size_t element_len = framework::product(d_out_dims) / d_out_dims[0];
+    for (size_t i = 0; i < out->NumElements(); ++i) {
+      size_t ele_count = out_lod[0][i + 1] - out_lod[0][i];
+      size_t repeat = out->NumElements(0, i);
+      Eigen::TensorMap<Eigen::Tensor<const T, 2>> d_out_t(
+          d_out_data, static_cast<int>(repeat),
+          static_cast<int>((ele_count * element_len) / repeat));
+      Eigen::TensorMap<Eigen::Tensor<T, 1>> d_x_t(
+          d_x_data, static_cast<int>((ele_count * element_len) / repeat));
+      auto place = context.GetEigenDevice<Place>();
+      d_x_t.device(place) = d_out_t.sum(Eigen::array<int, 1>({0}));
+      d_out_data += (ele_count * element_len);
+      d_x_data += ((ele_count * element_len) / repeat);
+    }
   }
 };
 
diff --git a/paddle/operators/sequence_concat_op.cc b/paddle/operators/sequence_concat_op.cc
index 1fce96cdfe20fc3ab33a3cd00e9a03833c9b94f8..46f73e3c279835bbb4bfdd7dede03a5535186b24 100644
--- a/paddle/operators/sequence_concat_op.cc
+++ b/paddle/operators/sequence_concat_op.cc
@@ -68,12 +68,12 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
                  "The level should be less than the level number of inputs.")
         .SetDefault(0);
     AddComment(R"DOC(
-    The sequence_concat operator concatenates multiple LoDTensors. 
-    It only supports sequence (LoD Tensor with level number is 1) 
+    The sequence_concat operator concatenates multiple LoDTensors.
+    It only supports sequence (LoD Tensor with level number is 1)
     or a nested sequence (LoD tensor with level number is 2) as its input.
     - Case1:
       If the axis is other than 0(here, axis is 1 and level is 1),
-      each input should have the same LoD information and the LoD 
+      each input should have the same LoD information and the LoD
       information of the output keeps the same as the input.
 
       LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
@@ -81,7 +81,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
       LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4)
 
     - Case2:
-      If the axis is 0(here, leve is 0), the inputs are concatenated along 
+      If the axis is 0(here, level is 0), the inputs are concatenated along
       time steps, the LoD information of the output need to re-compute.
 
       LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
@@ -94,7 +94,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
       LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
       LoD(x1) = {{0,3,5}, {0,1,3,4,5}}; Dims(x1) = (5,3,4)
       LoD(Out) = {{0,5,9}, {0,2,5,7,9}}; Dims(Out) = (9,3,4)
-      
+
     NOTE: The levels of all the inputs should be the same.
     )DOC");
   }
diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py
index 3ef8ec3164b52c5ca0347c7f5d760d43355832f9..a88e9f0bb8213e28ffe3c605044efc89d9fcb7fe 100644
--- a/python/paddle/v2/framework/tests/op_test.py
+++ b/python/paddle/v2/framework/tests/op_test.py
@@ -246,9 +246,6 @@ class OpTest(unittest.TestCase):
             else:
                 actual = np.array(self.scope.find_var(out_name).get_tensor())
                 expect = self.outputs[out_name]
-                print "out_name: %s" % out_name
-                print "actual: %s" % actual
-                print "expcept: %s" % expect
                 self.assertTrue(
                     np.allclose(
                         actual, expect, atol=atol),
diff --git a/python/paddle/v2/framework/tests/test_seq_expand.py b/python/paddle/v2/framework/tests/test_seq_expand.py
index 2b9509413e30f064dd20f3ee6c083626a46da132..87e39d72bf5b4fdf8836de7807c9c70c9280b6db 100644
--- a/python/paddle/v2/framework/tests/test_seq_expand.py
+++ b/python/paddle/v2/framework/tests/test_seq_expand.py
@@ -62,9 +62,8 @@ class TestSeqExpand(OpTest):
     def test_check_output(self):
         self.check_output()
 
-
-#    def test_check_grad(self):
-#        self.check_grad(["X"], "Out")
+    def test_check_grad(self):
+        self.check_grad(["X"], "Out")
 
 
 class TestSeqExpandCase1(TestSeqExpand):