diff --git a/src/operators/kernel/arm/im2sequence_kernel.cpp b/src/operators/kernel/arm/im2sequence_kernel.cpp
index 8295fd94a31db2ad1c10d32a8c639b067e422f45..cc6ae2ae8bc7cde9b365817ba9cafc19776da913 100644
--- a/src/operators/kernel/arm/im2sequence_kernel.cpp
+++ b/src/operators/kernel/arm/im2sequence_kernel.cpp
@@ -35,7 +35,7 @@ template <>
 void Im2SequenceKernel<CPU, float>::Compute(
     const Im2SequenceParam<CPU> &param) const {
   const Tensor *in_x = param.Input();
-  Tensor *out = param.Output();
+  framework::LoDTensor *out = param.Output();
   out->mutable_data<float>();
 
   std::vector<int> kernels = param.Kernels();
@@ -52,22 +52,31 @@ void Im2SequenceKernel<CPU, float>::Compute(
                                        paddings[2], strides[0]);
   int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
                                       paddings[3], strides[1]);
-  const std::vector<int> dilations({1, 1});
 
+  out->mutable_data<float>({batch_size * output_height * output_width,
+                            img_channels * kernels[0] * kernels[1]});
+  const std::vector<int> dilations({1, 1});
   // TODO: verify
   auto out_dims = out->dims();
   out->Resize({batch_size, out->numel() / batch_size});
-
   for (int i = 0; i < batch_size; i++) {
     const Tensor src =
         in_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
     Tensor dst = out->Slice(i, i + 1).Resize(
         {output_height, output_width, img_channels, kernels[0], kernels[1]});
-
     math::Im2ColFunctor<math::ColFormat::kOCF, CPU, float> f;
     f(src, dilations, strides, paddings, &dst);
   }
   out->Resize(out_dims);
+  framework::LoD lod(1);
+  lod[0].reserve(batch_size + 1);
+  int offset = 0;
+  lod[0].push_back(offset);
+  for (int i = 0; i < batch_size; ++i) {
+    offset += output_height * output_width;
+    lod[0].push_back(offset);
+  }
+  out->set_lod(lod);
 }
 
 template class Im2SequenceKernel<CPU, float>;
diff --git a/test/net/test_eng.cpp b/test/net/test_eng.cpp
index d10cb33a6922f4a60b798211ab4415312b86796c..4a78af8310cf7f1db976fbc344a21dd0bb4b25a6 100644
--- a/test/net/test_eng.cpp
+++ b/test/net/test_eng.cpp
@@ -27,9 +27,9 @@ int main() {
                          true)) {
     auto time2 = time();
-    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
-    std::vector<int64_t> dims{1, 1, 48, 48};
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+    std::vector<int64_t> dims{1, 1, 48, 512};
     LoDTensor input_tensor;
-    SetupTensor<float>(&input_tensor, {1, 1, 48, 48}, static_cast<float>(0),
+    SetupTensor<float>(&input_tensor, {1, 1, 48, 512}, static_cast<float>(0),
                        static_cast<float>(1));
 
     std::vector<float> input(input_tensor.data<float>(),
@@ -39,9 +39,9 @@ int main() {
       paddle_mobile.PredictLod(input_tensor);
     }
     auto time3 = time();
-    //        for (int i = 0; i < 10; ++i) {
-    //            paddle_mobile.Predict(input, dims);
-    //        }
+    for (int i = 0; i < 1; ++i) {
+      paddle_mobile.PredictLod(input_tensor);
+    }
     auto time4 = time();
     std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
               << std::endl;