From 1251501caf2c268e3e221520e624d5f64cce8c58 Mon Sep 17 00:00:00 2001
From: xutianbing <xutianbing@baidu.com>
Date: Tue, 24 Jan 2017 21:19:59 -0800
Subject: [PATCH] Rewrite ContextProjectionOp unit test using the new FunctionTest framework.

---
 paddle/function/BufferArg.h                 |   3 +-
 paddle/function/ContextProjectionOp.cpp     |  70 +++++-----
 paddle/function/ContextProjectionOp.h       |   1 -
 paddle/function/ContextProjectionOpTest.cpp | 139 ++++++-------------
 paddle/function/FunctionTest.h              | 146 +++++++++++++++-----
 5 files changed, 185 insertions(+), 174 deletions(-)
diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h
index 349b21e7e6..f7df56846f 100644
--- a/paddle/function/BufferArg.h
+++ b/paddle/function/BufferArg.h
@@ -226,7 +226,8 @@ public:
   SequenceArg(ValueType valueType,
               const TensorShape& shape,
               ArgType argType = UNSPECIFIED)
-      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {
+      : BufferArg(valueType, shape, argType),
+        startPositions_(TensorShape({shape[0]})) {
     bufferType_ = TENSOR_SEQUENCE_DATA;
   }
 
diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp
index 6cd4e4abee..d81ea58401 100644
--- a/paddle/function/ContextProjectionOp.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -108,26 +108,23 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK(1 == inputs.size() || 2 == inputs.size());
-    CHECK_EQ((size_t)1, outputs.size());
+    CHECK(1UL == inputs.size() || 2UL == inputs.size());
+    CHECK_EQ(1UL, outputs.size());
     CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
         << "SequenceArg required here";
     const auto val_seqs = dynamic_cast<const SequenceArg&>(inputs[0]);
     auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
 
     CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data());
-    CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
-    CHECK_EQ(val_seqs.shape().ndims(), (size_t)2);
-    CHECK_EQ(val_seqs.getSequenceId().shape().ndims(), (size_t)1);
-    if (2 == inputs.size()) {
-      CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
-    }
+    CHECK_EQ(out_seq.shape().ndims(), 2UL);
+    CHECK_EQ(val_seqs.shape().ndims(), 2UL);
     /// dim of output = dim of input * context_length
     CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_);
     /// input and output has the same batch_size
     CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]);
-    /// dim of input == dim of weight
-    if (2 == inputs.size()) {
+    if (2UL == inputs.size()) {
+      CHECK_EQ(inputs[1].shape().ndims(), 2UL);
+      /// dim of input == dim of weight
       CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]);
     }
 
@@ -135,10 +132,11 @@ public:
     auto out_mat = out_seq.matrix<Device>();
     const auto in_mat = val_seqs.matrix<Device>();
     const auto w_mat =
-        (2 == inputs.size())
+        (2UL == inputs.size())
             ? inputs[1].matrix<Device>()
             : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
     const auto seq_vec = val_seqs.getSequenceId().vector<int, Device>();
+
     ContextProjectionForward<Device>(out_mat,
                                      in_mat,
                                      w_mat,
@@ -235,36 +233,40 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ((size_t)1, inputs.size());
-    CHECK_EQ((size_t)2, outputs.size());
+    CHECK_EQ(1UL, inputs.size());
+    CHECK(1UL == outputs.size() || 2UL == outputs.size());
     CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
         << "SequenceArg required here";
     const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
     auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
     CHECK(in_seq.data() && in_seq.getSequenceId().data());
-    CHECK_EQ(in_seq.shape().ndims(), (size_t)2);
-    CHECK_EQ(in_seq.getSequenceId().shape().ndims(), (size_t)1);
-    CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
-    CHECK_EQ(out_seq.getSequenceId().shape().ndims(), (size_t)1);
-    CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
+    CHECK_EQ(in_seq.shape().ndims(), 2UL);
+    CHECK_EQ(out_seq.shape().ndims(), 2UL);
+    CHECK_EQ(out_seq.getSequenceId().shape().ndims(), 1UL);
 
-    /// dim of input grad == dim of weight
-    CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
     /// input and output grad has the same batch_size
     CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]);
     /// dim of output grad = dim of input grad * context_length
     CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_);
     CHECK_EQ(out_seq.getArgType(), ADD_TO);
-    CHECK_EQ(outputs[1].getArgType(), ADD_TO);
+
+    if (2UL == outputs.size()) {
+      CHECK_EQ(outputs[1].shape().ndims(), 2UL);
+      /// dim of input grad == dim of weight
+      CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
+      CHECK_EQ(outputs[1].getArgType(), ADD_TO);
+    }
 
     const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
     const auto out_grad_mat = in_seq.matrix<Device>();
     auto in_grad_mat =
         !out_seq.data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
                         : out_seq.matrix<Device>();
-    auto w_grad_mat = !outputs[1].data()
-                          ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
-                          : outputs[1].matrix<Device>();
+    auto w_grad_mat =
+        (2UL == outputs.size())
+            ? outputs[1].matrix<Device>()
+            : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
+
     ContextProjectionBackward<Device>(out_grad_mat,
                                       in_grad_mat,
                                       w_grad_mat,
@@ -304,17 +306,17 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ(1, static_cast<int>(inputs.size()));
-    CHECK_EQ(1, static_cast<int>(outputs.size()));
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
     CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
         << "SequenceArg required here";
     const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
     const auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
 
     CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data());
-    CHECK_EQ(static_cast<int>(out_seq.shape().ndims()), 2);
-    CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
-    CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
+    CHECK_EQ(out_seq.shape().ndims(), 2UL);
+    CHECK_EQ(in_seq.shape().ndims(), 2UL);
+    CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL);
     /// output layer grad dim == input layer grad dim * context_length_
     CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_);
     /// input and output has the same batch_size
@@ -355,14 +357,14 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ(1, static_cast<int>(inputs.size()));
-    CHECK_EQ(1, static_cast<int>(outputs.size()));
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
     CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here";
     const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
     CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data());
-    CHECK_EQ(static_cast<int>(outputs[0].shape().ndims()), 2);
-    CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
-    CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
+    CHECK_EQ(outputs[0].shape().ndims(), 2UL);
+    CHECK_EQ(in_seq.shape().ndims(), 2UL);
+    CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL);
     CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]);
     /// output layer grad dim == weight dim * context_length_
     CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_);
diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h
index 2bdd47e4e9..6f7d936379 100644
--- a/paddle/function/ContextProjectionOp.h
+++ b/paddle/function/ContextProjectionOp.h
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-
 #include "Function.h"
 
 namespace paddle {
diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp
index c9db2ff800..0f5d6a848d 100644
--- a/paddle/function/ContextProjectionOpTest.cpp
+++ b/paddle/function/ContextProjectionOpTest.cpp
@@ -28,55 +28,26 @@ void testMatrixProjectionForward(int context_start,
                std::max(0, (int)(context_start + context_length - 1));
   if (pad == 0) is_padding = false;
 
-  FunctionCompare compare("ContextProjectionForward",
-                          FuncConfig()
-                              .set("context_length", context_length)
-                              .set("context_start", context_start)
-                              .set("begin_pad", std::max(0, -context_start)));
-
-  CpuMatrix cpu_in(batch_size, input_dim);
-  cpu_in.randomizeUniform();
-  GpuMatrix gpu_in(batch_size, input_dim);
-  gpu_in.copyFrom(cpu_in);
-  auto cpu_weight =
-      is_padding ? std::make_shared<CpuMatrix>(pad, input_dim) : nullptr;
-  auto gpu_weight =
-      is_padding ? std::make_shared<GpuMatrix>(pad, input_dim) : nullptr;
-  if (is_padding) {
-    cpu_weight->randomizeUniform();
-    gpu_weight->copyFrom(*cpu_weight);
+  FunctionCompare test("ContextProjectionForward",
+                       FuncConfig()
+                           .set("context_length", context_length)
+                           .set("context_start", context_start)
+                           .set("begin_pad", std::max(0, -context_start)));
+
+  // prepare input arguments
+  test.addSequence(SequenceIdArg(TensorShape{batch_size}));
+  test.addInputs(
+      SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim}));
+  if (is_padding) {  // weight
+    test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim}));
   }
-  IVectorPtr cpu_seq;
-  generateSequenceStartPositions(batch_size, cpu_seq);
-  IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true);
-  gpu_seq->copyFrom(*cpu_seq);
-
-  CpuMatrix cpu_out(batch_size, input_dim * context_length);
-  GpuMatrix gpu_out(batch_size, input_dim * context_length);
-  cpu_out.randomizeUniform();
-  gpu_out.copyFrom(cpu_out);
-
-  BufferArgs cpu_inputs;
-  BufferArgs cpu_outputs;
-  cpu_inputs.addArg(cpu_in, *cpu_seq);
-  if (cpu_weight) {
-    cpu_inputs.addArg(*cpu_weight, *cpu_seq);
-  }
-  cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
-
-  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+  test.addOutputs(
+      SequenceArg(VALUE_TYPE_FLOAT,
+                  TensorShape{batch_size, input_dim * context_length}),
+      ADD_TO);
 
-  BufferArgs gpu_inputs;
-  BufferArgs gpu_outputs;
-  gpu_inputs.addArg(gpu_in, *gpu_seq);
-  if (gpu_weight) {
-    gpu_inputs.addArg(*gpu_weight, *gpu_seq);
-  }
-  gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
-
-  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
-
-  autotest::TensorCheckEqual(cpu_out, gpu_out);
+  // run Function
+  test.run();
 }
 
 void testMatrixProjectionBackward(int context_start,
@@ -88,63 +59,31 @@ void testMatrixProjectionBackward(int context_start,
                std::max(0, (int)(context_start + context_length - 1));
   if (pad == 0) is_padding = false;
 
-  FunctionCompare compare("ContextProjectionBackward",
-                          FuncConfig()
-                              .set("context_length", context_length)
-                              .set("context_start", context_start)
-                              .set("begin_pad", std::max(0, -context_start))
-                              .set("is_padding", is_padding)
-                              .set("total_pad", pad));
-
-  CpuMatrix cpu_in_grad(batch_size, input_dim);
-  cpu_in_grad.randomizeUniform();
-  GpuMatrix gpu_in_grad(batch_size, input_dim);
-  gpu_in_grad.copyFrom(cpu_in_grad);
-
-  CpuMatrix cpu_out_grad(batch_size, input_dim * context_length);
-  cpu_out_grad.randomizeUniform();
-  GpuMatrix gpu_out_grad(batch_size, input_dim * context_length);
-  gpu_out_grad.copyFrom(cpu_out_grad);
-
-  IVectorPtr cpu_seq;
-  generateSequenceStartPositions(batch_size, cpu_seq);
-  IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true);
-  gpu_seq->copyFrom(*cpu_seq);
-
-  auto cpu_w_grad =
-      is_padding ? std::make_shared<CpuMatrix>(pad, input_dim) : nullptr;
-  auto gpu_w_grad =
-      is_padding ? std::make_shared<GpuMatrix>(pad, input_dim) : nullptr;
-  if (is_padding) {
-    cpu_w_grad->randomizeUniform();
-    gpu_w_grad->copyFrom(*cpu_w_grad);
+  FunctionCompare test("ContextProjectionBackward",
+                       FuncConfig()
+                           .set("context_length", context_length)
+                           .set("context_start", context_start)
+                           .set("begin_pad", std::max(0, -context_start))
+                           .set("is_padding", is_padding)
+                           .set("total_pad", pad));
+
+  // prepare input arguments
+  test.addSequence(SequenceIdArg(TensorShape{batch_size}));
+  test.addInputs(SequenceArg(
+      VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim * context_length}));
+  test.addOutputs(
+      SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim}),
+      ADD_TO);
+  if (is_padding) {  // weight
+    test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim}),
+                    ADD_TO);
   }
 
-  BufferArgs cpu_inputs;
-  BufferArgs cpu_outputs;
-  cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
-  cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
-  cpu_outputs.addArg(
-      cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
-
-  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
-
-  BufferArgs gpu_inputs;
-  BufferArgs gpu_outputs;
-  gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
-  gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
-  gpu_outputs.addArg(
-      gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
-
-  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
-
-  autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
-  if (is_padding) {
-    autotest::TensorCheckErr(*cpu_w_grad, *gpu_w_grad);
-  }
+  // run Function
+  test.run();
 }
 
-TEST(ContextProjection, projection) {
+TEST(ContextProjection, Projection) {
   for (auto context_start : {-5, -3, -1, 0, 3}) {
     for (auto context_length : {1, 2, 5, 7}) {
       for (auto trainable_padding : {false, true}) {
diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h
index 00f59f97d4..076aace14c 100644
--- a/paddle/function/FunctionTest.h
+++ b/paddle/function/FunctionTest.h
@@ -69,6 +69,54 @@ public:
         gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
   }
 
+  // assume one copy of sequence is shared by different SequenceArgs
+  void addSequence(const SequenceIdArg& input) {
+    CHECK_EQ(input.shape().ndims(), 1UL);
+    size_t batchSize = input.shape()[0];
+    size_t numSeqs = batchSize / 10 + 1;
+    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
+    cpuSeq_ = std::make_shared<SequenceIdArg>(cpuMemory_.back()->getBuf(),
+                                              TensorShape{numSeqs + 1});
+    gpuSeq_ = std::make_shared<SequenceIdArg>(gpuMemory_.back()->getBuf(),
+                                              TensorShape{numSeqs + 1});
+    /// init sequence Id
+    initArg(*cpuSeq_, batchSize);
+
+    // todo(tianbing), delete it
+    CHECK_EQ(cpuSeq_->shape().getElements(), cpuSeq_->numSeqs() + 1);
+
+    CpuIVector cpuSeq(cpuSeq_->shape().getElements(), (int*)cpuSeq_->data());
+    GpuIVector gpuSeq(gpuSeq_->shape().getElements(), (int*)gpuSeq_->data());
+    gpuSeq.copyFrom(cpuSeq);
+  }
+
+  void addInputs(const SequenceArg& input) {
+    CHECK_EQ(input.shape().ndims(), 2UL);
+    size_t batchSize = input.shape()[0];
+    if (!cpuSeq_ || !gpuSeq_) {  // sequence not exist
+      addSequence(SequenceIdArg(TensorShape{batchSize}));
+    }
+
+    size_t size =
+        input.shape().getElements() * sizeOfValuType(input.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    /// SequenceArg
+    cpuInputs_.emplace_back(
+        std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(),
+                                      input.valueType(),
+                                      input.shape(),
+                                      *cpuSeq_));
+    gpuInputs_.emplace_back(
+        std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(),
+                                      input.valueType(),
+                                      input.shape(),
+                                      *gpuSeq_));
+  }
+
   // output need only contains shape, do not contains data.
   void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
     size_t size =
@@ -86,6 +134,7 @@ public:
                                     output.valueType(),
                                     output.shape(),
                                     argType));
+
   }
 
   /// add and init output sparse matrix
@@ -116,24 +165,31 @@ public:
         std::make_shared<SparseMatrixArg>(*gpuSparse_, argType));
   }
 
-  void addInputs(const SequenceArg& input) {
-    size_t batchSize = input.shape()[0];
-    size_t numSeqs = batchSize / 10 + 1;
-
-    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
-
-    TensorShape seqsId({numSeqs + 1});
-    // void* cpuBuffer = cpuMemory_.back()->getBuf();
-    // void* gpuBuffer = gpuMemory_.back()->getBuf();
+  void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) {
+    CHECK_EQ(output.shape().ndims(), 2UL);
+    size_t batchSize = output.shape()[0];
 
+    if (!cpuSeq_ || !gpuSeq_) {  // sequence not exist
+      addSequence(SequenceIdArg(TensorShape{batchSize}));
+    }
     size_t size =
-        input.shape().getElements() * sizeOfValuType(input.valueType());
+        output.shape().getElements() * sizeOfValuType(output.valueType());
     cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
     gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
 
-    // TODO: need be implemented.
+    /// SequenceArg
+    cpuOutputs_.emplace_back(
+        std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(),
+                                      output.valueType(),
+                                      output.shape(),
+                                      *cpuSeq_,
+                                      argType));
+    gpuOutputs_.emplace_back(
+        std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(),
+                                      output.valueType(),
+                                      output.shape(),
+                                      *gpuSeq_,
+                                      argType));
   }
 
   void addInputs(const SparseMatrixArg& input) {
@@ -193,14 +249,44 @@ public:
   std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }
 
 protected:
+  // only init cpu argument, gpu argument copy from cpu argument.
+  void initArg(BufferArg& arg) {
+    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    vector.uniform(0.001, 1);
+  }
+
+  void initArg(SequenceArg& arg) {
+    /// init only matrix
+    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    vector.uniform(0.001, 1);
+  }
+
+  void initArg(SequenceIdArg& arg, size_t batchSize) {
+    size_t numSeqs = arg.numSeqs();
+    int* buf = reinterpret_cast<int*>(arg.data());
+    int pos = 0;
+    size_t maxLen = 2 * batchSize / numSeqs;
+    for (int i = 0; i < (int)numSeqs; ++i) {
+      int len = 1 + uniformRandom(std::min<int64_t>(
+                        maxLen, batchSize - pos - numSeqs + i));
+      buf[i] = pos;
+      pos += len;
+      VLOG(1) << " len=" << len;
+    }
+    buf[numSeqs] = batchSize;
+  }
+
   void initInputs() {
     for (size_t i = 0; i < cpuInputs_.size(); i++) {
       if (cpuInputs_[i]->isSparseArg()) {
         continue;  /// sparse matrix already init
       }
 
-      initArg(*cpuInputs_[i]);
-
+      if (cpuInputs_[i]->isSequenceArg()) {
+        initArg(dynamic_cast<SequenceArg&>(*cpuInputs_[i]));
+      } else {
+        initArg(*cpuInputs_[i]);
+      }
       // TODO: Need a BufferCopy used to copy from one BufferArg to another.
       CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
                           (real*)cpuInputs_[i]->data());
@@ -217,7 +303,11 @@ protected:
         continue;  /// sparse matrix already init
       }
 
-      initArg(*cpuOutputs_[i]);
+      if (cpuOutputs_[i]->isSequenceArg()) {
+        initArg(dynamic_cast<SequenceArg&>(*cpuOutputs_[i]));
+      } else {
+        initArg(*cpuOutputs_[i]);
+      }
 
       // TODO: Need a BufferCopy used to copy from one BufferArg to another.
       CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
@@ -241,28 +331,6 @@ protected:
     }
   }
 
-  // only init cpu argument, gpu argument copy from cpu argument.
-  void initArg(BufferArg& arg) {
-    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
-    vector.uniform(0.001, 1);
-  }
-
-  void initArg(SequenceIdArg& arg, size_t batchSize) {
-    size_t numSeqs = arg.numSeqs();
-    int* buf = reinterpret_cast<int*>(arg.data());
-    int pos = 0;
-    size_t maxLen = 2 * batchSize / numSeqs;
-    for (int i = 0; i < (int)numSeqs; ++i) {
-      int len = uniformRandom(
-                    std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
-                1;
-      buf[i] = pos;
-      pos += len;
-      VLOG(1) << " len=" << len;
-    }
-    buf[numSeqs] = batchSize;
-  }
-
 protected:
   std::shared_ptr<FunctionBase> cpuFunc_;
   std::shared_ptr<FunctionBase> gpuFunc_;
@@ -274,6 +342,8 @@ protected:
   std::vector<BufferArgPtr> gpuOutputs_;
   std::shared_ptr<CpuSparseMatrix> cpuSparse_;
   std::shared_ptr<GpuSparseMatrix> gpuSparse_;
+  std::shared_ptr<SequenceIdArg> cpuSeq_;
+  std::shared_ptr<SequenceIdArg> gpuSeq_;
 };
 
 }  // namespace paddle
-- 
GitLab