Commit 42c102a0 authored by caoying03

follow comments.

Parent ffafc5c9
...
@@ -29,7 +29,7 @@ public:
       vals.push_back(s.str());
     }
     size_t pos = 0;
-    int i = 0;
+    size_t i = 0;
     std::ostringstream s;
     const std::string& format = config_.user_arg();
     while (true) {
...
...
@@ -31,22 +31,42 @@ public:
   void backward(const UpdateCallback& callback = nullptr) override;
 
 private:
-  void reorganizeSeqInfo(const ICpuGpuVectorPtr seqStartPos,
-                         const ICpuGpuVectorPtr subSeqStartPos);
-  void calSelectedCols(const MatrixPtr selectedIndices,
-                       const std::vector<std::vector<int>> inputSeqInfo);
-  void buildOutputSeqInfo();
-
-  std::vector<int> outSeqStartInfo_;
-  std::vector<int> outSubSeqStartInfo_;
+  /*
+   * This function generates the indices of rows in a batch according to the
+   * indices of the selected sub-sequences in each sequence.
+   *
+   * Example:
+   * selectedIndices:
+   *   [
+   *     [0, 1, -1],
+   *     [0, 1, 2],
+   *     [0, -1, -1],
+   *     [0, 2, 3],
+   *   ]
+   * inputSeqInfo:
+   *   [
+   *     [0, 3, 4],
+   *     [4, 5, 7, 10, 15],
+   *     [15, 20],
+   *     [20, 22, 23, 25, 28]
+   *   ]
+   *
+   * The output is saved to the private member rowIndice_:
+   *   [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+   *    15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27]
+   */
+  void calSelectedCols(const MatrixPtr selectedIndices,
+                       const std::vector<std::vector<int>>& inputSeqInfo);
 
   // if the second input of this layer is on GPU memory, copy it to CPU memory.
   MatrixPtr selIdsCpu_;
-  // reorganize sequenceStartPositions and subSequenceStartPositions altogether
+  // reorganized sequenceStartPositions and subSequenceStartPositions
   // into a 2d vector to facilitate the sequence selection process.
-  std::vector<std::vector<int>> inputSeqInfo_;
-  // the final seleted row indices in a batch,
+  std::vector<std::vector<int>> inputSeqInfoVec_;
+  // the final selected row indices in a batch,
   // rowIdx_ and selectedRows_ actually share a same memory.
   IVectorPtr rowIndice_;
   std::vector<int> selectedRows_;
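To make the documented mapping concrete, here is a minimal Python sketch of the same row-selection logic; the helper name cal_selected_rows is hypothetical and only illustrates the behaviour of the C++ calSelectedCols shown in this diff. It reproduces the example from the new comment:

def cal_selected_rows(selected_indices, input_seq_info):
    """Map the selected sub-sequence indices of each sequence to batch rows."""
    selected_rows = []
    out_seq_start = [0]
    out_sub_seq_start = [0]
    for sel_row, starts in zip(selected_indices, input_seq_info):
        for sub_seq_idx in sel_row:
            if sub_seq_idx == -1:  # -1 marks the end of valid selections
                break
            begin, end = starts[sub_seq_idx], starts[sub_seq_idx + 1]
            selected_rows.extend(range(begin, end))
            out_sub_seq_start.append(out_sub_seq_start[-1] + (end - begin))
        out_seq_start.append(out_sub_seq_start[-1])
    return selected_rows, out_seq_start, out_sub_seq_start


selected_indices = [[0, 1, -1], [0, 1, 2], [0, -1, -1], [0, 2, 3]]
input_seq_info = [[0, 3, 4], [4, 5, 7, 10, 15], [15, 20], [20, 22, 23, 25, 28]]
rows, seq_start, sub_seq_start = cal_selected_rows(selected_indices, input_seq_info)
print(rows)  # [0, ..., 9, 15, ..., 21, 23, ..., 27]: 22 selected rows in total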
...
@@ -63,30 +83,13 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-void SubNestedSequenceLayer::reorganizeSeqInfo(
-    const ICpuGpuVectorPtr seqStartPos, const ICpuGpuVectorPtr subSeqStartPos) {
-  int* seqStarts = seqStartPos->getMutableData(false);
-  int* subSeqStarts = subSeqStartPos->getMutableData(false);
-
-  int seqNum = seqStartPos->getSize() - 1;
-  inputSeqInfo_.resize(seqNum, std::vector<int>());
-  int seqIdx = 0;
-  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
-    inputSeqInfo_[seqIdx].push_back(subSeqStarts[i]);
-    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
-      seqIdx++;
-      if (seqIdx == seqNum) return;
-      inputSeqInfo_[seqIdx].push_back(subSeqStarts[i]);
-    }
-  }
-}
-
 void SubNestedSequenceLayer::calSelectedCols(
     const MatrixPtr selectedIndices,
-    const std::vector<std::vector<int>> inputSeqInfo) {
+    const std::vector<std::vector<int>>& inputSeqInfo) {
   selectedRows_.clear();
-  outSubSeqStartInfo_.resize(1, 0);
-  outSeqStartInfo_.resize(1, 0);
+  std::vector<int> outSeqStartInfo(1, 0);
+  std::vector<int> outSubSeqStartInfo(1, 0);
 
   size_t seqNum = selectedIndices->getHeight();
   size_t beamSize = selectedIndices->getWidth();
@@ -94,30 +97,35 @@ void SubNestedSequenceLayer::calSelectedCols(
     for (size_t j = 0; j < beamSize; ++j) {
       if (selectedIndices->getElement(i, j) == -1.) break;
       int selSubSeqIdx = selectedIndices->getElement(i, j);
-      CHECK_GT(inputSeqInfo_[i].size() - 1, selSubSeqIdx);
+      CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
 
-      size_t subSeqLen =
-          inputSeqInfo_[i][selSubSeqIdx + 1] - inputSeqInfo_[i][selSubSeqIdx];
+      size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] -
+                         inputSeqInfoVec_[i][selSubSeqIdx];
       for (size_t k = 0; k < subSeqLen; ++k)
-        selectedRows_.push_back(inputSeqInfo_[i][selSubSeqIdx] + k);
-      outSubSeqStartInfo_.push_back(outSubSeqStartInfo_.back() + subSeqLen);
+        selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k);
+      outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen);
     }
-    outSeqStartInfo_.push_back(outSubSeqStartInfo_.back());
+    outSeqStartInfo.push_back(outSubSeqStartInfo.back());
   }
-}
-
-void SubNestedSequenceLayer::buildOutputSeqInfo() {
-  Argument& output = getOutput();
 
+  if (useGpu_) {
+    rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
+    rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
+  } else {
+    rowIndice_ =
+        IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
+  }
+
+  // create the sequence information for the output.
   ICpuGpuVector::resizeOrCreate(
-      output.sequenceStartPositions, outSeqStartInfo_.size(), false);
-  output.sequenceStartPositions->copyFrom(
-      outSeqStartInfo_.data(), outSeqStartInfo_.size(), false);
+      output_.sequenceStartPositions, outSeqStartInfo.size(), false);
+  output_.sequenceStartPositions->copyFrom(
+      outSeqStartInfo.data(), outSeqStartInfo.size(), false);
 
   ICpuGpuVector::resizeOrCreate(
-      output.subSequenceStartPositions, outSubSeqStartInfo_.size(), false);
-  output.subSequenceStartPositions->copyFrom(
-      outSubSeqStartInfo_.data(), outSubSeqStartInfo_.size(), false);
+      output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false);
+  output_.subSequenceStartPositions->copyFrom(
+      outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false);
 }
 
 void SubNestedSequenceLayer::forward(PassType passType) {
...
@@ -131,7 +139,7 @@ void SubNestedSequenceLayer::forward(PassType passType) {
   if (dynamic_cast<GpuMatrix*>(selectedIndices.get())) {
     /*
-     * Currently, the second input for this layer generated by
+     * Currently, the second input for this layer is generated by
      * kmax_sequence_score_layer whose output is always stored on CPU,
      * or a data_layer which can be on GPU.
      *
...
@@ -149,20 +157,12 @@ void SubNestedSequenceLayer::forward(PassType passType) {
     selIdsCpu_ = selectedIndices;
   }
 
-  reorganizeSeqInfo(inputSeq.sequenceStartPositions,
-                    inputSeq.subSequenceStartPositions);
-  calSelectedCols(selIdsCpu_, inputSeqInfo_);
-  resetOutput(selectedRows_.size(), getSize());
-
-  if (useGpu_) {
-    rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
-    rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
-  } else {
-    rowIndice_ =
-        IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
-  }
-
-  buildOutputSeqInfo();
+  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
+                              inputSeq.subSequenceStartPositions,
+                              inputSeqInfoVec_);
+  calSelectedCols(selIdsCpu_, inputSeqInfoVec_);
+
+  resetOutput(selectedRows_.size(), getSize());
   getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
 }
...
...
@@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input,
   }
 }
 
+void Argument::reorganizeSeqInfo(
+    const ICpuGpuVectorPtr seqStartPos,
+    const ICpuGpuVectorPtr subSeqStartPos,
+    std::vector<std::vector<int>>& reorganizedSeqInfo) {
+  int* seqStarts = seqStartPos->getMutableData(false);
+  int* subSeqStarts = subSeqStartPos->getMutableData(false);
+
+  int seqNum = seqStartPos->getSize() - 1;
+  reorganizedSeqInfo.resize(seqNum, std::vector<int>());
+  int seqIdx = 0;
+  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
+    reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
+      seqIdx++;
+      if (seqIdx == seqNum) return;
+      reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+    }
+  }
+}
+
 }  // namespace paddle
...
@@ -317,6 +317,30 @@ struct Argument {
    */
   void printValueString(std::ostream& stream,
                         const std::string& prefix = "") const;
+
+  /**
+   * @brief reorganizeSeqInfo will reorganize sequenceStartPositions and
+   * subSequenceStartPositions into a two-dimensional array:
+   * reorganizedSeqInfo.
+   *
+   * @param seqStartPos: sequenceStartPositions of an Argument.
+   * @param subSeqStartPos: subSequenceStartPositions of an Argument.
+   * @param reorganizedSeqInfo: the reorganized sequence start position
+   * information.
+   *
+   * Example:
+   * seqStartPos: [0, 4, 15, 20, 28]
+   * subSeqStartPos: [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28]
+   * reorganizedSeqInfo:
+   *   [
+   *     [0, 3, 4],
+   *     [4, 5, 7, 10, 15],
+   *     [15, 20],
+   *     [20, 22, 23, 25, 28]
+   *   ]
+   */
+  static void reorganizeSeqInfo(
+      const ICpuGpuVectorPtr seqStartPos,
+      const ICpuGpuVectorPtr subSeqStartPos,
+      std::vector<std::vector<int>>& reorganizedSeqInfo);
 };
 
 }  // namespace paddle
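As a cross-check of the example in the new doc comment above, a few lines of Python sketch the grouping that Argument::reorganizeSeqInfo performs; the name reorganize_seq_info is a hypothetical stand-in used only for illustration, not part of the Paddle API:

def reorganize_seq_info(seq_start_pos, sub_seq_start_pos):
    """Group sub-sequence start positions by the sequence they belong to."""
    reorganized = [[] for _ in range(len(seq_start_pos) - 1)]
    seq_idx = 0
    for pos in sub_seq_start_pos:
        reorganized[seq_idx].append(pos)
        # a boundary that ends the current sequence also starts the next one,
        # so it is recorded in both groups
        if pos == seq_start_pos[seq_idx + 1]:
            seq_idx += 1
            if seq_idx == len(seq_start_pos) - 1:
                break
            reorganized[seq_idx].append(pos)
    return reorganized


print(reorganize_seq_info([0, 4, 15, 20, 28],
                          [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28]))
# [[0, 3, 4], [4, 5, 7, 10, 15], [15, 20], [20, 22, 23, 25, 28]]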
...
@@ -6097,15 +6097,14 @@ def sub_nested_seq_layer(input, selected_indices, name=None):
     The sub_nested_seq_layer accepts two inputs: the first one is a nested
     sequence; the second one is a set of selected indices in the nested sequence.
 
-    Then sub_nest_seq_layer trims the first nested sequence input according to
-    the selected indices to form a new output.
-
-    This layer is useful in beam training.
+    Then sub_nest_seq_layer trims the first nested sequence input according
+    to the selected indices to form a new output. This layer is useful in
+    beam training.
 
     The example usage is:
 
     .. code-block:: python
 
         sub_nest_seq = sub_nested_seq_layer(input=[data, selected_indices])
...
@@ -6118,6 +6117,7 @@ def sub_nested_seq_layer(input, selected_indices, name=None):
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
     assert isinstance(input, LayerOutput), (
         'The first input of '
         'sub_nested_seq_layer must be a Paddle layer.')
...