From 26bc5b12596c945956f7a6b003712805e579a36d Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Tue, 8 Aug 2017 18:48:11 +0800
Subject: [PATCH] add implementations.

---
 paddle/gserver/layers/KmaxSeqScoreLayer.cpp   |   5 +
 paddle/gserver/layers/SequenceSliceLayer.cpp  | 228 ++++++++++++++++++
 .../gserver/layers/SubNestedSequenceLayer.cpp |  16 +-
 .../gserver/tests/test_SeqSliceLayerGrad.cpp  |  25 +-
 paddle/parameter/Argument.cpp                 |  27 ++-
 5 files changed, 278 insertions(+), 23 deletions(-)
 create mode 100644 paddle/gserver/layers/SequenceSliceLayer.cpp
diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
index 8ce591d4762..e96fd61fc1e 100644
--- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
+++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
@@ -97,6 +97,11 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
     scores_ = inputScore;
   }
 
+  // TODO(caoying)
+  // Here selSubSeqIdx is automatically converted from real to int
+  // This is very dangerous if user fill this matrix himself, invalid data may
+  // occur. The selected indices should be stored in
+  // CpuSparseMatrix with SparseValueType set to NO_VALUE.
   Matrix::resizeOrCreate(
       output_.value,
       input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),
diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp
new file mode 100644
index 00000000000..410aba663e0
--- /dev/null
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
@@ -0,0 +1,228 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/Vector.h"
+#include "paddle/utils/Logging.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+class SequenceSliceLayer : public Layer {
+public:
+  explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+
+private:
+  // TODO(caoying)
+  // Here selSubSeqIdx is automatically converted from real to int
+  // This is very dangerous if user fill this matrix himself, invalid data
+  // may occur. The selected indices should be stored in CpuSparseMatrix
+  // with SparseValueType set to NO_VALUE.
+  MatrixPtr startIdsOnCpu_;
+  MatrixPtr endIdsOnCpu_;
+
+  std::vector<int> selectedRows_;
+  IVectorPtr rowIndice_;
+  std::vector<std::vector<int>> inputSeqInfoVec_;
+  std::vector<int> outSubSeqStartPos_;
+  std::vector<int> outSeqStartPos_;
+
+  void checkInputs();
+  void copySliceIdsToCpu();
+  void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends);
+};
+
+REGISTER_LAYER(seq_slice, SequenceSliceLayer);
+
+bool SequenceSliceLayer::init(const LayerMap& layerMap,
+                              const ParameterMap& parameterMap) {
+  /* Initialize the basic parent class */
+  Layer::init(layerMap, parameterMap);
+  CHECK_GE(inputLayers_.size(), 2U);
+  CHECK_LE(inputLayers_.size(), 3U);
+
+  setNeedSequenceInfo(false);
+  return true;
+}
+
+void SequenceSliceLayer::checkInputs() {
+  const Argument& inputSeq = getInput(0);
+  CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer "
+                           << "must be a sequence.";
+  // Check inputs
+  const MatrixPtr indices1 = getInputValue(1);
+  CHECK_EQ(indices1->getHeight(),
+           inputSeq.hasSubseq() ? inputSeq.getNumSubSequences()
+                                : inputSeq.getNumSequences())
+      << "Height of the second input should be equal to number of sequence "
+      << "in the first input.";
+  if (inputLayers_.size() == 3) {
+    const MatrixPtr indices2 = getInputValue(2);
+    CHECK_EQ(indices2->getHeight(), indices1->getHeight())
+        << "start indices and end indices should have the same height.";
+    CHECK_EQ(indices2->getWidth(), indices1->getWidth())
+        << "start indices and end indices should have the same Width.";
+  }
+}
+
+void SequenceSliceLayer::copySliceIdsToCpu() {
+  if (!useGpu_) {
+    if (inputLayers_.size() == 2U) {
+      if (config_.select_first()) {
+        startIdsOnCpu_ = getInputValue(1);
+        endIdsOnCpu_ = nullptr;
+      } else {
+        startIdsOnCpu_ = nullptr;
+        endIdsOnCpu_ = getInputValue(1);
+      }
+    } else if (inputLayers_.size() == 3U) {
+      startIdsOnCpu_ = getInputValue(1);
+      endIdsOnCpu_ = getInputValue(2);
+    }
+    return;
+  }
+
+  const MatrixPtr indices1 = getInputValue(1);
+  if (inputLayers_.size() == 2U) {
+    if (config_.select_first()) {
+      Matrix::resizeOrCreate(startIdsOnCpu_,
+                             indices1->getHeight(),
+                             indices1->getWidth(),
+                             false /* trans */,
+                             false /* useGpu */);
+      startIdsOnCpu_->copyFrom(*indices1);
+      endIdsOnCpu_ = nullptr;
+    } else {
+      Matrix::resizeOrCreate(endIdsOnCpu_,
+                             indices1->getHeight(),
+                             indices1->getWidth(),
+                             false /* trans */,
+                             false /* useGpu */);
+      endIdsOnCpu_->copyFrom(*indices1);
+      startIdsOnCpu_ = nullptr;
+    }
+  } else if (inputLayers_.size() == 3U) {
+    Matrix::resizeOrCreate(startIdsOnCpu_,
+                           indices1->getHeight(),
+                           indices1->getWidth(),
+                           false /* trans */,
+                           false /* useGpu */);
+    startIdsOnCpu_->copyFrom(*indices1);
+
+    const MatrixPtr indices2 = getInputValue(2);
+    Matrix::resizeOrCreate(endIdsOnCpu_,
+                           indices2->getHeight(),
+                           indices2->getWidth(),
+                           false /* trans */,
+                           false /* useGpu */);
+    endIdsOnCpu_->copyFrom(*indices2);
+  }
+}
+
+void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
+                                         const MatrixPtr ends) {
+  outSeqStartPos_.resize(1, 0);
+  outSubSeqStartPos_.resize(1, 0);
+  selectedRows_.clear();
+
+  size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
+  // iterate over sequence
+  size_t rowIdx = 0;
+  for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
+    // iterate over sub-sequence in a sequence
+    for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) {
+      // iterate over each index for slicing.
+      for (size_t k = 0; k < beamSize; ++k) {
+        if (starts) {
+          if (starts->getElement(rowIdx, k) == -1.) break;
+        } else if (ends->getElement(rowIdx, k) == -1.)
+          break;
+
+        int begPos = inputSeqInfoVec_[i][j];
+        if (starts) begPos += starts->getElement(rowIdx, k);
+
+        int endPos = inputSeqInfoVec_[i][j + 1] - 1;
+        if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k);
+
+        int seqLen = endPos - begPos + 1;
+        CHECK(seqLen);
+        for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
+        inputSeqInfoVec_.size() > 1
+            ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)
+            : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen);
+      }
+      rowIdx++;
+    }
+    if (inputSeqInfoVec_.size() > 1)
+      outSeqStartPos_.push_back(outSubSeqStartPos_.back());
+  }
+
+  if (useGpu_) {
+    rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
+    rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
+  } else {
+    rowIndice_ =
+        IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
+  }
+
+  // create the sequence information for the output.
+  ICpuGpuVector::resizeOrCreate(
+      output_.sequenceStartPositions, outSeqStartPos_.size(), false);
+  output_.sequenceStartPositions->copyFrom(
+      outSeqStartPos_.data(), outSeqStartPos_.size(), false);
+
+  if (inputSeqInfoVec_.size() > 1) {
+    ICpuGpuVector::resizeOrCreate(
+        output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false);
+    output_.subSequenceStartPositions->copyFrom(
+        outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false);
+  }
+}
+
+void SequenceSliceLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  checkInputs();
+
+  const Argument& inputSeq = getInput(0);
+  inputSeqInfoVec_.clear();
+  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
+                              inputSeq.subSequenceStartPositions,
+                              inputSeqInfoVec_);
+  copySliceIdsToCpu();
+
+  // calculate the selected row indices in a batch,
+  // and build the output sequence information.
+  calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr,
+                  endIdsOnCpu_ ? endIdsOnCpu_ : nullptr);
+
+  resetOutput(selectedRows_.size(), getSize());
+
+  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
+}
+
+void SequenceSliceLayer::backward(const UpdateCallback& callback) {
+  MatrixPtr inputSeqGrad = getInputGrad(0);
+  MatrixPtr outputGrad = getOutputGrad();
+
+  outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
index 76f587fff76..0db03002702 100644
--- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp
+++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
@@ -52,11 +52,10 @@ private:
    *   ]
    *
    * ths output is saved to private member rowIndice_;
-   * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
-   *  16,17,18,19,20,21,22,23,24,25,26,27]
+   * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27]
    */
 
-  void calSelectedCols(const MatrixPtr selectedIndices,
+  void calSelectedRows(const MatrixPtr selectedIndices,
                        const std::vector<std::vector<int>>& inputSeqInfo);
 
   // if the second input of this layer is on GPU memory, copy it to CPU memory.
@@ -67,7 +66,7 @@ private:
   std::vector<std::vector<int>> inputSeqInfoVec_;
 
   // the final selected row indices in a batch,
-  // rowIdx_ and selectedRows_ actually share a same memory.
+  // rowIndice_ and selectedRows_ actually share a same memory.
   IVectorPtr rowIndice_;
   std::vector<int> selectedRows_;
 };
@@ -83,7 +82,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-void SubNestedSequenceLayer::calSelectedCols(
+void SubNestedSequenceLayer::calSelectedRows(
     const MatrixPtr selectedIndices,
     const std::vector<std::vector<int>>& inputSeqInfo) {
   selectedRows_.clear();
@@ -96,6 +95,11 @@ void SubNestedSequenceLayer::calSelectedCols(
   for (size_t i = 0; i < seqNum; ++i) {
     for (size_t j = 0; j < beamSize; ++j) {
       if (selectedIndices->getElement(i, j) == -1.) break;
+      // TODO(caoying)
+      // Here selSubSeqIdx is automatically converted from real to int
+      // This is very dangerous if user fill this matrix himself, invalid data
+      // may occur. The selected indices should be stored in
+      // CpuSparseMatrix with SparseValueType set to NO_VALUE.
       int selSubSeqIdx = selectedIndices->getElement(i, j);
       CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
 
@@ -160,7 +164,7 @@ void SubNestedSequenceLayer::forward(PassType passType) {
   Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                               inputSeq.subSequenceStartPositions,
                               inputSeqInfoVec_);
-  calSelectedCols(selIdsCpu_, inputSeqInfoVec_);
+  calSelectedRows(selIdsCpu_, inputSeqInfoVec_);
 
   resetOutput(selectedRows_.size(), getSize());
   getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
index e456dd5db7f..d560ca650bc 100644
--- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
+++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
@@ -26,9 +26,9 @@ using namespace std;     // NOLINT
 DECLARE_int32(gpu_id);
 DECLARE_bool(thread_local_rand_use_global_seed);
 
-const int MAX_SEQ_NUM = 5;
-const int MAX_SEQ_LEN = 5;
-const int MAX_BEAM_SIZE = 3;
+const int MAX_SEQ_NUM = 17;
+const int MAX_SEQ_LEN = 23;
+const int MAX_BEAM_SIZE = 13;
 
 vector<real> randSampling(real range, int n) {
   CHECK_GE(range, n);
@@ -46,8 +46,7 @@ void genSeqInfo(vector<int>& seqStartPos, vector<int>& subSeqStartPos) {
   seqStartPos.resize(1, 0);
   subSeqStartPos.resize(1, 0);
 
-  // srand((size_t)(time(NULL)));
-  srand(1);
+  srand((size_t)(time(NULL)));
   int seqNum = 1 + (rand() % MAX_SEQ_NUM);
   for (int i = 0; i < seqNum; ++i) {
     int subSeqNum = 1 + (rand() % MAX_SEQ_NUM);
@@ -105,7 +104,7 @@ void genTestData(vector<int>& seqStartPos,
                  vector<vector<real>>& starts,
                  vector<vector<real>>& ends,
                  bool hasSubseq) {
-  size_t beamSize = MAX_BEAM_SIZE;
+  size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE);
   genSeqInfo(seqStartPos, subSeqStartPos);
 
   genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize);
@@ -167,16 +166,21 @@ void testSeqSliceLayer(bool hasSubseq,
     config.inputDefs.push_back(
         {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr});
     config.layerConfig.add_inputs();
+    config.layerConfig.set_select_first(true);
   }
 
   // add end indices
   if (ends.size()) {
     vector<real> endsToVec;
     flatten2dVector(ends, endsToVec);
+
     MatrixPtr endMatrixPtr =
         Matrix::create(ends.size(), ends[0].size(), false, false);
+    endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size());
+
     config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr});
     config.layerConfig.add_inputs();
+    config.layerConfig.set_select_first(false);
   }
 
   testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false);
@@ -188,10 +192,15 @@ TEST(Layer, SeqSliceLayer) {
   vector<vector<real>> starts;
   vector<vector<real>> ends;
 
+  std::vector<bool> mode = {false};
+#ifndef PADDLE_ONLY_CPU
+  mode.push_back(true);
+#endif
   genSeqInfo(seqStartPos, subSeqStartPos);
-  for (bool hasSubseq : {false, true}) {
+  for (bool hasSubseq : {true, false}) {
+    LOG(INFO) << "hasSubSeq : " << hasSubseq;
     genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq);
-    for (bool useGpu : {false, true}) {
+    for (bool useGpu : mode) {
       vector<vector<real>> tmp;
       testSeqSliceLayer(
           hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends);
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 0547ac93cd1..06f7e5245fb 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -670,19 +670,28 @@ void Argument::reorganizeSeqInfo(
     const ICpuGpuVectorPtr seqStartPos,
     const ICpuGpuVectorPtr subSeqStartPos,
     std::vector<std::vector<int>>& reorganizedSeqInfo) {
-  int* seqStarts = seqStartPos->getMutableData(false);
-  int* subSeqStarts = subSeqStartPos->getMutableData(false);
+  CHECK(seqStartPos);
 
   int seqNum = seqStartPos->getSize() - 1;
-  reorganizedSeqInfo.resize(seqNum, std::vector<int>());
-  int seqIdx = 0;
-  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
-    reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
-    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
-      seqIdx++;
-      if (seqIdx == seqNum) return;
+  int* seqStarts = seqStartPos->getMutableData(false);
+
+  if (subSeqStartPos) {
+    int* subSeqStarts = subSeqStartPos->getMutableData(false);
+    reorganizedSeqInfo.resize(seqNum, std::vector<int>());
+    int seqIdx = 0;
+    for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
       reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+      if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
+        seqIdx++;
+        if (seqIdx == seqNum) return;
+        reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+      }
     }
+  } else {
+    reorganizedSeqInfo.resize(1, std::vector<int>(seqNum + 1, 0));
+    memcpy(reorganizedSeqInfo[0].data(),
+           seqStarts,
+           sizeof(int) * seqStartPos->getSize());
   }
 }
 
-- 
GitLab