add implementations.

26bc5b12 · caoying03 · 34ff7522 · 26bc5b12 · 26bc5b12 · 26bc5b12
5 changed file
--- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
+++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
@@ -97,6 +97,11 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
    scores_ = inputScore;
  }

+  // TODO(caoying)
+  // Here selSubSeqIdx is automatically converted from real to int.
+  // This is very dangerous if users fill this matrix themselves: invalid data
+  // may occur. The selected indices should be stored in a
+  // CpuSparseMatrix with SparseValueType set to NO_VALUE.
  Matrix::resizeOrCreate(
      output_.value,
      input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),

--- a/paddle/gserver/layers/SequenceSliceLayer.cpp
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/Vector.h"
+#include "paddle/utils/Logging.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+/**
+ * A layer that cuts slices out of every (sub-)sequence of its first input.
+ *
+ * Input 0 is the sequence to slice. Input 1 (and optionally input 2) is a
+ * matrix of slice indices with one row per (sub-)sequence; an entry of -1
+ * ends the list of slices of that row. With three inputs, input 1 holds the
+ * start offsets and input 2 the end offsets. With two inputs, the
+ * select_first flag of the layer config tells whether input 1 holds start
+ * offsets (true) or end offsets (false).
+ */
+class SequenceSliceLayer : public Layer {
+public:
+  explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+
+private:
+  // Validates that input 0 is a sequence and that the index matrices have
+  // one row per (sub-)sequence and agree with each other in shape.
+  void checkInputs();
+
+  // Fills startIdsOnCpu_ / endIdsOnCpu_ from the index inputs.
+  void copySliceIdsToCpu();
+
+  // Computes selectedRows_, rowIndice_ and the sequence information of the
+  // output from the given start / end offsets (either one may be null).
+  void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends);
+
+private:
+  // TODO(caoying)
+  // The slice indices are stored as real values and converted to int when
+  // they are used. This is very dangerous if users fill these matrices
+  // themselves: invalid data may occur. The indices should be stored in a
+  // CpuSparseMatrix with SparseValueType set to NO_VALUE.
+
+  // Start / end offsets readable from the CPU; a null pointer means that
+  // kind of offset was not given.
+  MatrixPtr startIdsOnCpu_;
+  MatrixPtr endIdsOnCpu_;
+
+  // Row indices of input 0 that make up the output, in output order.
+  std::vector<int> selectedRows_;
+  // The same row indices as an IVector, as consumed by selectRows/addToRows.
+  IVectorPtr rowIndice_;
+  // Start positions of input 0, regrouped by Argument::reorganizeSeqInfo.
+  std::vector<std::vector<int>> inputSeqInfoVec_;
+  // Start positions of the output sub-sequences / sequences.
+  std::vector<int> outSubSeqStartPos_;
+  std::vector<int> outSeqStartPos_;
+};
+
+REGISTER_LAYER(seq_slice, SequenceSliceLayer);
+
+bool SequenceSliceLayer::init(const LayerMap& layerMap,
+                              const ParameterMap& parameterMap) {
+  /* Initialize the basic parent class.
+   * NOTE(review): the value returned by Layer::init is not checked here;
+   * confirm that it cannot report a failure. */
+  Layer::init(layerMap, parameterMap);
+  CHECK_GE(inputLayers_.size(), 2U);  // the sequence plus one index matrix
+  CHECK_LE(inputLayers_.size(), 3U);  // at most a start and an end matrix
+
+  setNeedSequenceInfo(false);
+  return true;
+}
+
+/**
+ * Validates the inputs of the current batch.
+ *
+ * Aborts (via CHECK) unless input 0 carries sequence information and the
+ * index matrix given as input 1 has exactly one row per sequence of input 0,
+ * or per sub-sequence when input 0 is a nested sequence. When a third input
+ * is present it must have the same height and width as input 1.
+ */
+void SequenceSliceLayer::checkInputs() {
+  const Argument& inputSeq = getInput(0);
+  CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer "
+                           << "must be a sequence.";
+
+  // One row of indices is expected for every unit that gets sliced.
+  const MatrixPtr indices1 = getInputValue(1);
+  CHECK_EQ(indices1->getHeight(),
+           inputSeq.hasSubseq() ? inputSeq.getNumSubSequences()
+                                : inputSeq.getNumSequences())
+      << "Height of the second input should be equal to the number of "
+      << "sequences (or sub-sequences, for a nested sequence) "
+      << "in the first input.";
+
+  if (inputLayers_.size() == 3) {
+    // Start and end offsets are paired element by element.
+    const MatrixPtr indices2 = getInputValue(2);
+    CHECK_EQ(indices2->getHeight(), indices1->getHeight())
+        << "start indices and end indices should have the same height.";
+    CHECK_EQ(indices2->getWidth(), indices1->getWidth())
+        << "start indices and end indices should have the same width.";
+  }
+}
+
+/**
+ * Makes the slice indices available through startIdsOnCpu_ / endIdsOnCpu_.
+ *
+ * With two inputs, config_.select_first() decides whether input 1 provides
+ * the start or the end offsets; the other pointer is set to null. With three
+ * inputs, input 1 provides the starts and input 2 the ends. When the layer
+ * does not run on GPU the members simply refer to the input matrices;
+ * otherwise the inputs are copied into CPU matrices of the same shape.
+ */
+void SequenceSliceLayer::copySliceIdsToCpu() {
+  const size_t inputNum = inputLayers_.size();
+
+  if (useGpu_) {
+    // Copies srcIds into a CPU matrix of the same shape held by cpuIds.
+    auto mirrorToCpu = [](MatrixPtr& cpuIds, const MatrixPtr& srcIds) {
+      Matrix::resizeOrCreate(cpuIds,
+                             srcIds->getHeight(),
+                             srcIds->getWidth(),
+                             false /* trans */,
+                             false /* useGpu */);
+      cpuIds->copyFrom(*srcIds);
+    };
+
+    const MatrixPtr firstIds = getInputValue(1);
+    switch (inputNum) {
+      case 2U:
+        if (config_.select_first()) {
+          mirrorToCpu(startIdsOnCpu_, firstIds);
+          endIdsOnCpu_ = nullptr;
+        } else {
+          mirrorToCpu(endIdsOnCpu_, firstIds);
+          startIdsOnCpu_ = nullptr;
+        }
+        break;
+      case 3U:
+        mirrorToCpu(startIdsOnCpu_, firstIds);
+        mirrorToCpu(endIdsOnCpu_, getInputValue(2));
+        break;
+      default:
+        break;
+    }
+    return;
+  }
+
+  // CPU: no copy is needed, the members refer to the input values directly.
+  switch (inputNum) {
+    case 2U:
+      if (config_.select_first()) {
+        startIdsOnCpu_ = getInputValue(1);
+        endIdsOnCpu_ = nullptr;
+      } else {
+        startIdsOnCpu_ = nullptr;
+        endIdsOnCpu_ = getInputValue(1);
+      }
+      break;
+    case 3U:
+      startIdsOnCpu_ = getInputValue(1);
+      endIdsOnCpu_ = getInputValue(2);
+      break;
+    default:
+      break;
+  }
+}
+
+/**
+ * Computes which rows of input 0 form the output and builds the sequence
+ * information of the output.
+ *
+ * @param starts  per-(sub-)sequence start offsets, or null to slice from the
+ *                beginning of each (sub-)sequence.
+ * @param ends    per-(sub-)sequence end offsets (inclusive), or null to
+ *                slice up to the end of each (sub-)sequence.
+ *
+ * At least one of the two must be non-null. Offsets are relative to the
+ * start of the (sub-)sequence they belong to; an entry of -1 terminates the
+ * list of slices of a row. Fills selectedRows_, rowIndice_, and
+ * output_.sequenceStartPositions (plus output_.subSequenceStartPositions
+ * for a nested input). Aborts via CHECK on an empty or out-of-range slice.
+ */
+void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
+                                         const MatrixPtr ends) {
+  CHECK(starts || ends) << "At least one of the start or end indices "
+                        << "should be given.";
+
+  // Take the nesting from the input itself. inputSeqInfoVec_.size() cannot
+  // tell a nested input with a single outer sequence apart from a flat
+  // input: both are regrouped into exactly one outer entry.
+  const bool hasSubseq = getInput(0).hasSubseq();
+
+  outSeqStartPos_.assign(1, 0);
+  outSubSeqStartPos_.assign(1, 0);
+  selectedRows_.clear();
+
+  const size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
+  // row of the index matrices that belongs to the current (sub-)sequence
+  size_t rowIdx = 0;
+  // iterate over sequence
+  for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
+    // iterate over sub-sequence in a sequence; written as j + 1 < size() so
+    // that an empty entry cannot underflow the unsigned bound.
+    for (size_t j = 0; j + 1 < inputSeqInfoVec_[i].size(); ++j) {
+      const int seqBeg = inputSeqInfoVec_[i][j];
+      const int seqEnd = inputSeqInfoVec_[i][j + 1];  // one past the last row
+
+      // iterate over each index for slicing.
+      for (size_t k = 0; k < beamSize; ++k) {
+        // -1 in either matrix marks the end of the slices of this row.
+        if (starts && starts->getElement(rowIdx, k) == -1.) break;
+        if (ends && ends->getElement(rowIdx, k) == -1.) break;
+
+        int begPos = seqBeg;
+        if (starts) {
+          const int begIdx = static_cast<int>(starts->getElement(rowIdx, k));
+          CHECK_GE(begIdx, 0);
+          begPos += begIdx;
+        }
+
+        int endPos = seqEnd - 1;
+        if (ends) {
+          const int endIdx = static_cast<int>(ends->getElement(rowIdx, k));
+          CHECK_GE(endIdx, 0);
+          endPos = seqBeg + endIdx;
+          // the slice must not run into the next (sub-)sequence
+          CHECK_LT(endPos, seqEnd);
+        }
+
+        // A zero or negative length would corrupt the start positions.
+        const int seqLen = endPos - begPos + 1;
+        CHECK_GT(seqLen, 0);
+
+        for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
+        if (hasSubseq) {
+          outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen);
+        } else {
+          outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen);
+        }
+      }
+      rowIdx++;
+    }
+    // Close the outer sequence once all of its sub-sequences are sliced.
+    if (hasSubseq) outSeqStartPos_.push_back(outSubSeqStartPos_.back());
+  }
+
+  if (useGpu_) {
+    rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
+    rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
+  } else {
+    // NOTE(review): this overload presumably wraps the memory of
+    // selectedRows_ instead of copying it, so selectedRows_ must not be
+    // modified while rowIndice_ is in use - confirm against IVector::create.
+    rowIndice_ =
+        IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
+  }
+
+  // create the sequence information for the output.
+  ICpuGpuVector::resizeOrCreate(
+      output_.sequenceStartPositions, outSeqStartPos_.size(), false);
+  output_.sequenceStartPositions->copyFrom(
+      outSeqStartPos_.data(), outSeqStartPos_.size(), false);
+
+  if (hasSubseq) {
+    ICpuGpuVector::resizeOrCreate(
+        output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false);
+    output_.subSequenceStartPositions->copyFrom(
+        outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false);
+  }
+}
+
+/**
+ * Forward pass: validates the inputs, works out which rows of input 0 are
+ * selected by the slice indices, and gathers those rows into the output.
+ */
+void SequenceSliceLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  checkInputs();
+
+  // Regroup the start positions of input 0 for the current batch.
+  const Argument& seqArg = getInput(0);
+  inputSeqInfoVec_.clear();
+  Argument::reorganizeSeqInfo(seqArg.sequenceStartPositions,
+                              seqArg.subSequenceStartPositions,
+                              inputSeqInfoVec_);
+  copySliceIdsToCpu();
+
+  // Work out the selected row indices of the batch and build the sequence
+  // information of the output. A null pointer is passed through unchanged
+  // when the corresponding offsets were not given.
+  calSelectedRows(startIdsOnCpu_, endIdsOnCpu_);
+
+  const size_t outputHeight = selectedRows_.size();
+  resetOutput(outputHeight, getSize());
+
+  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
+}
+
+/**
+ * Backward pass: hands the output gradient to the gradient of input 0 via
+ * addToRows, using the row indices computed in the forward pass.
+ *
+ * The callback argument is not used by this layer.
+ * NOTE(review): the gradient of input 0 is dereferenced without a null
+ * check - confirm that getInputGrad(0) is always set for this layer.
+ */
+void SequenceSliceLayer::backward(const UpdateCallback& callback) {
+  const MatrixPtr gradOfInput = getInputGrad(0);
+  const MatrixPtr gradOfOutput = getOutputGrad();
+
+  gradOfOutput->addToRows(*gradOfInput, *rowIndice_);
+}
+
+}  // namespace paddle
--- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp
+++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
@@ -52,11 +52,10 @@ private:
   *   ]
   *
   * ths output is saved to private member rowIndice_;
-   * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
-   *  16,17,18,19,20,21,22,23,24,25,26,27]
+   * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27]
   */

-  void calSelectedCols(const MatrixPtr selectedIndices,
+  void calSelectedRows(const MatrixPtr selectedIndices,
                       const std::vector<std::vector<int>>& inputSeqInfo);

  // if the second input of this layer is on GPU memory, copy it to CPU memory.
@@ -67,7 +66,7 @@ private:
  std::vector<std::vector<int>> inputSeqInfoVec_;

  // the final selected row indices in a batch,
-  // rowIdx_ and selectedRows_ actually share a same memory.
+  // rowIndice_ and selectedRows_ actually share the same memory.
  IVectorPtr rowIndice_;
  std::vector<int> selectedRows_;
 };
@@ -83,7 +82,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
  return true;
 }

-void SubNestedSequenceLayer::calSelectedCols(
+void SubNestedSequenceLayer::calSelectedRows(
    const MatrixPtr selectedIndices,
    const std::vector<std::vector<int>>& inputSeqInfo) {
  selectedRows_.clear();
@@ -96,6 +95,11 @@ void SubNestedSequenceLayer::calSelectedCols(
  for (size_t i = 0; i < seqNum; ++i) {
    for (size_t j = 0; j < beamSize; ++j) {
      if (selectedIndices->getElement(i, j) == -1.) break;
+      // TODO(caoying)
+      // Here selSubSeqIdx is automatically converted from real to int.
+      // This is very dangerous if users fill this matrix themselves: invalid
+      // data may occur. The selected indices should be stored in a
+      // CpuSparseMatrix with SparseValueType set to NO_VALUE.
      int selSubSeqIdx = selectedIndices->getElement(i, j);
      CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);

@@ -160,7 +164,7 @@ void SubNestedSequenceLayer::forward(PassType passType) {
  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                              inputSeq.subSequenceStartPositions,
                              inputSeqInfoVec_);
-  calSelectedCols(selIdsCpu_, inputSeqInfoVec_);
+  calSelectedRows(selIdsCpu_, inputSeqInfoVec_);

  resetOutput(selectedRows_.size(), getSize());
  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);

--- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
+++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
@@ -26,9 +26,9 @@ using namespace std;     // NOLINT
 DECLARE_int32(gpu_id);
 DECLARE_bool(thread_local_rand_use_global_seed);

-const int MAX_SEQ_NUM = 5;
-const int MAX_SEQ_LEN = 5;
-const int MAX_BEAM_SIZE = 3;
+const int MAX_SEQ_NUM = 17;
+const int MAX_SEQ_LEN = 23;
+const int MAX_BEAM_SIZE = 13;

 vector<real> randSampling(real range, int n) {
  CHECK_GE(range, n);
@@ -46,8 +46,7 @@ void genSeqInfo(vector<int>& seqStartPos, vector<int>& subSeqStartPos) {
  seqStartPos.resize(1, 0);
  subSeqStartPos.resize(1, 0);

-  // srand((size_t)(time(NULL)));
-  srand(1);
+  srand((size_t)(time(NULL)));
  int seqNum = 1 + (rand() % MAX_SEQ_NUM);
  for (int i = 0; i < seqNum; ++i) {
    int subSeqNum = 1 + (rand() % MAX_SEQ_NUM);
@@ -105,7 +104,7 @@ void genTestData(vector<int>& seqStartPos,
                 vector<vector<real>>& starts,
                 vector<vector<real>>& ends,
                 bool hasSubseq) {
-  size_t beamSize = MAX_BEAM_SIZE;
+  size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE);
  genSeqInfo(seqStartPos, subSeqStartPos);

  genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize);
@@ -167,16 +166,21 @@ void testSeqSliceLayer(bool hasSubseq,
    config.inputDefs.push_back(
        {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr});
    config.layerConfig.add_inputs();
+    config.layerConfig.set_select_first(true);
  }

  // add end indices
  if (ends.size()) {
    vector<real> endsToVec;
    flatten2dVector(ends, endsToVec);
+
    MatrixPtr endMatrixPtr =
        Matrix::create(ends.size(), ends[0].size(), false, false);
+    endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size());
+
    config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr});
    config.layerConfig.add_inputs();
+    config.layerConfig.set_select_first(false);
  }

  testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false);
@@ -188,10 +192,15 @@ TEST(Layer, SeqSliceLayer) {
  vector<vector<real>> starts;
  vector<vector<real>> ends;

+  std::vector<bool> mode = {false};
+#ifndef PADDLE_ONLY_CPU
+  mode.push_back(true);
+#endif
  genSeqInfo(seqStartPos, subSeqStartPos);
-  for (bool hasSubseq : {false, true}) {
+  for (bool hasSubseq : {true, false}) {
+    LOG(INFO) << "hasSubSeq : " << hasSubseq;
    genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq);
-    for (bool useGpu : {false, true}) {
+    for (bool useGpu : mode) {
      vector<vector<real>> tmp;
      testSeqSliceLayer(
          hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends);

--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -670,19 +670,28 @@ void Argument::reorganizeSeqInfo(
    const ICpuGpuVectorPtr seqStartPos,
    const ICpuGpuVectorPtr subSeqStartPos,
    std::vector<std::vector<int>>& reorganizedSeqInfo) {
-  int* seqStarts = seqStartPos->getMutableData(false);
-  int* subSeqStarts = subSeqStartPos->getMutableData(false);
+  CHECK(seqStartPos);

  int seqNum = seqStartPos->getSize() - 1;
-  reorganizedSeqInfo.resize(seqNum, std::vector<int>());
-  int seqIdx = 0;
-  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
-    reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
-    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
-      seqIdx++;
-      if (seqIdx == seqNum) return;
+  int* seqStarts = seqStartPos->getMutableData(false);
+
+  if (subSeqStartPos) {
+    int* subSeqStarts = subSeqStartPos->getMutableData(false);
+    reorganizedSeqInfo.resize(seqNum, std::vector<int>());
+    int seqIdx = 0;
+    for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
      reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+      if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
+        seqIdx++;
+        if (seqIdx == seqNum) return;
+        reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+      }
    }
+  } else {
+    reorganizedSeqInfo.resize(1, std::vector<int>(seqNum + 1, 0));
+    memcpy(reorganizedSeqInfo[0].data(),
+           seqStarts,
+           sizeof(int) * seqStartPos->getSize());
  }
 }