SubNestedSequenceLayer.cpp 6.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"

namespace paddle {

class SubNestedSequenceLayer : public Layer {
public:
  explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback = nullptr) override;

private:
C
caoying03 已提交
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
  /*
   * This functions generates the indices of rows in a batch according to the
   * indices of selected sub-sequence in each sequence.
   *
   * Examples:
   * selectedIndices:
   *   [
   *     [0, 1, -1],
   *     [0, 1, 2],
   *     [0, -1, -1],
   *     [0, 2, 3],
   *   ]
   * inputSeqInfo:
   *   [
   *     [0,3,4],
   *     [4,5,7,10,15],
   *     [15,20],
   *     [20,22,23,25,28]
   *   ]
   *
   * ths output is saved to private member rowIndice_;
C
caoying03 已提交
55
   * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27]
C
caoying03 已提交
56
   */
57

C
caoying03 已提交
58
  void calSelectedRows(const MatrixPtr selectedIndices,
C
caoying03 已提交
59
                       const std::vector<std::vector<int>>& inputSeqInfo);
60

C
caoying03 已提交
61 62 63 64 65 66 67 68 69 70 71
  /*
   * TODO(caoying)
   * In PaddePaddle, currently all matrices are real number types,
   * but the second is some selected indices of the give sequence to trim
   * the nested sequence, are actually filled with int types so that storing
   * int types information in real number matrices is very dangerous, since
   * real numbers will be convered to int types. If a user fills this matrix
   * himself, invalid data may occor.
   *
   * if the second input of this layer is on GPU memory, copy it to CPU memory.
   */
72
  MatrixPtr selIdsCpu_;
C
caoying03 已提交
73

C
caoying03 已提交
74 75 76 77
  /*
   * reorganize sequenceStartPositions and subSequenceStartPositions
   * into a 2d vector to facilitate the sequence selection process.
   */
C
caoying03 已提交
78
  std::vector<std::vector<int>> inputSeqInfoVec_;
79

C
caoying03 已提交
80
  /* store the final selected row indices in a batch */
81
  IVectorPtr rowIndice_;
C
caoying03 已提交
82
  /* rowIndice_ and selectedRows_ actually share a same memory. */
83 84 85 86 87 88 89 90 91 92 93 94 95 96
  std::vector<int> selectedRows_;
};

// Associates the type name `sub_nested_seq` with SubNestedSequenceLayer.
// NOTE(review): REGISTER_LAYER is defined outside this file; presumably it
// lets the framework construct this layer from a config whose type is
// "sub_nested_seq" -- confirm against the macro definition.
REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer);

/*
 * Initializes the layer.
 *
 * Returns false if the base class initialization fails, true otherwise.
 * Aborts (via CHECK) if the layer is not configured with exactly two inputs.
 */
bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
                                  const ParameterMap& parameterMap) {
  /* Initialize the basic parent class and propagate its failure, if any. */
  if (!Layer::init(layerMap, parameterMap)) return false;

  CHECK_EQ(2U, inputLayers_.size())
      << "SubNestedSequenceLayer requires exactly two inputs.";
  setNeedSequenceInfo(false);
  return true;
}

C
caoying03 已提交
97
void SubNestedSequenceLayer::calSelectedRows(
98
    const MatrixPtr selectedIndices,
C
caoying03 已提交
99
    const std::vector<std::vector<int>>& inputSeqInfo) {
100
  selectedRows_.clear();
C
caoying03 已提交
101 102 103

  std::vector<int> outSeqStartInfo(1, 0);
  std::vector<int> outSubSeqStartInfo(1, 0);
104 105 106 107 108 109

  size_t seqNum = selectedIndices->getHeight();
  size_t beamSize = selectedIndices->getWidth();
  for (size_t i = 0; i < seqNum; ++i) {
    for (size_t j = 0; j < beamSize; ++j) {
      if (selectedIndices->getElement(i, j) == -1.) break;
Y
Yi Wang 已提交
110
      size_t selSubSeqIdx = selectedIndices->getElement(i, j);
C
caoying03 已提交
111
      CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
112

C
caoying03 已提交
113 114
      size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] -
                         inputSeqInfoVec_[i][selSubSeqIdx];
115
      for (size_t k = 0; k < subSeqLen; ++k)
C
caoying03 已提交
116 117
        selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k);
      outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen);
118
    }
C
caoying03 已提交
119
    outSeqStartInfo.push_back(outSubSeqStartInfo.back());
120
  }
121

C
caoying03 已提交
122 123 124 125 126 127 128
  if (useGpu_) {
    rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
    rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
  } else {
    rowIndice_ =
        IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
  }
129

C
caoying03 已提交
130
  // create the sequence information for the output.
131
  ICpuGpuVector::resizeOrCreate(
C
caoying03 已提交
132 133 134
      output_.sequenceStartPositions, outSeqStartInfo.size(), false);
  output_.sequenceStartPositions->copyFrom(
      outSeqStartInfo.data(), outSeqStartInfo.size(), false);
135 136

  ICpuGpuVector::resizeOrCreate(
C
caoying03 已提交
137 138 139
      output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false);
  output_.subSequenceStartPositions->copyFrom(
      outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false);
140 141 142 143
}

/*
 * Forward pass: selects from the first input (a nested sequence) the rows of
 * the sub-sequences whose indices are given by the second input, and writes
 * them to the output together with the rebuilt sequence information.
 *
 * Aborts (via CHECK) if the first input is not a nested sequence, if the
 * second input has no value, or if the second input does not have exactly
 * one row per sequence of the first input.
 */
void SubNestedSequenceLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& inputSeq = getInput(0);
  CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer "
                              << "must be a nested sequence.";
  const MatrixPtr selectedIndices = getInputValue(1);
  CHECK(selectedIndices != nullptr)
      << "The second input of SubNestSequence layer has no value.";
  CHECK_EQ(size_t(inputSeq.getNumSequences()), selectedIndices->getHeight());

  if (dynamic_cast<GpuMatrix*>(selectedIndices.get())) {
    /*
     * Currently, the second input for this layer is generated by
     * kmax_sequence_score_layer whose output is always stored on CPU,
     * or a data_layer which can be on GPU.
     *
     * If the second input is on GPU, copy it to CPU memory, because this
     * input always uses very little memory, and operations related to it
     * are all logic control, not computations.
     */
    Matrix::resizeOrCreate(selIdsCpu_,
                           selectedIndices->getHeight(),
                           selectedIndices->getWidth(),
                           false /* trans */,
                           false /* useGpu */);
    selIdsCpu_->copyFrom(*selectedIndices);
  } else {
    /* Not a GpuMatrix: use the input matrix directly, without copying. */
    selIdsCpu_ = selectedIndices;
  }

  /* Flatten the two levels of start positions into a per-sequence 2d vector. */
  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                              inputSeq.subSequenceStartPositions,
                              inputSeqInfoVec_);
  calSelectedRows(selIdsCpu_, inputSeqInfoVec_);

  /* One output row per selected input row. */
  resetOutput(selectedRows_.size(), getSize());
  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
}

void SubNestedSequenceLayer::backward(const UpdateCallback& callback) {
C
caoying03 已提交
181
  MatrixPtr inputSeqGrad = getInputGrad(0);
182 183
  MatrixPtr outputGrad = getOutputGrad();

C
caoying03 已提交
184
  if (inputSeqGrad) outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
185 186 187
}

}  // namespace paddle