diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
index 3b5060e3ce980a255321c35a61be10f928b090a3..d5407555b248d79a5156a5ea354042d43ecda02c 100644
--- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
+++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
@@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
       << "input of " << getName()
       << " must be a sequence or a nested sequence.";
   CHECK_EQ(input.value->getWidth(), 1UL)
-      << "input of " << getName()
-      << " is score over a sequence or a nested sequence, so its width "
-      << " must be 1.";
+      << "input of " << getName() << " are scores over a sequence or "
+      << "a nested sequence, so its width must be 1.";
 
   if (useGpu_) {
-    // this Layer runs only in CPU, if the model is runing on GPU,
-    // then copy the input to this layer from GPU to CPU.
+    /*
+     * Currently this layer only runs on CPU. If the rest of the model is
+     * running on GPU, copy the input of this layer from GPU to CPU first.
+     */
     Matrix::resizeOrCreate(scores_,
                            inputScore->getHeight(),
                            1,
@@ -97,13 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
     scores_ = inputScore;
   }
 
-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddlePaddle, all matrices currently store real numbers, but the
+   * output of this layer holds the selected indices of the given sequence,
+   * which are integers (with -1 as a special token). Storing them in a
+   * real-typed matrix is dangerous, since the reals must be converted back
+   * to int. The selected indices should be stored in an int-typed matrix.
+   */
   Matrix::resizeOrCreate(
       output_.value,
       input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),
diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp
index 165ee6311a62d54822cedfa2672843da569af97d..4da65ade0b423a9bc005609e9f26e9160fdde993 100644
--- a/paddle/gserver/layers/SequenceSliceLayer.cpp
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
@@ -31,13 +31,15 @@ public:
   void backward(const UpdateCallback& callback = nullptr) override;
 
 private:
-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddlePaddle, all matrices currently store real numbers, but the
+   * second and the (optional) third input of this layer hold the selected
+   * indices used to trim the given sequence, which are integers (with -1
+   * as a special token). Storing them in real-typed matrices is dangerous,
+   * since the reals must be converted back to int: if a user fills these
+   * matrices himself, invalid data may occur.
+   */
   MatrixPtr startIdsOnCpu_;
   MatrixPtr endIdsOnCpu_;
 
@@ -68,7 +70,7 @@ bool SequenceSliceLayer::init(const LayerMap& layerMap,
 
 void SequenceSliceLayer::checkInputs() {
   const Argument& inputSeq = getInput(0);
-  CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer "
+  CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer "
                            << "must be a sequence.";
   const MatrixPtr indices1 = getInputValue(1);
   CHECK_EQ(static_cast<size_t>(indices1->getHeight()),
@@ -86,22 +88,6 @@ void SequenceSliceLayer::checkInputs() {
 }
 
 void SequenceSliceLayer::copySliceIdsToCpu() {
-  if (!useGpu_) {
-    if (inputLayers_.size() == 2U) {
-      if (config_.select_first()) {
-        startIdsOnCpu_ = getInputValue(1);
-        endIdsOnCpu_ = nullptr;
-      } else {
-        startIdsOnCpu_ = nullptr;
-        endIdsOnCpu_ = getInputValue(1);
-      }
-    } else if (inputLayers_.size() == 3U) {
-      startIdsOnCpu_ = getInputValue(1);
-      endIdsOnCpu_ = getInputValue(2);
-    }
-    return;
-  }
-
   const MatrixPtr indices1 = getInputValue(1);
   if (inputLayers_.size() == 2U) {
     if (config_.select_first()) {
@@ -141,22 +127,19 @@ void SequenceSliceLayer::copySliceIdsToCpu() {
 
 void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
                                          const MatrixPtr ends) {
+  CHECK(starts || ends) << "Either start or end indices must be given.";
+
   outSeqStartPos_.resize(1, 0);
   outSubSeqStartPos_.resize(1, 0);
   selectedRows_.clear();
 
   size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
-  // iterate over sequence
   size_t rowIdx = 0;
   for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
-    // iterate over sub-sequence in a sequence
     for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) {
-      // iterate over each index for slicing.
       for (size_t k = 0; k < beamSize; ++k) {
-        if (starts) {
-          if (starts->getElement(rowIdx, k) == -1.) break;
-        } else if (ends->getElement(rowIdx, k) == -1.)
-          break;
+        if (starts && starts->getElement(rowIdx, k) == -1.) break;
+        if (ends && ends->getElement(rowIdx, k) == -1.) break;
 
         int begPos = inputSeqInfoVec_[i][j];
         if (starts) begPos += starts->getElement(rowIdx, k);
@@ -165,7 +148,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
         if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k);
 
         int seqLen = endPos - begPos + 1;
-        CHECK(seqLen);
+        CHECK_GT(seqLen, 0) << "end index is before start index.";
         for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
         inputSeqInfoVec_.size() > 1
             ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)
@@ -208,7 +191,16 @@ void SequenceSliceLayer::forward(PassType passType) {
   Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                               inputSeq.subSequenceStartPositions,
                               inputSeqInfoVec_);
-  copySliceIdsToCpu();
+  if (!useGpu_) {
+    if (inputLayers_.size() == 2U) {
+      startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr;
+      endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1);
+    } else if (inputLayers_.size() == 3U) {
+      startIdsOnCpu_ = getInputValue(1);
+      endIdsOnCpu_ = getInputValue(2);
+    }
+  } else
+    copySliceIdsToCpu();
 
   // calculate the selected row indices in a batch,
   // and build the output sequence information.
@@ -221,10 +213,7 @@ void SequenceSliceLayer::forward(PassType passType) {
 }
 
 void SequenceSliceLayer::backward(const UpdateCallback& callback) {
-  MatrixPtr inputSeqGrad = getInputGrad(0);
-  MatrixPtr outputGrad = getOutputGrad();
-
-  outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
+  getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_);
 }
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
index c8607d50f58789afa1ae5168ec5b4113863c58c1..e9bee77212065effdac78cba590caed2e9155f0a 100644
--- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp
+++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
@@ -58,23 +58,28 @@ private:
   void calSelectedRows(const MatrixPtr selectedIndices,
                        const std::vector<std::vector<int>>& inputSeqInfo);
 
-  // if the second input of this layer is on GPU memory, copy it to CPU memory.
-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddlePaddle, all matrices currently store real numbers, but the
+   * second input of this layer holds the selected indices used to trim the
+   * given nested sequence, which are integers. Storing them in a real-typed
+   * matrix is dangerous, since the reals must be converted back to int: if
+   * a user fills this matrix himself, invalid data may occur.
+   * The selected indices should be stored in an int-typed matrix.
+   *
+   * If the second input of this layer is on GPU memory, copy it to CPU memory.
+   */
   MatrixPtr selIdsCpu_;
 
-  // reorganized sequenceStartPositions and subSequenceStartPositions
-  // into a 2d vector to facilitate the sequence selection process.
+  /*
+   * sequenceStartPositions and subSequenceStartPositions, reorganized
+   * into a 2d vector to facilitate the sequence selection process.
+   */
   std::vector<std::vector<int>> inputSeqInfoVec_;
 
-  // the final selected row indices in a batch,
-  // rowIndice_ and selectedRows_ actually share a same memory.
+  /* store the final selected row indices in a batch */
   IVectorPtr rowIndice_;
+  /* rowIndice_ and selectedRows_ actually share the same memory. */
   std::vector<int> selectedRows_;
 };
 
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index af14007de603ce91deebf590b0defc3f65658755..2fcccc69482cf79168e930a3b7e6c375f9297d76 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2717,10 +2717,7 @@ class SeqSliceLayer(LayerBase):
                     'If start and end indices are both given to'
                     'sequence slice layer, they should have the same width.')
         elif len(inputs) == 2:
-            if starts is not None:
-                self.config.select_first = True
-            else:
-                self.config.select_first = False
+            self.config.select_first = (starts is not None)
 
 
 @config_layer('sub_nested_seq')