From 26bc5b12596c945956f7a6b003712805e579a36d Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 8 Aug 2017 18:48:11 +0800 Subject: [PATCH] add implementations. --- paddle/gserver/layers/KmaxSeqScoreLayer.cpp | 5 + paddle/gserver/layers/SequenceSliceLayer.cpp | 228 ++++++++++++++++++ .../gserver/layers/SubNestedSequenceLayer.cpp | 16 +- .../gserver/tests/test_SeqSliceLayerGrad.cpp | 25 +- paddle/parameter/Argument.cpp | 27 ++- 5 files changed, 278 insertions(+), 23 deletions(-) create mode 100644 paddle/gserver/layers/SequenceSliceLayer.cpp diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp index 8ce591d4762..e96fd61fc1e 100644 --- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -97,6 +97,11 @@ void KmaxSeqScoreLayer::forward(PassType passType) { scores_ = inputScore; } + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data may + // occur. The selected indices should be stored in + // CpuSparseMatrix with SparseValueType set to NO_VALUE. Matrix::resizeOrCreate( output_.value, input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp new file mode 100644 index 00000000000..410aba663e0 --- /dev/null +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -0,0 +1,228 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +class SequenceSliceLayer : public Layer { +public: + explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +private: + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data + // may occur. The selected indices should be stored in CpuSparseMatrix + // with SparseValueType set to NO_VALUE. + MatrixPtr startIdsOnCpu_; + MatrixPtr endIdsOnCpu_; + + std::vector selectedRows_; + IVectorPtr rowIndice_; + std::vector> inputSeqInfoVec_; + std::vector outSubSeqStartPos_; + std::vector outSeqStartPos_; + + void checkInputs(); + void copySliceIdsToCpu(); + void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends); +}; + +REGISTER_LAYER(seq_slice, SequenceSliceLayer); + +bool SequenceSliceLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_GE(inputLayers_.size(), 2U); + CHECK_LE(inputLayers_.size(), 3U); + + setNeedSequenceInfo(false); + return true; +} + +void SequenceSliceLayer::checkInputs() { + const Argument& inputSeq = getInput(0); + CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " + << "must be a sequence."; + // Check inputs + const MatrixPtr indices1 = getInputValue(1); + CHECK_EQ(indices1->getHeight(), + inputSeq.hasSubseq() ? inputSeq.getNumSubSequences() + : inputSeq.getNumSequences()) + << "Height of the second input should be equal to number of sequence " + << "in the first input."; + if (inputLayers_.size() == 3) { + const MatrixPtr indices2 = getInputValue(2); + CHECK_EQ(indices2->getHeight(), indices1->getHeight()) + << "start indices and end indices should have the same height."; + CHECK_EQ(indices2->getWidth(), indices1->getWidth()) + << "start indices and end indices should have the same Width."; + } +} + +void SequenceSliceLayer::copySliceIdsToCpu() { + if (!useGpu_) { + if (inputLayers_.size() == 2U) { + if (config_.select_first()) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = nullptr; + } else { + startIdsOnCpu_ = nullptr; + endIdsOnCpu_ = getInputValue(1); + } + } else if (inputLayers_.size() == 3U) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = getInputValue(2); + } + return; + } + + const MatrixPtr indices1 = getInputValue(1); + if (inputLayers_.size() == 2U) { + if (config_.select_first()) { + Matrix::resizeOrCreate(startIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + startIdsOnCpu_->copyFrom(*indices1); + endIdsOnCpu_ = nullptr; + } else { + Matrix::resizeOrCreate(endIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + endIdsOnCpu_->copyFrom(*indices1); + startIdsOnCpu_ = nullptr; + } + } else if (inputLayers_.size() == 3U) { + Matrix::resizeOrCreate(startIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + startIdsOnCpu_->copyFrom(*indices1); + + const MatrixPtr indices2 = getInputValue(2); + Matrix::resizeOrCreate(endIdsOnCpu_, + indices2->getHeight(), + indices2->getWidth(), + false /* trans */, + false /* useGpu */); + endIdsOnCpu_->copyFrom(*indices2); + } +} + +void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, + const MatrixPtr ends) { + outSeqStartPos_.resize(1, 0); + outSubSeqStartPos_.resize(1, 0); + selectedRows_.clear(); + + size_t beamSize = starts ? starts->getWidth() : ends->getWidth(); + // iterate over sequence + size_t rowIdx = 0; + for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) { + // iterate over sub-sequence in a sequence + for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) { + // iterate over each index for slicing. + for (size_t k = 0; k < beamSize; ++k) { + if (starts) { + if (starts->getElement(rowIdx, k) == -1.) break; + } else if (ends->getElement(rowIdx, k) == -1.) + break; + + int begPos = inputSeqInfoVec_[i][j]; + if (starts) begPos += starts->getElement(rowIdx, k); + + int endPos = inputSeqInfoVec_[i][j + 1] - 1; + if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); + + int seqLen = endPos - begPos + 1; + CHECK(seqLen); + for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); + inputSeqInfoVec_.size() > 1 + ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) + : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen); + } + rowIdx++; + } + if (inputSeqInfoVec_.size() > 1) + outSeqStartPos_.push_back(outSubSeqStartPos_.back()); + } + + if (useGpu_) { + rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); + rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); + } else { + rowIndice_ = + IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); + } + + // create the sequence information for the output. + ICpuGpuVector::resizeOrCreate( + output_.sequenceStartPositions, outSeqStartPos_.size(), false); + output_.sequenceStartPositions->copyFrom( + outSeqStartPos_.data(), outSeqStartPos_.size(), false); + + if (inputSeqInfoVec_.size() > 1) { + ICpuGpuVector::resizeOrCreate( + output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false); + output_.subSequenceStartPositions->copyFrom( + outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false); + } +} + +void SequenceSliceLayer::forward(PassType passType) { + Layer::forward(passType); + checkInputs(); + + const Argument& inputSeq = getInput(0); + inputSeqInfoVec_.clear(); + Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, + inputSeq.subSequenceStartPositions, + inputSeqInfoVec_); + copySliceIdsToCpu(); + + // calculate the selected row indices in a batch, + // and build the output sequence information. + calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr, + endIdsOnCpu_ ? endIdsOnCpu_ : nullptr); + + resetOutput(selectedRows_.size(), getSize()); + + getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); +} + +void SequenceSliceLayer::backward(const UpdateCallback& callback) { + MatrixPtr inputSeqGrad = getInputGrad(0); + MatrixPtr outputGrad = getOutputGrad(); + + outputGrad->addToRows(*inputSeqGrad, *rowIndice_); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index 76f587fff76..0db03002702 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -52,11 +52,10 @@ private: * ] * * ths output is saved to private member rowIndice_; - * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - * 16,17,18,19,20,21,22,23,24,25,26,27] + * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27] */ - void calSelectedCols(const MatrixPtr selectedIndices, + void calSelectedRows(const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo); // if the second input of this layer is on GPU memory, copy it to CPU memory. @@ -67,7 +66,7 @@ private: std::vector> inputSeqInfoVec_; // the final selected row indices in a batch, - // rowIdx_ and selectedRows_ actually share a same memory. + // rowIndice_ and selectedRows_ actually share a same memory. IVectorPtr rowIndice_; std::vector selectedRows_; }; @@ -83,7 +82,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap, return true; } -void SubNestedSequenceLayer::calSelectedCols( +void SubNestedSequenceLayer::calSelectedRows( const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo) { selectedRows_.clear(); @@ -96,6 +95,11 @@ void SubNestedSequenceLayer::calSelectedCols( for (size_t i = 0; i < seqNum; ++i) { for (size_t j = 0; j < beamSize; ++j) { if (selectedIndices->getElement(i, j) == -1.) break; + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data + // may occur. The selected indices should be stored in + // CpuSparseMatrix with SparseValueType set to NO_VALUE. int selSubSeqIdx = selectedIndices->getElement(i, j); CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); @@ -160,7 +164,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, inputSeq.subSequenceStartPositions, inputSeqInfoVec_); - calSelectedCols(selIdsCpu_, inputSeqInfoVec_); + calSelectedRows(selIdsCpu_, inputSeqInfoVec_); resetOutput(selectedRows_.size(), getSize()); getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp index e456dd5db7f..d560ca650bc 100644 --- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -26,9 +26,9 @@ using namespace std; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -const int MAX_SEQ_NUM = 5; -const int MAX_SEQ_LEN = 5; -const int MAX_BEAM_SIZE = 3; +const int MAX_SEQ_NUM = 17; +const int MAX_SEQ_LEN = 23; +const int MAX_BEAM_SIZE = 13; vector randSampling(real range, int n) { CHECK_GE(range, n); @@ -46,8 +46,7 @@ void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { seqStartPos.resize(1, 0); subSeqStartPos.resize(1, 0); - // srand((size_t)(time(NULL))); - srand(1); + srand((size_t)(time(NULL))); int seqNum = 1 + (rand() % MAX_SEQ_NUM); for (int i = 0; i < seqNum; ++i) { int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); @@ -105,7 +104,7 @@ void genTestData(vector& seqStartPos, vector>& starts, vector>& ends, bool hasSubseq) { - size_t beamSize = MAX_BEAM_SIZE; + size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE); genSeqInfo(seqStartPos, subSeqStartPos); genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize); @@ -167,16 +166,21 @@ void testSeqSliceLayer(bool hasSubseq, config.inputDefs.push_back( {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr}); config.layerConfig.add_inputs(); + config.layerConfig.set_select_first(true); } // add end indices if (ends.size()) { vector endsToVec; flatten2dVector(ends, endsToVec); + MatrixPtr endMatrixPtr = Matrix::create(ends.size(), ends[0].size(), false, false); + endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size()); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr}); config.layerConfig.add_inputs(); + config.layerConfig.set_select_first(false); } testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false); @@ -188,10 +192,15 @@ TEST(Layer, SeqSliceLayer) { vector> starts; vector> ends; + std::vector mode = {false}; +#ifndef PADDLE_ONLY_CPU + mode.push_back(true); +#endif genSeqInfo(seqStartPos, subSeqStartPos); - for (bool hasSubseq : {false, true}) { + for (bool hasSubseq : {true, false}) { + LOG(INFO) << "hasSubSeq : " << hasSubseq; genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq); - for (bool useGpu : {false, true}) { + for (bool useGpu : mode) { vector> tmp; testSeqSliceLayer( hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends); diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 0547ac93cd1..06f7e5245fb 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -670,19 +670,28 @@ void Argument::reorganizeSeqInfo( const ICpuGpuVectorPtr seqStartPos, const ICpuGpuVectorPtr subSeqStartPos, std::vector>& reorganizedSeqInfo) { - int* seqStarts = seqStartPos->getMutableData(false); - int* subSeqStarts = subSeqStartPos->getMutableData(false); + CHECK(seqStartPos); int seqNum = seqStartPos->getSize() - 1; - reorganizedSeqInfo.resize(seqNum, std::vector()); - int seqIdx = 0; - for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { - reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); - if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { - seqIdx++; - if (seqIdx == seqNum) return; + int* seqStarts = seqStartPos->getMutableData(false); + + if (subSeqStartPos) { + int* subSeqStarts = subSeqStartPos->getMutableData(false); + reorganizedSeqInfo.resize(seqNum, std::vector()); + int seqIdx = 0; + for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { + seqIdx++; + if (seqIdx == seqNum) return; + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + } } + } else { + reorganizedSeqInfo.resize(1, std::vector(seqNum + 1, 0)); + memcpy(reorganizedSeqInfo[0].data(), + seqStarts, + sizeof(int) * seqStartPos->getSize()); } } -- GitLab