diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp index 7ac087663a6c64e4570bd66c3880d1c19b85719f..c70c2b74211814559b8982aae94eed92258444e8 100644 --- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp +++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp @@ -72,7 +72,8 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap, void SequenceLastInstanceLayer::forward(PassType passType) { SequencePoolLayer::forward(passType); - const int* starts = startPositions_->getData(false); + auto starts = (stride_ > 0) ? stridePositions_->getData() + : startPositions_->getData(false); MatrixPtr inputValue = getInputValue(0); MatrixPtr outputValue = getOutputValue(); @@ -82,10 +83,7 @@ void SequenceLastInstanceLayer::forward(PassType passType) { insId_.clear(); for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) { - int insId = (stride_ > 0) - ? (select_first_ ? stridePositions_[seqId] - : stridePositions_[seqId + 1] - 1) - : (select_first_ ? starts[seqId] : starts[seqId + 1] - 1); + int insId = select_first_ ? 
starts[seqId] : starts[seqId + 1] - 1; insId_.push_back(insId); outputValue->subMatrix(seqId, 1, tmpDest_) diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp index 2bf180a043b98fc3a42b4612dd4eef65ae42b5db..f853905103a0e5814d84b04a6dd0eb5ca6beb8d8 100644 --- a/paddle/gserver/layers/SequencePoolLayer.cpp +++ b/paddle/gserver/layers/SequencePoolLayer.cpp @@ -70,7 +70,7 @@ void SequencePoolLayer::forward(PassType passType) { CHECK_EQ(input.hasSubseq(), 0UL) << "sequence stride pooling is not suitable for hasSubseq now"; output_.poolSequenceWithStride(input, stride_, &stridePositions_); - newBatchSize_ = stridePositions_.size() - 1; + newBatchSize_ = stridePositions_->getSize() - 1; } resetOutput(newBatchSize_, dim); diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h index 5ca1c1a82b913f2462bb5c227e15555c31590ceb..92d7a841f0c73421e26e5882241f2b0d0e2fba50 100644 --- a/paddle/gserver/layers/SequencePoolLayer.h +++ b/paddle/gserver/layers/SequencePoolLayer.h @@ -48,7 +48,7 @@ protected: ICpuGpuVectorPtr startPositions_; int stride_; // store the start position of each stride window - std::vector<int> stridePositions_; + IVectorPtr stridePositions_; public: explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {} diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 2657c00ebb3c79c2ec08c4dc59226682fe961fe5..3cc637587bc28ce02218c38489f48df352b1e574 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) { void Argument::poolSequenceWithStride(const Argument& input, size_t stride, - std::vector<int>* stridePostions) { + IVectorPtr* stridePostions) { /* * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5, * then sequenceStartPositions = [0, 2, 3, 4, 7], @@ -577,10 +577,10 @@ void Argument::poolSequenceWithStride(const Argument& 
input, int* tgtBuf = sequenceStartPositions->getMutableData(false); // first index of target sequence and stride positions are both 0 tgtBuf[0] = 0; - (*stridePostions).clear(); + std::vector<int> stridePos; for (size_t seqId = 0; seqId < numSequences; ++seqId) { size_t seqLength = starts[seqId + 1] - starts[seqId]; - (*stridePostions).emplace_back(starts[seqId]); + stridePos.emplace_back(starts[seqId]); if (seqLength == 0) { // empty sequence tgtBuf[seqId + 1] = tgtBuf[seqId]; @@ -591,12 +591,15 @@ void Argument::poolSequenceWithStride(const Argument& input, int size = (seqLength % stride) ? seqLength / stride : seqLength / stride - 1; for (int i = 0; i < size; i++) { - (*stridePostions).emplace_back((*stridePostions).back() + stride); + stridePos.emplace_back(stridePos.back() + stride); } } } - (*stridePostions).emplace_back(starts[numSequences]); - CHECK_EQ((*stridePostions).size() - 1, tgtBuf[numSequences]); + stridePos.emplace_back(starts[numSequences]); + int size = stridePos.size(); + CHECK_EQ(size - 1, tgtBuf[numSequences]); + IVector::resizeOrCreate(*stridePostions, size, false); + (*stridePostions)->copyFrom(stridePos.data(), size); } void Argument::getValueString( diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 760029c2fe6ba3417c69c4772e3fafab3632e4b8..95ea90ffc2a604046252add36b0bb2e493b6050f 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -298,7 +298,7 @@ struct Argument { */ void poolSequenceWithStride(const Argument& input, size_t stride, - std::vector<int>* stridePositions); + IVectorPtr* stridePositions); /** * @brief getValueString will return the argument's output in string. There * are several kinds of output. 
The keys of output dictionary are 'value', diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp index ba17e8a298e80139c23db414c0b77520e99b7272..692bbada10d03f87e591e6e15a5fa0fb0569c7fd 100644 --- a/paddle/parameter/tests/test_argument.cpp +++ b/paddle/parameter/tests/test_argument.cpp @@ -27,8 +27,7 @@ TEST(Argument, poolSequenceWithStride) { inStart[3] = 17; inStart[4] = 30; - std::vector<int> stridePositions; - stridePositions.clear(); + IVectorPtr stridePositions; output.poolSequenceWithStride(input, 5 /* stride */, &stridePositions); const int* outStart = output.sequenceStartPositions->getData(false); @@ -38,10 +37,10 @@ TEST(Argument, poolSequenceWithStride) { CHECK_EQ(outStart[3], 4); CHECK_EQ(outStart[4], 7); - CHECK_EQ(stridePositions.size(), 8); + CHECK_EQ(stridePositions->getSize(), 8); int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30}; for (int i = 0; i < 8; i++) { - CHECK_EQ(stridePositions[i], strideResult[i]); + CHECK_EQ(stridePositions->getData()[i], strideResult[i]); } } diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 542bbbe086ec0e4bfdaa45cc2efef2345c80015f..5f3250e7224fff8dcbf13cace33f10b272064bdc 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1406,7 +1406,6 @@ def first_seq(input, and a long sequence will be shorten. Note that for sequence with sub-sequence, stride is default -1 now. - The simple usage is: .. code-block:: python @@ -1418,6 +1417,8 @@ def first_seq(input, :type name: basestring :param input: Input layer name. :type input: LayerOutput + :param stride: parameter of stride window. + :type stride: Int :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object.