Commit 82e4fab4 authored by caoying03

follow comments.

Parent b7359ee3
@@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
<< "input of " << getName()
<< " must be a sequence or a nested sequence.";
CHECK_EQ(input.value->getWidth(), 1UL)
<< "input of " << getName()
<< " is score over a sequence or a nested sequence, so its width "
<< " must be 1.";
<< "input of " << getName() << " are scores over a sequence or "
<< "a nested sequence, so its width must be 1.";
if (useGpu_) {
// this Layer runs only in CPU, if the model is runing on GPU,
// then copy the input to this layer from GPU to CPU.
/*
 * Currently, this layer runs only on CPU. If the rest of the model runs on
 * GPU, the input of this layer is first copied from GPU to CPU.
 */
Matrix::resizeOrCreate(scores_,
inputScore->getHeight(),
1,
@@ -97,13 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
scores_ = inputScore;
}
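For reference, here is the full copy-to-CPU pattern that the comment above describes, since the Matrix::resizeOrCreate(...) call is cut off by the collapsed hunk. This is a hedged sketch only: the helper name copyScoresToCpu and the header path are assumptions, not part of this commit, and the Matrix::resizeOrCreate / copyFrom helpers are assumed to behave as in the surrounding code.

#include "paddle/math/Matrix.h"  // assumed header path for Matrix/MatrixPtr

namespace paddle {

// Hypothetical free helper, not a member of this layer.
void copyScoresToCpu(MatrixPtr& cpuScores,
                     const MatrixPtr& inputScore,
                     bool useGpu) {
  if (useGpu) {
    // Allocate (or reuse) a CPU matrix with the same height and width 1,
    // then copy the GPU-resident scores into it.
    Matrix::resizeOrCreate(cpuScores,
                           inputScore->getHeight(),
                           1,
                           /* trans= */ false,
                           /* useGpu= */ false);
    cpuScores->copyFrom(*inputScore);
  } else {
    // Already on CPU: share the same matrix, no copy needed.
    cpuScores = inputScore;
  }
}

}  // namespace paddle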
// TODO(caoying)
// In PaddlePaddle, the currently available matrixes all a have real-typed
// data field, but the selected indices information are actually int-typed
// (with -1 as a special token). Storing indices information in real-typed
// Matrix leads to converting real to int. This is very dangerous if a user
// fills this matrix himself, invalid data may occur.
// The selected indices should be stored in an int-typed matrix.
/*
 * TODO(caoying)
 * In PaddlePaddle, currently all matrices are real-valued, but the output
 * of this layer, which holds indices selected from the given sequence, is
 * actually int-valued. Storing int-valued information in a real-valued
 * matrix is dangerous, because the real numbers will be converted to ints.
 */
Matrix::resizeOrCreate(
output_.value,
input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),
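The TODO above recurs in several files touched by this commit. Below is a minimal, self-contained sketch in plain C++ (not Paddle's Matrix API; all names are illustrative) of why keeping int indices with a -1 sentinel in a real-valued buffer is fragile.

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Indices (with -1 as an end-of-selection sentinel) kept in a float buffer,
// which is effectively what a real-valued output Matrix stores.
int readIndex(const std::vector<float>& buf, std::size_t i) {
  float v = buf[i];
  // If a user fills the buffer by hand with a non-integral value, the cast
  // below silently truncates it to a wrong index instead of failing loudly.
  assert(std::floor(v) == v && "non-integral value stored as an index");
  return static_cast<int>(v);  // -1 still marks "no more selections"
}

int main() {
  std::vector<float> selected = {3.0f, 7.0f, -1.0f};
  for (std::size_t i = 0; i < selected.size(); ++i) {
    int idx = readIndex(selected, i);
    if (idx == -1) break;
    std::printf("selected row %d\n", idx);
  }
  return 0;
}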
@@ -31,13 +31,15 @@ public:
void backward(const UpdateCallback& callback = nullptr) override;
private:
// TODO(caoying)
// In PaddlePaddle, the currently available matrixes all a have real-typed
// data field, but the selected indices information are actually int-typed
// (with -1 as a special token). Storing indices information in real-typed
// Matrix leads to converting real to int. This is very dangerous if a user
// fills this matrix himself, invalid data may occur.
// The selected indices should be stored in an int-typed matrix.
/*
 * TODO(caoying)
 * In PaddlePaddle, currently all matrices are real-valued, but the second
 * and the (optional) third input, which hold the indices used to slice the
 * given sequence, are actually int-valued. Storing int-valued information
 * in real-valued matrices is very dangerous, because the real numbers will
 * be converted to ints. If a user fills these matrices himself, invalid
 * data may occur.
 */
MatrixPtr startIdsOnCpu_;
MatrixPtr endIdsOnCpu_;
@@ -68,7 +70,7 @@ bool SequenceSliceLayer::init(const LayerMap& layerMap,
void SequenceSliceLayer::checkInputs() {
const Argument& inputSeq = getInput(0);
CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer "
CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer "
<< "must be a sequence.";
const MatrixPtr indices1 = getInputValue(1);
CHECK_EQ(static_cast<size_t>(indices1->getHeight()),
@@ -86,22 +88,6 @@ void SequenceSliceLayer::checkInputs() {
}
void SequenceSliceLayer::copySliceIdsToCpu() {
if (!useGpu_) {
if (inputLayers_.size() == 2U) {
if (config_.select_first()) {
startIdsOnCpu_ = getInputValue(1);
endIdsOnCpu_ = nullptr;
} else {
startIdsOnCpu_ = nullptr;
endIdsOnCpu_ = getInputValue(1);
}
} else if (inputLayers_.size() == 3U) {
startIdsOnCpu_ = getInputValue(1);
endIdsOnCpu_ = getInputValue(2);
}
return;
}
const MatrixPtr indices1 = getInputValue(1);
if (inputLayers_.size() == 2U) {
if (config_.select_first()) {
@@ -141,22 +127,19 @@ void SequenceSliceLayer::copySliceIdsToCpu() {
void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
const MatrixPtr ends) {
CHECK(starts || ends);
outSeqStartPos_.resize(1, 0);
outSubSeqStartPos_.resize(1, 0);
selectedRows_.clear();
size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
// iterate over sequence
size_t rowIdx = 0;
for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
// iterate over sub-sequence in a sequence
for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) {
// iterate over each index for slicing.
for (size_t k = 0; k < beamSize; ++k) {
if (starts) {
if (starts->getElement(rowIdx, k) == -1.) break;
} else if (ends->getElement(rowIdx, k) == -1.)
break;
if (starts && starts->getElement(rowIdx, k) == -1.) break;
if (ends && ends->getElement(rowIdx, k) == -1.) break;
int begPos = inputSeqInfoVec_[i][j];
if (starts) begPos += starts->getElement(rowIdx, k);
@@ -165,7 +148,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k);
int seqLen = endPos - begPos + 1;
CHECK(seqLen);
CHECK_GT(seqLen, 0);
for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
inputSeqInfoVec_.size() > 1
? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)
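To make the slicing loop above easier to follow, here is a hedged, self-contained sketch of the same idea in plain C++, flattened to a single level of sequences for brevity; the names and containers are illustrative and do not match the layer's members.

#include <cstddef>
#include <vector>

// seqStarts: start row of each sequence plus a final end marker,
//            e.g. {0, 4, 9} for two sequences of length 4 and 5.
// starts/ends: per-sequence slice offsets, one row per sequence, padded
//              with -1 once a sequence has no more slices. Either vector
//              may be empty, but not both.
std::vector<int> selectRows(const std::vector<int>& seqStarts,
                            const std::vector<std::vector<int>>& starts,
                            const std::vector<std::vector<int>>& ends) {
  std::vector<int> selectedRows;
  for (std::size_t i = 0; i + 1 < seqStarts.size(); ++i) {
    std::size_t beamSize = starts.empty() ? ends[i].size() : starts[i].size();
    for (std::size_t k = 0; k < beamSize; ++k) {
      if (!starts.empty() && starts[i][k] == -1) break;
      if (!ends.empty() && ends[i][k] == -1) break;
      // Offsets are relative to the sequence start; a missing input means
      // "slice from the beginning" / "slice to the end", respectively.
      int begPos = seqStarts[i] + (starts.empty() ? 0 : starts[i][k]);
      int endPos = ends.empty() ? seqStarts[i + 1] - 1
                                : seqStarts[i] + ends[i][k];
      for (int r = begPos; r <= endPos; ++r) selectedRows.push_back(r);
    }
  }
  return selectedRows;
}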
@@ -208,6 +191,15 @@ void SequenceSliceLayer::forward(PassType passType) {
Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
inputSeq.subSequenceStartPositions,
inputSeqInfoVec_);
if (!useGpu_) {
if (inputLayers_.size() == 2U) {
startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr;
endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1);
} else if (inputLayers_.size() == 3U) {
startIdsOnCpu_ = getInputValue(1);
endIdsOnCpu_ = getInputValue(2);
}
} else
copySliceIdsToCpu();
// calculate the selected row indices in a batch,
@@ -221,10 +213,7 @@ void SequenceSliceLayer::forward(PassType passType) {
}
void SequenceSliceLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputSeqGrad = getInputGrad(0);
MatrixPtr outputGrad = getOutputGrad();
outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_);
}
} // namespace paddle
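The one-line backward pass relies on addToRows, which scatter-adds each row of the output gradient into the input gradient at the row recorded in rowIndice_. The following sketch of that semantics uses plain vectors and is an assumption about addToRows' behavior, not Paddle's actual Matrix code.

#include <cstddef>
#include <vector>

// Row r of outGrad is accumulated into inGrad at row rowIndice[r], which is
// how the gradient flows back through the row selection done in forward().
void scatterAddRows(std::vector<std::vector<float>>& inGrad,
                    const std::vector<std::vector<float>>& outGrad,
                    const std::vector<int>& rowIndice) {
  for (std::size_t r = 0; r < outGrad.size(); ++r) {
    std::vector<float>& dst = inGrad[rowIndice[r]];
    for (std::size_t c = 0; c < outGrad[r].size(); ++c) {
      dst[c] += outGrad[r][c];
    }
  }
}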
@@ -58,23 +58,28 @@ private:
void calSelectedRows(const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo);
// if the second input of this layer is on GPU memory, copy it to CPU memory.
// TODO(caoying)
// In PaddlePaddle, the currently available matrixes all a have real-typed
// data field, but the selected indices information are actually int-typed
// (with -1 as a special token). Storing indices information in real-typed
// Matrix leads to converting real to int. This is very dangerous if a user
// fills this matrix himself, invalid data may occur.
// The selected indices should be stored in an int-typed matrix.
/*
 * TODO(caoying)
 * In PaddlePaddle, currently all matrices are real-valued, but the second
 * input, which holds the indices used to trim the given nested sequence, is
 * actually int-valued. Storing int-valued information in a real-valued
 * matrix is very dangerous, because the real numbers will be converted to
 * ints. If a user fills this matrix himself, invalid data may occur.
 *
 * If the second input of this layer is on GPU memory, copy it to CPU memory.
 */
MatrixPtr selIdsCpu_;
// reorganized sequenceStartPositions and subSequenceStartPositions
// into a 2d vector to facilitate the sequence selection process.
/*
* reorganize sequenceStartPositions and subSequenceStartPositions
* into a 2d vector to facilitate the sequence selection process.
*/
std::vector<std::vector<int>> inputSeqInfoVec_;
// the final selected row indices in a batch,
// rowIndice_ and selectedRows_ actually share a same memory.
/* store the final selected row indices in a batch */
IVectorPtr rowIndice_;
/* rowIndice_ and selectedRows_ actually share the same memory. */
std::vector<int> selectedRows_;
};
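For context, one way inputSeqInfoVec_ can be pictured: one inner vector per top-level sequence, holding that sequence's (sub)sequence start positions. The sketch below is illustrative only and is not the actual Argument::reorganizeSeqInfo implementation.

#include <cstddef>
#include <vector>

// Sketch of the 2-D layout described in the comments above, for example:
//   seqStarts    = {0, 4, 9}        -> two sequences covering rows [0,4) and [4,9)
//   subSeqStarts = {0, 2, 4, 7, 9}  -> their sub-sequence boundaries
//   result       = {{0, 2, 4}, {4, 7, 9}}
std::vector<std::vector<int>> reorganizeSeqInfoSketch(
    const std::vector<int>& seqStarts, const std::vector<int>& subSeqStarts) {
  std::vector<std::vector<int>> info;
  std::size_t s = 0;
  for (std::size_t i = 0; i + 1 < seqStarts.size(); ++i) {
    std::vector<int> oneSeq;
    // Collect every sub-sequence boundary inside sequence i, keeping both
    // end points; the shared boundary also starts the next sequence.
    while (s < subSeqStarts.size() && subSeqStarts[s] <= seqStarts[i + 1]) {
      oneSeq.push_back(subSeqStarts[s]);
      if (subSeqStarts[s] == seqStarts[i + 1]) break;
      ++s;
    }
    info.push_back(oneSeq);
  }
  return info;
}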
@@ -2717,10 +2717,7 @@ class SeqSliceLayer(LayerBase):
'If start and end indices are both given to '
'sequence slice layer, they should have the same width.')
elif len(inputs) == 2:
if starts is not None:
self.config.select_first = True
else:
self.config.select_first = False
self.config.select_first = (starts is not None)
@config_layer('sub_nested_seq')