提交 8f4ca2d1 编写于 作者: C caoying03

add implementations.

上级 a037b099
...@@ -16,6 +16,168 @@ limitations under the License. */ ...@@ -16,6 +16,168 @@ limitations under the License. */
namespace paddle { namespace paddle {
void CostForOneSequence::calValidExpandStep() {
validExpansionCount_ = 0;
goldAsExtraPath_ = true;
for (size_t i = 0; i < beams_->expansionCount; ++i) {
real gold = static_cast<real>(beams_->gold[i]);
if (i) {
real* start = beams_->candidateIds[i - 1]->getData();
goldRowIds_[i] = std::count_if(
start,
start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1],
[](const real& val) { return val != -1.; });
} else
goldRowIds_[i] = 0;
real* start =
beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_;
real* findEnd = std::find(start, start + beamSize_, gold);
validExpansionCount_++;
if (start + beamSize_ == findEnd) return;
goldColIds_[i] = findEnd - start;
}
if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false;
}
size_t CostForOneSequence::initLastExpansion() {
int beamId = validExpansionCount_ - 1;
const MatrixPtr candidates = beams_->candidateIds[beamId];
size_t height = candidates->getHeight();
/* initialization the last expansion. */
size_t pathCount = std::count_if(candidates->getData(),
candidates->getData() + height * beamSize_,
[](const real& val) { return val != -1; });
/*
* if the gold sequence falls off the beam during search,
* add the gold sequence as the last path into all expanded paths.
*/
if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++;
pathRowIdsInEachBeam_.clear();
pathRowIdsInEachBeam_.resize(validExpansionCount_,
std::vector<int>(pathCount, 0));
parentIdsInBeam_.clear();
parentIdsInBeam_.resize(pathCount, 0);
if (goldAsExtraPath_) {
/* add gold sequence into the total expansion. */
pathRowIdsInEachBeam_[beamId].back() =
beams_->gold[beamId] +
getSeqStartPos(beamId, goldRowIds_[validExpansionCount_ - 1]);
parentIdsInBeam_.back() = goldRowIds_[validExpansionCount_ - 1];
} else {
size_t goldOffset = goldRowIds_[beamId] * beamSize_ + goldColIds_[beamId];
goldIdsInFinalExpansion_ =
std::count_if(candidates->getData(),
candidates->getData() + goldOffset,
[](const real& val) { return val != -1.; });
}
/*
* TODO(caoying): fix this, store the indices of selected candidate
* paths into Argument.ids
*/
real* ids = candidates->getData();
size_t curIdx = 0;
for (size_t i = 0; i < height; ++i) {
int basePos = getSeqStartPos(beamId, i);
for (size_t j = 0; j < beamSize_; ++j) {
int id = ids[i * beamSize_ + j];
if (id == -1) continue;
pathRowIdsInEachBeam_[beamId][curIdx] = id + basePos;
parentIdsInBeam_[curIdx++] = i;
}
}
return pathCount;
}
void CostForOneSequence::constructTotalExpansion() {
/*
* construct the entire expanded beam by begining with the last search
* in which gold falls off the beam.
*/
size_t totalPathCount = initLastExpansion();
for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) {
const MatrixPtr candidates = beams_->candidateIds[beamId];
real* ids = candidates->getData();
int lastParentIdInBeam = -1;
int basePos = -1;
for (size_t i = 0;
i < (goldAsExtraPath_ ? totalPathCount - 1 : totalPathCount);
++i) {
int id = ids[parentIdsInBeam_[i]];
int parentRowId = std::div(parentIdsInBeam_[i], beamSize_).quot;
if (parentIdsInBeam_[i] != lastParentIdInBeam)
basePos = getSeqStartPos(beamId, parentRowId);
pathRowIdsInEachBeam_[beamId][i] = id + basePos;
lastParentIdInBeam = parentIdsInBeam_[i];
parentIdsInBeam_[i] = parentRowId;
if (goldAsExtraPath_)
pathRowIdsInEachBeam_[beamId][totalPathCount - 1] =
beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]);
}
}
}
/*
 * Compute the cost of this sequence.
 *
 * The scores selected by pathRowIdsInEachBeam_ are gathered for every valid
 * expansion and added up per path, a softmax is taken over the summed path
 * scores, and the negative log of the gold path's entry is returned.
 */
real CostForOneSequence::globallyNormalizedScore() {
  expandedPathScores_.resize(validExpansionCount_);

  const size_t pathCount = pathRowIdsInEachBeam_[0].size();
  Matrix::resizeOrCreate(softmaxOut_, 1, pathCount, false, false);
  softmaxOut_->zero();

  /*
   * a (pathCount x 1) matrix created on top of softmaxOut_'s data; the code
   * relies on additions into it being visible through softmaxOut_.
   */
  MatrixPtr pathScoreSum = Matrix::create(
      softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);

  for (size_t step = 0; step < validExpansionCount_; ++step) {
    std::vector<int>& rows = pathRowIdsInEachBeam_[step];
    MatrixPtr& stepScores = expandedPathScores_[step];

    Matrix::resizeOrCreate(stepScores, rows.size(), 1, false, false);
    IVectorPtr rowIds = IVector::create(rows.data(), rows.size(), false);
    stepScores->selectRows(*(beams_->scores[step]), *rowIds);
    pathScoreSum->add(*stepScores);
  }

  softmaxOut_->softmax(*softmaxOut_);
  const real goldProb = softmaxOut_->getData()[goldIdsInFinalExpansion_];
  return -std::log(goldProb);
}
/*
 * Forward pass for one sequence: find the valid expansions, build the set of
 * all expanded paths, then return the cost computed over those paths.
 */
real CostForOneSequence::forward() {
  calValidExpandStep();
  constructTotalExpansion();

  const real cost = globallyNormalizedScore();
  return cost;
}
void CostForOneSequence::backward() {
softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.;
MatrixPtr tmp = Matrix::create(
softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);
for (size_t i = 0; i < validExpansionCount_; ++i) {
IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(),
pathRowIdsInEachBeam_[i].size(),
false);
/*
beams_->scoreGrad[i] has been intialized outside this class, this
class only keeps a pointer pointing to the original input gradients,
so here does not need to allocate or initalize the memory.
*/
tmp->addToRows(*beams_->scoreGrad[i], *rowIds);
}
}
REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam);
bool CrossEntropyOverBeam::init(const LayerMap& layerMap, bool CrossEntropyOverBeam::init(const LayerMap& layerMap,
...@@ -24,13 +186,189 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap, ...@@ -24,13 +186,189 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap,
Layer::init(layerMap, parameterMap); Layer::init(layerMap, parameterMap);
CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number.";
setNeedSequenceInfo(false); beamExpanCount_ = inputLayers_.size() / 3;
candidateScores_.resize(beamExpanCount_);
candidateScoreGrad_.resize(beamExpanCount_);
candidateInBeam_.resize(beamExpanCount_);
goldSequence_.resize(beamExpanCount_);
gradToInputs_.resize(beamExpanCount_);
setNeedSequenceInfo(false);
return true; return true;
} }
// checkInputs(): validates the layer inputs and records batchSize_ and
// beamSize_.
//
// Inputs come in groups of three per beam expansion i: the candidate scores
// (input 3i), the selected candidate ids (input 3i + 1) and the gold labels
// (input 3i + 2). Any violated expectation aborts through CHECK.
//
// NOTE(review): the empty forward(PassType) stub fused onto the line below
// looks like residue of the definition this change replaces (a full forward()
// is defined further down) -- confirm before building from this text.
void CrossEntropyOverBeam::forward(PassType passType) {} void CrossEntropyOverBeam::checkInputs() {
batchSize_ = 0;
for (size_t i = 0; i < beamExpanCount_; ++i) {
const Argument& scores = getInput(i * 3);
const Argument& selCandidates = getInput(i * 3 + 1);
const Argument& goldSeq = getInput(i * 3 + 2);
if (i) {
// Every expansion after the first: scores must be a nested sequence, the
// candidate matrix must be beamSize_ wide, the number of sequences must match
// the first expansion, and there must be one candidate row per sub-sequence.
CHECK(scores.hasSubseq()) << "Beam expansion expect the first one, "
"should be a nested sequence";
CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_);
CHECK_EQ(scores.getNumSequences(), batchSize_);
CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize());
} else {
// First expansion: scores must be a sequence. It defines the batch size
// (number of sequences) and the beam size (width of the candidate matrix),
// and it must have one candidate row per sequence.
CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence";
batchSize_ = scores.getNumSequences();
beamSize_ = getInputValue(i * 3 + 1)->getWidth();
CHECK_EQ(batchSize_, selCandidates.getBatchSize());
}
// For every expansion: scores are a single column and there is one gold
// entry per sequence.
CHECK_EQ(1U, scores.value->getWidth());
CHECK_EQ(batchSize_, goldSeq.getBatchSize());
}
}
/*
 * Make all inputs available in CPU memory and compute beamSplitPos_.
 *
 * For every beam expansion the candidate scores, the selected candidate ids
 * and the gold labels are either copied into CPU buffers (when the input is a
 * GpuMatrix / GpuIVector) or shared with the input directly.
 *
 * beamSplitPos_[j][i] is filled with the cumulative number of candidate rows
 * that belong to sequences 0..j in expansion i: j + 1 for the first expansion
 * (one row per sequence), and the running count of sub-sequences for the
 * later expansions.
 */
void CrossEntropyOverBeam::copyInputsToCpu() {
  auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) {
    if (dynamic_cast<GpuMatrix*>(src.get())) {
      Matrix::resizeOrCreate(
          trg, src->getHeight(), src->getWidth(), false, false);
      trg->copyFrom(*src);
    } else {
      /*
       * src is a const reference, so std::move(src) could never move and
       * always copied the pointer; say so explicitly.
       */
      trg = src;
    }
  };

  auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) {
    if (dynamic_cast<GpuIVector*>(src.get())) {
      IVector::resizeOrCreate(trg, src->getSize(), false);
      trg->copyFrom(*src);
    } else {
      /* same as above: a plain shared-pointer copy. */
      trg = src;
    }
  };

  beamSplitPos_.clear();
  beamSplitPos_.resize(batchSize_, std::vector<int>(beamExpanCount_, 0));
  for (size_t i = 0; i < beamExpanCount_; ++i) {
    copyValue(getInputValue(i * 3), candidateScores_[i]);
    copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]);
    copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]);

    if (i) {
      ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions;
      const int* seqStarts = seqInfo->getMutableData(false);
      ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions;
      const int* subSeqStarts = subSeqInfo->getMutableData(false);

      size_t seqId = 1;
      /*
       * `subSeqId + 1 < size` instead of `subSeqId < size - 1`: the latter
       * wraps around when the start-position vector is empty.
       */
      for (size_t subSeqId = 0; subSeqId + 1 < subSeqInfo->getSize();
           ++subSeqId) {
        CHECK_LT(seqId, seqInfo->getSize());
        if (subSeqStarts[subSeqId] == seqStarts[seqId]) {
          /* a new sequence starts here: carry the running count over. */
          beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i];
          seqId++;
        }
        beamSplitPos_[seqId - 1][i]++;
      }
    } else {
      for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1;
    }
  }
}
// Splits the batched inputs into one BeamExpansion per sequence.
//
// For every expansion i and every sequence j this fills beamPerSeq_[j] with
// the slice of the scores, score gradients, start positions, candidate ids
// and the gold label that belong to sequence j.
void CrossEntropyOverBeam::splitBatchBeams() {
beamCosts_.resize(batchSize_);
// The second argument is converted to a BeamExpansion through its
// constructor taking an int.
beamPerSeq_.resize(batchSize_, beamExpanCount_);
for (size_t i = 0; i < beamExpanCount_; ++i) {
int* seqStarts =
getInput(i * 3).sequenceStartPositions->getMutableData(false);
int* subSeqStarts = nullptr;
// maxLen: number of rows described by the start positions used for this
// expansion (sub-sequences after the first expansion, sequences for the
// first one). It is only used by the CHECK_GE below.
int maxLen = 0;
if (i) {
subSeqStarts =
getInput(i * 3).subSequenceStartPositions->getMutableData(false);
maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1;
} else
// Only reached when i == 0, so getInput(i) is the same input as
// getInput(i * 3).
maxLen = getInput(i).sequenceStartPositions->getSize() - 1;
for (size_t j = 0; j < batchSize_; ++j) {
// Scores and score gradients of sequence j: the rows
// [seqStarts[j], seqStarts[j + 1]) of the single-column CPU matrices.
// Presumably Matrix::create over an existing pointer wraps the memory
// without copying -- confirm against the Matrix API.
beamPerSeq_[j].scores[i] =
Matrix::create(candidateScores_[i]->getData() + seqStarts[j],
seqStarts[j + 1] - seqStarts[j],
1,
false,
false);
beamPerSeq_[j].scoreGrad[i] =
Matrix::create(candidateScoreGrad_[i]->getData() + seqStarts[j],
seqStarts[j + 1] - seqStarts[j],
1,
false,
false);
// beamSplitPos_ holds cumulative row counts, so the rows of sequence j are
// [offset, offset + height).
int offset = j ? beamSplitPos_[j - 1][i] : 0;
int height = beamSplitPos_[j][i] - (j ? beamSplitPos_[j - 1][i] : 0);
CHECK_GE(maxLen, offset + height);
// height + 1 start positions delimit the height rows of this sequence.
beamPerSeq_[j].seqInfo[i] = IVector::create(
(i ? subSeqStarts : seqStarts) + offset, height + 1, false);
// NOTE(review): the empty backward(...) stub fused onto the next line looks
// like residue of the definition this change replaces (a full backward() is
// defined further down) -- confirm before building from this text.
void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {} beamPerSeq_[j].candidateIds[i] =
Matrix::create(candidateInBeam_[i]->getData() + offset * beamSize_,
height,
beamSize_,
false,
false);
beamPerSeq_[j].gold[i] = goldSequence_[i]->getData()[j];
}
}
}
void CrossEntropyOverBeam::resizeOutput() {
Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false);
output_.value->zero();
for (size_t i = 0; i < beamExpanCount_; ++i) {
MatrixPtr inGrad = getInputGrad(i * 3);
if (dynamic_cast<GpuMatrix*>(inGrad.get())) {
Matrix::resizeOrCreate(candidateScoreGrad_[i],
inGrad->getHeight(),
inGrad->getWidth(),
false,
false);
} else
candidateScoreGrad_[i] = std::move(inGrad);
candidateScoreGrad_[i]->zero();
}
}
void CrossEntropyOverBeam::copyGradToGpu(size_t copyCount) {
for (size_t i = 0; i < beamExpanCount_; ++i) {
if (dynamic_cast<GpuMatrix*>(getInputGrad(i * 3).get()))
getInputGrad(i * 3)->copyFrom(*candidateScoreGrad_[i]);
if (i == copyCount - 1) break;
}
}
/*
 * Forward pass of the layer: validate the inputs, bring them to CPU memory,
 * prepare the output and gradient buffers, split the batch into one beam per
 * sequence, then compute the cost of each sequence into the output.
 */
void CrossEntropyOverBeam::forward(PassType passType) {
  Layer::forward(passType);

  checkInputs();
  copyInputsToCpu();
  resizeOutput();
  splitBatchBeams();

  const MatrixPtr outputValue = getOutputValue();
  real* costs = outputValue->getData();
  for (size_t seqId = 0; seqId < batchSize_; ++seqId) {
    CostForOneSequence& seqCost = beamCosts_[seqId];
    /* each sequence gets its own copy of its BeamExpansion. */
    seqCost.setData(std::make_shared<BeamExpansion>(beamPerSeq_[seqId]),
                    beamSize_);
    costs[seqId] = seqCost.forward();
  }
}
void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {
for (size_t i = 0; i < batchSize_; ++i) {
beamCosts_[i].backward();
copyGradToGpu(beamCosts_[i].getValidExpansionCount());
}
}
} // namespace paddle } // namespace paddle
...@@ -19,6 +19,79 @@ limitations under the License. */ ...@@ -19,6 +19,79 @@ limitations under the License. */
namespace paddle { namespace paddle {
/*
 * Stores the entire beam expansion for a single sequence. Every vector holds
 * one entry per expansion step.
 */
struct BeamExpansion {
  // candidate scores of each expansion.
  std::vector<MatrixPtr> scores;
  // start positions used to locate a candidate row inside the scores.
  std::vector<IVectorPtr> seqInfo;
  // selected candidate ids of each expansion; -1 marks an unused slot.
  std::vector<MatrixPtr> candidateIds;
  // gold label of each expansion.
  std::vector<int> gold;
  // gradients with respect to the candidate scores.
  std::vector<MatrixPtr> scoreGrad;
  // number of expansion steps.
  size_t expansionCount;

  // Sizes every per-expansion vector to n entries. Intentionally left
  // implicit: CrossEntropyOverBeam::splitBatchBeams passes a plain count where
  // a BeamExpansion is expected.
  BeamExpansion(int n)
      : scores(static_cast<size_t>(n)),
        seqInfo(static_cast<size_t>(n)),
        candidateIds(static_cast<size_t>(n)),
        gold(static_cast<size_t>(n)),
        scoreGrad(static_cast<size_t>(n)),
        expansionCount(static_cast<size_t>(n)) {}
};
using BeamExpansionPtr = std::shared_ptr<BeamExpansion>;
/*
 * Computes the cost over all beam expansions of a single sequence and
 * propagates its gradient. Call setData() first, then forward(), then
 * backward().
 */
class CostForOneSequence {
public:
  // goldIdsInFinalExpansion_ is initialized as well, so that no member is
  // left indeterminate before the first forward().
  CostForOneSequence()
      : beamSize_(0),
        validExpansionCount_(0),
        goldAsExtraPath_(false),
        goldIdsInFinalExpansion_(0) {}

  // Attaches the beam expansions of one sequence and resets the per-expansion
  // bookkeeping: gold rows start at 0 and gold columns at -1 (not found).
  void setData(const BeamExpansionPtr bPtr, size_t beamSize) {
    beams_ = bPtr;
    beamSize_ = beamSize;

    expandedPathScores_.clear();
    expandedPathScores_.resize(beams_->expansionCount);

    goldRowIds_.clear();
    goldRowIds_.resize(beams_->expansionCount, 0);
    goldColIds_.clear();
    goldColIds_.resize(beams_->expansionCount, -1);
  }

  // Number of expansions counted as valid by the last forward().
  size_t getValidExpansionCount() const { return validExpansionCount_; }

  // Returns the cost of the attached sequence.
  real forward();
  // Adds the gradient of the cost to beams_->scoreGrad.
  void backward();

private:
  void calValidExpandStep();
  void constructTotalExpansion();
  size_t initLastExpansion();
  real globallyNormalizedScore();

  // Start position of row rowId in expansion beamId, relative to the first
  // start position of that expansion. rowId must address one of the rows
  // described by seqInfo, otherwise the CHECK fails.
  int getSeqStartPos(size_t beamId, size_t rowId) {
    CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId);
    int* starts = beams_->seqInfo[beamId]->getData();
    return starts[rowId] - starts[0];
  }

  // width of one row of candidate ids.
  size_t beamSize_;
  // number of expansions that take part in the cost.
  size_t validExpansionCount_;
  // true when the gold sequence is added as one extra path.
  bool goldAsExtraPath_;
  // row / column of the gold label in the candidate ids of each expansion;
  // a column of -1 means the gold label has not been located.
  std::vector<int> goldRowIds_;
  std::vector<int> goldColIds_;

  BeamExpansionPtr beams_;
  // for each valid expansion, the score row id of every path.
  std::vector<std::vector<int>> pathRowIdsInEachBeam_;
  // for every path, its parent in the expansion currently being processed.
  std::vector<int> parentIdsInBeam_;
  // index of the gold path among all paths.
  size_t goldIdsInFinalExpansion_;

  // scores gathered for every path, one matrix per valid expansion.
  std::vector<MatrixPtr> expandedPathScores_;
  // softmax over the summed path scores; reused by backward().
  MatrixPtr softmaxOut_;
};
class CrossEntropyOverBeam : public Layer { class CrossEntropyOverBeam : public Layer {
public: public:
explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {}
...@@ -26,6 +99,31 @@ public: ...@@ -26,6 +99,31 @@ public:
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
void forward(PassType passType) override; void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override; void backward(const UpdateCallback& callback) override;
private:
void checkInputs();
void copyInputsToCpu();
void resizeOutput();
void copyGradToGpu(size_t copyCount);
void splitBatchBeams();
size_t beamExpanCount_;
size_t batchSize_;
size_t beamSize_;
// Currently, this layer only works on CPU. If its inputs are on GPU,
// they are copied to CPU memory.
std::vector<MatrixPtr> candidateScores_;
std::vector<MatrixPtr> candidateScoreGrad_;
std::vector<MatrixPtr> candidateInBeam_;
std::vector<MatrixPtr> gradToInputs_;
std::vector<IVectorPtr> goldSequence_;
std::vector<std::vector<int>> beamSplitPos_;
// split the entire batch of beams into one beam per sequence.
std::vector<BeamExpansion> beamPerSeq_;
// beamCosts_ is used to propagate error in one sequence.
std::vector<CostForOneSequence> beamCosts_;
}; };
} // namespace paddle } // namespace paddle
...@@ -28,9 +28,17 @@ using namespace paddle; // NOLINT ...@@ -28,9 +28,17 @@ using namespace paddle; // NOLINT
DECLARE_int32(gpu_id); DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(thread_local_rand_use_global_seed);
const size_t MAX_SEQ_NUM = 10; // const size_t MAX_SEQ_NUM = 5;
const size_t MAX_SEQ_LEN = 27; // const size_t MAX_SEQ_LEN = 10;
const size_t MAX_BEAM_SIZE = 10; // const size_t MAX_BEAM_SIZE = 3;
const size_t MAX_SEQ_NUM = 23;
const size_t MAX_SEQ_LEN = 50;
const size_t MAX_BEAM_SIZE = 27;
// const size_t SEED = 1503391792;
// const size_t SEED = 1;
const size_t SEED = (size_t)(time(NULL));
struct SingleBeamExpansion { struct SingleBeamExpansion {
vector<int> seqStartPos; vector<int> seqStartPos;
...@@ -43,11 +51,30 @@ struct SingleBeamExpansion { ...@@ -43,11 +51,30 @@ struct SingleBeamExpansion {
vector<int> groundTruth; vector<int> groundTruth;
vector<size_t> inBeam; vector<size_t> inBeam;
vector<int> rowIdxInBeam; vector<int> rowIdxInBeam;
vector<int> colIdxInBeam;
// Re-initializes the ground-truth bookkeeping for n sequences: labels, row
// indices and column indices are set to -1, the in-beam flags to 0.
void resetGroundTruth(size_t n) {
  groundTruth.assign(n, -1);
  inBeam.assign(n, 0);
  rowIdxInBeam.assign(n, -1);
  colIdxInBeam.assign(n, -1);
}
}; };
// Returns rand() scaled into [0, 1] as a float.
inline float randFloat() {
  const float sample = static_cast<float>(rand());
  const float upperBound = static_cast<float>(RAND_MAX);
  return sample / upperBound;
}
void genRand(real* numbers, size_t n) { void genRand(real* numbers, size_t n) {
default_random_engine generator; default_random_engine generator;
uniform_real_distribution<double> distribution(0.0, 1.0); uniform_real_distribution<real> distribution(0.0, 1.0);
for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator);
} }
...@@ -72,8 +99,7 @@ void genCandidateScores(bool hasSubseq, ...@@ -72,8 +99,7 @@ void genCandidateScores(bool hasSubseq,
vector<int>& subSeqStartPos = curBeam.subSeqStartPos; vector<int>& subSeqStartPos = curBeam.subSeqStartPos;
subSeqStartPos.resize(1, 0); subSeqStartPos.resize(1, 0);
srand((size_t)(time(NULL))); srand(SEED);
// srand(1);
if (prevBeam.selectedIndices.size()) { if (prevBeam.selectedIndices.size()) {
if (prevBeam.subSeqStartPos.size() > 1) { if (prevBeam.subSeqStartPos.size() > 1) {
int seqIdx = 1; int seqIdx = 1;
...@@ -81,7 +107,6 @@ void genCandidateScores(bool hasSubseq, ...@@ -81,7 +107,6 @@ void genCandidateScores(bool hasSubseq,
for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) {
for (size_t j = 0; j < beamSize; ++j) { for (size_t j = 0; j < beamSize; ++j) {
if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break;
for (size_t k = 0; k < beamSize; ++k)
subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) +
subSeqStartPos.back()); subSeqStartPos.back());
} }
...@@ -91,7 +116,6 @@ void genCandidateScores(bool hasSubseq, ...@@ -91,7 +116,6 @@ void genCandidateScores(bool hasSubseq,
} }
} }
} else { } else {
// samples in previous beam are sequences.
for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) {
if (i && i % beamSize == 0) { if (i && i % beamSize == 0) {
seqStartPos.push_back(subSeqStartPos.back()); seqStartPos.push_back(subSeqStartPos.back());
...@@ -141,27 +165,41 @@ void genSelectedIndices(size_t beamSize, ...@@ -141,27 +165,41 @@ void genSelectedIndices(size_t beamSize,
void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions, void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
size_t beamSize) { size_t beamSize) {
size_t seqNum = beamExpansions[1].seqStartPos.size() - 1; SingleBeamExpansion& beam = beamExpansions[1];
size_t seqNum = beam.seqStartPos.size() - 1;
for (size_t i = 2; i < beamExpansions.size(); ++i) for (size_t i = 2; i < beamExpansions.size(); ++i)
CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1); CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1);
// srand(1); srand(SEED);
srand((size_t)(time(NULL)));
// initialize the first beam. // initialize the first beam.
SingleBeamExpansion& beam = beamExpansions[1]; beam.resetGroundTruth(seqNum);
beam.groundTruth.resize(seqNum, 0);
beam.inBeam.resize(seqNum, 0);
beam.rowIdxInBeam.resize(seqNum, -1);
auto begPos = beam.selectedIndices.begin();
for (size_t i = 0; i < seqNum; ++i) { for (size_t i = 0; i < seqNum; ++i) {
int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i]; if (randFloat() > 0.5) {
int label = rand() % seqLen; // force the randomly generated label falls in the beam by chance 0.5.
auto endPos = begPos + beamSize; // otherwise, when sequence length is relatively long and beam size is
// relatively small, the gold sequences falls off the beam at in
// the first search.
real* begPos = beam.selectedIndices.data() + i * beamSize;
beam.colIdxInBeam[i] =
rand() % count_if(begPos, begPos + beamSize, [](const real& val) {
return val != -1.;
});
beam.groundTruth[i] =
beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]];
beam.inBeam[i] = 1;
} else {
int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]);
beam.groundTruth[i] = label; beam.groundTruth[i] = label;
if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1;
begPos = endPos; real* begPos = beam.selectedIndices.data() + i * beamSize;
real* endPos = begPos + beamSize;
real* lblPos = find(begPos, endPos, real(label));
if (lblPos != endPos) {
beam.inBeam[i] = 1;
beam.colIdxInBeam[i] = lblPos - begPos;
}
}
beam.rowIdxInBeam[i] = i; beam.rowIdxInBeam[i] = i;
} }
...@@ -169,22 +207,33 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions, ...@@ -169,22 +207,33 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
for (size_t i = 2; i < beamExpansions.size(); ++i) { for (size_t i = 2; i < beamExpansions.size(); ++i) {
SingleBeamExpansion& curBeam = beamExpansions[i]; SingleBeamExpansion& curBeam = beamExpansions[i];
SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; SingleBeamExpansion& prevBeam = beamExpansions[i - 1];
curBeam.resetGroundTruth(seqNum);
curBeam.groundTruth.resize(seqNum, 0);
curBeam.inBeam.resize(seqNum, 0);
curBeam.rowIdxInBeam.resize(seqNum, -1);
// iterate over each sequence // iterate over each sequence
for (size_t j = 0; j < seqNum; ++j) { for (size_t j = 0; j < seqNum; ++j) {
if (prevBeam.inBeam[j]) { if (!prevBeam.inBeam[j]) continue;
// gold sequence falls in the beam in previous search.
auto begPos = prevBeam.selectedIndices.begin();
auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize;
size_t totalExpansion =
prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.);
curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j];
// gold sequence falls in the beam in previous search.
real* begPos = prevBeam.selectedIndices.data();
int offset =
prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j];
curBeam.rowIdxInBeam[j] = count_if(
begPos, begPos + offset, [](const real& val) { return val != -1.; });
if (randFloat() > 0.5) {
// force the randomly generated label falls in the beam by chance 0.5.
// otherwise, when sequence length is relatively long and beam size is
// relatively small, the gold sequences falls off the beam at in
// the first search.
real* start =
curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize;
int n = rand() % count_if(start, start + beamSize, [](const real& val) {
return val != -1.;
});
curBeam.colIdxInBeam[j] = n;
curBeam.groundTruth[j] = *(start + n);
curBeam.inBeam[j] = 1;
} else {
CHECK_LE(curBeam.rowIdxInBeam[j] + 1, CHECK_LE(curBeam.rowIdxInBeam[j] + 1,
curBeam.subSeqStartPos.size() - 1); curBeam.subSeqStartPos.size() - 1);
int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]];
...@@ -193,16 +242,14 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions, ...@@ -193,16 +242,14 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
int label = rand() % (end - start); int label = rand() % (end - start);
curBeam.groundTruth[j] = label; curBeam.groundTruth[j] = label;
auto findBeg = curBeam.selectedIndices.begin() + real* findBeg =
curBeam.rowIdxInBeam[j] * beamSize; curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize;
auto findEnd = findBeg + beamSize; real* lblPos =
if (find(findBeg, findEnd, real(label)) != findEnd) find(findBeg, findBeg + beamSize, static_cast<real>(label));
if (lblPos != (findBeg + beamSize)) {
curBeam.inBeam[j] = 1; curBeam.inBeam[j] = 1;
} else { curBeam.colIdxInBeam[j] = lblPos - findBeg;
// in previous search, gold sequence has fallen off the beam, }
// the beam search stops, here use -1 as a dummy label.
// It will not used in calculation the cost.
beamExpansions[i].groundTruth[j] = -1;
} }
} }
} }
...@@ -230,15 +277,12 @@ void genRandomBeamExpansion(size_t expansionCount, ...@@ -230,15 +277,12 @@ void genRandomBeamExpansion(size_t expansionCount,
genGroundTruth(beamExpansions, beamSize); genGroundTruth(beamExpansions, beamSize);
} }
void testCrossEntropyOverBeam(bool useGpu) { void testCrossEntropyOverBeam(bool useGpu,
size_t beamSize,
vector<SingleBeamExpansion>& beams) {
TestConfig config; TestConfig config;
config.layerConfig.set_type("cross_entropy_over_beam"); config.layerConfig.set_type("cross_entropy_over_beam");
const size_t expansionCount = 3;
const size_t beamSize = MAX_BEAM_SIZE;
vector<SingleBeamExpansion> beams;
genRandomBeamExpansion(expansionCount, beamSize, beams);
size_t seqNum = 0; size_t seqNum = 0;
for (size_t i = 1; i < beams.size(); ++i) { for (size_t i = 1; i < beams.size(); ++i) {
const SingleBeamExpansion& beam = beams[i]; const SingleBeamExpansion& beam = beams[i];
...@@ -291,7 +335,17 @@ void testCrossEntropyOverBeam(bool useGpu) { ...@@ -291,7 +335,17 @@ void testCrossEntropyOverBeam(bool useGpu) {
} }
TEST(Layer, CrossEntropyOverBeam) { TEST(Layer, CrossEntropyOverBeam) {
for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu); LOG(INFO) << "SEED = " << SEED;
const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE;
LOG(INFO) << "beamSize = " << beamSize;
// TODO(caoying): test with more beam expansions.
const size_t expansionCount = 3;
vector<SingleBeamExpansion> beams;
genRandomBeamExpansion(expansionCount, beamSize, beams);
for (bool useGpu : {false, true})
testCrossEntropyOverBeam(useGpu, beamSize, beams);
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
...@@ -299,7 +353,7 @@ int main(int argc, char** argv) { ...@@ -299,7 +353,7 @@ int main(int argc, char** argv) {
hl_start(); hl_start();
hl_init(FLAGS_gpu_id); hl_init(FLAGS_gpu_id);
FLAGS_thread_local_rand_use_global_seed = true; FLAGS_thread_local_rand_use_global_seed = true;
srand(1); srand(SEED);
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册