/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include #include #include #include "ModelConfig.pb.h" #include "paddle/gserver/layers/DataLayer.h" #include "paddle/trainer/Trainer.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); // const size_t MAX_SEQ_NUM = 5; // const size_t MAX_SEQ_LEN = 10; // const size_t MAX_BEAM_SIZE = 3; const size_t MAX_SEQ_NUM = 23; const size_t MAX_SEQ_LEN = 50; const size_t MAX_BEAM_SIZE = 27; // const size_t SEED = 1503391792; // const size_t SEED = 1; const size_t SEED = (size_t)(time(NULL)); struct SingleBeamExpansion { vector seqStartPos; vector subSeqStartPos; vector candidateScores; // TODO(caoying): store this into Argument.ids vector selectedIndices; vector groundTruth; vector inBeam; vector rowIdxInBeam; vector colIdxInBeam; void resetGroundTruth(size_t n) { groundTruth.clear(); groundTruth.resize(n, -1); inBeam.clear(); inBeam.resize(n, 0); rowIdxInBeam.clear(); rowIdxInBeam.resize(n, -1); colIdxInBeam.clear(); colIdxInBeam.resize(n, -1); } }; inline float randFloat() { return static_cast(rand()) / static_cast(RAND_MAX); } void genRand(real* numbers, size_t n) { default_random_engine generator; uniform_real_distribution distribution(0.0, 1.0); for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); } vector randSampling(real range, int n) { CHECK_GE(range, n); vector num(range); iota(begin(num), end(num), 0.); if (range == n) return num; random_shuffle(begin(num), end(num)); num.resize(n); sort(begin(num), end(num)); return num; } void genCandidateScores(bool hasSubseq, size_t beamSize, SingleBeamExpansion& prevBeam, SingleBeamExpansion& curBeam) { vector& seqStartPos = curBeam.seqStartPos; seqStartPos.resize(1, 0); vector& subSeqStartPos = curBeam.subSeqStartPos; subSeqStartPos.resize(1, 0); srand(SEED); if (prevBeam.selectedIndices.size()) { if (prevBeam.subSeqStartPos.size() > 1) { int seqIdx = 1; // samples in previous beam are nested sequences. for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { for (size_t j = 0; j < beamSize; ++j) { if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + subSeqStartPos.back()); } if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { seqStartPos.push_back(subSeqStartPos.back()); seqIdx++; } } } else { for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { if (i && i % beamSize == 0) { seqStartPos.push_back(subSeqStartPos.back()); if (i == prevBeam.selectedIndices.size()) break; } if (prevBeam.selectedIndices[i] == -1.) continue; subSeqStartPos.push_back(subSeqStartPos.back() + (1 + (rand() % MAX_SEQ_LEN))); } } } else { // the first beam expansion int seqNum = 1 + (rand() % MAX_SEQ_NUM); for (int i = 0; i < seqNum; ++i) { if (hasSubseq) { for (size_t j = 0; j < 1 + (rand() % MAX_SEQ_NUM); ++j) subSeqStartPos.push_back(subSeqStartPos.back() + (1 + (rand() % MAX_SEQ_LEN))); seqStartPos.push_back(subSeqStartPos.back()); } else { seqStartPos.push_back(seqStartPos.back() + (1 + (rand() % MAX_SEQ_LEN))); } } } size_t totalSeqNum = hasSubseq ? subSeqStartPos.back() : seqStartPos.back(); curBeam.candidateScores.resize(totalSeqNum, 0.); genRand(curBeam.candidateScores.data(), totalSeqNum); } void genSelectedIndices(size_t beamSize, vector& seqStartPos, vector& selectedIndices) { size_t selectedIdsCount = beamSize * (seqStartPos.size() - 1); selectedIndices.resize(selectedIdsCount, -1.); for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { int seqLen = seqStartPos[i + 1] - seqStartPos[i]; int n = min(seqLen, static_cast(beamSize)); vector ids = randSampling(seqLen, n); memcpy(selectedIndices.data() + i * beamSize, ids.data(), sizeof(real) * ids.size()); } } void genGroundTruth(vector& beamExpansions, size_t beamSize) { SingleBeamExpansion& beam = beamExpansions[1]; size_t seqNum = beam.seqStartPos.size() - 1; for (size_t i = 2; i < beamExpansions.size(); ++i) CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1); srand(SEED); // initialize the first beam. beam.resetGroundTruth(seqNum); for (size_t i = 0; i < seqNum; ++i) { if (randFloat() > 0.5) { // force the randomly generated label falls in the beam by chance 0.5. // otherwise, when sequence length is relatively long and beam size is // relatively small, the gold sequences falls off the beam at in // the first search. real* begPos = beam.selectedIndices.data() + i * beamSize; beam.colIdxInBeam[i] = rand() % count_if(begPos, begPos + beamSize, [](const real& val) { return val != -1.; }); beam.groundTruth[i] = beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]]; beam.inBeam[i] = 1; } else { int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]); beam.groundTruth[i] = label; real* begPos = beam.selectedIndices.data() + i * beamSize; real* endPos = begPos + beamSize; real* lblPos = find(begPos, endPos, real(label)); if (lblPos != endPos) { beam.inBeam[i] = 1; beam.colIdxInBeam[i] = lblPos - begPos; } } beam.rowIdxInBeam[i] = i; } // iterate over each beam expansions for (size_t i = 2; i < beamExpansions.size(); ++i) { SingleBeamExpansion& curBeam = beamExpansions[i]; SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; curBeam.resetGroundTruth(seqNum); // iterate over each sequence for (size_t j = 0; j < seqNum; ++j) { if (!prevBeam.inBeam[j]) continue; // gold sequence falls in the beam in previous search. real* begPos = prevBeam.selectedIndices.data(); int offset = prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j]; curBeam.rowIdxInBeam[j] = count_if( begPos, begPos + offset, [](const real& val) { return val != -1.; }); if (randFloat() > 0.5) { // force the randomly generated label falls in the beam by chance 0.5. // otherwise, when sequence length is relatively long and beam size is // relatively small, the gold sequences falls off the beam at in // the first search. real* start = curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; int n = rand() % count_if(start, start + beamSize, [](const real& val) { return val != -1.; }); curBeam.colIdxInBeam[j] = n; curBeam.groundTruth[j] = *(start + n); curBeam.inBeam[j] = 1; } else { CHECK_LE(curBeam.rowIdxInBeam[j] + 1, curBeam.subSeqStartPos.size() - 1); int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j] + 1]; CHECK_GT(size_t(end), size_t(start)); int label = rand() % (end - start); curBeam.groundTruth[j] = label; real* findBeg = curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; real* lblPos = find(findBeg, findBeg + beamSize, static_cast(label)); if (lblPos != (findBeg + beamSize)) { curBeam.inBeam[j] = 1; curBeam.colIdxInBeam[j] = lblPos - findBeg; } } } } } void genOneBeam(size_t beamSize, bool hasSubseq, SingleBeamExpansion& prevBeam, SingleBeamExpansion& curBeam) { genCandidateScores(hasSubseq, beamSize, prevBeam, curBeam); genSelectedIndices(beamSize, hasSubseq ? curBeam.subSeqStartPos : curBeam.seqStartPos, curBeam.selectedIndices); } void genRandomBeamExpansion(size_t expansionCount, size_t beamSize, vector& beamExpansions) { beamExpansions.clear(); beamExpansions.resize(expansionCount + 1); // beamExpansions[0] is reserved. for (size_t i = 1; i <= expansionCount; ++i) genOneBeam(beamSize, bool(i - 1), beamExpansions[i - 1], beamExpansions[i]); genGroundTruth(beamExpansions, beamSize); } void testCrossEntropyOverBeam(bool useGpu, size_t beamSize, vector& beams) { TestConfig config; config.layerConfig.set_type("cross_entropy_over_beam"); size_t seqNum = 0; for (size_t i = 1; i < beams.size(); ++i) { const SingleBeamExpansion& beam = beams[i]; // create scores for all the candidates MatrixPtr candidateScorePtr = Matrix::create(beam.candidateScores.size(), 1, false, false); candidateScorePtr->copyFrom(beam.candidateScores.data(), beam.candidateScores.size()); ostringstream paramName; paramName << "candidate_scores_" << i; if (beam.subSeqStartPos.size() > 1) { seqNum = beam.subSeqStartPos.size() - 1; config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, paramName.str(), candidateScorePtr, beam.seqStartPos, beam.subSeqStartPos}); } else { seqNum = beam.seqStartPos.size() - 1; config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, paramName.str(), candidateScorePtr, beam.seqStartPos}); } config.layerConfig.add_inputs(); // create indices for the selected candidates MatrixPtr selectedCandidates = Matrix::create(seqNum, beamSize, false, false); selectedCandidates->copyFrom(beam.selectedIndices.data(), beam.selectedIndices.size()); paramName.clear(); paramName << "selected_candidates_" << i; config.inputDefs.push_back( {INPUT_SELF_DEFINE_DATA, paramName.str(), selectedCandidates}); config.layerConfig.add_inputs(); // create the ground truth paramName.clear(); paramName << "label_" << i; config.inputDefs.push_back( {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth}); config.layerConfig.add_inputs(); } testLayerGrad( config, "cross_entropy_over_beam", seqNum, false, useGpu, false); } TEST(Layer, CrossEntropyOverBeam) { LOG(INFO) << "SEED = " << SEED; const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE; LOG(INFO) << "beamSize = " << beamSize; // TODO(caoying): test with more beam expansions. const size_t expansionCount = 3; vector beams; genRandomBeamExpansion(expansionCount, beamSize, beams); for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu, beamSize, beams); } int main(int argc, char** argv) { initMain(argc, argv); hl_start(); hl_init(FLAGS_gpu_id); FLAGS_thread_local_rand_use_global_seed = true; srand(SEED); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); }