diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 8b6223ec6a82622fe72f0955de839dbaa8160839..88d80aa83af5c12a92096b0836aa1c1c909b94d8 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -22,6 +22,7 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); + CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; setNeedSequenceInfo(false); diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index e9ecebcfe52044ba3870fff7a907f98bb9fd65ce..a5f06c15dc48084e999ea656a83c7e2765c72b93 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include @@ -27,6 +28,10 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); +const size_t MAX_SEQ_NUM = 10; +const size_t MAX_SEQ_LEN = 27; +const size_t MAX_BEAM_SIZE = 10; + struct SingleBeamExpansion { vector seqStartPos; vector subSeqStartPos; @@ -34,37 +39,195 @@ struct SingleBeamExpansion { // TODO(caoying): store this into Argument.ids vector selectedIndices; + vector groundTruth; - vector labelSeqStartPos; + vector inBeam; + vector rowIdxInBeam; }; -void genCandidateScores(bool hasSubSeq, - vector& scores, +void genRand(real* numbers, size_t n) { + default_random_engine generator; + uniform_real_distribution distribution(0.0, 1.0); + for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); +} + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +void genCandidateScores(bool hasSubseq, + size_t beamSize, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + vector& seqStartPos = curBeam.seqStartPos; + seqStartPos.resize(1, 0); + vector& subSeqStartPos = curBeam.subSeqStartPos; + subSeqStartPos.resize(1, 0); + + srand((size_t)(time(NULL))); + // srand(1); + if (prevBeam.selectedIndices.size()) { + if (prevBeam.subSeqStartPos.size() > 1) { + int seqIdx = 1; + // samples in previous beam are nested sequences. + for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { + for (size_t j = 0; j < beamSize; ++j) { + if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; + for (size_t k = 0; k < beamSize; ++k) + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + + subSeqStartPos.back()); + } + if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { + seqStartPos.push_back(subSeqStartPos.back()); + seqIdx++; + } + } + } else { + // samples in previous beam are sequences. + for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { + if (i && i % beamSize == 0) { + seqStartPos.push_back(subSeqStartPos.back()); + if (i == prevBeam.selectedIndices.size()) break; + } + if (prevBeam.selectedIndices[i] == -1.) continue; + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } else { + // the first beam expansion + int seqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int i = 0; i < seqNum; ++i) { + if (hasSubseq) { + for (size_t j = 0; j < 1 + (rand() % MAX_SEQ_NUM); ++j) + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + seqStartPos.push_back(subSeqStartPos.back()); + } else { + seqStartPos.push_back(seqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } + + size_t totalSeqNum = hasSubseq ? subSeqStartPos.back() : seqStartPos.back(); + curBeam.candidateScores.resize(totalSeqNum, 0.); + genRand(curBeam.candidateScores.data(), totalSeqNum); +} + +void genSelectedIndices(size_t beamSize, vector& seqStartPos, - vector& subSeqStartPos) {} - -void genSelectedIndicesAndGroundtruth(size_t beamSize, - vector& seqStartPos, - vector& selectedIndices) {} - -SingleBeamExpansion genOneBeam(size_t beamSize, bool hasSubSeq) { - SingleBeamExpansion beam; - genCandidateScores( - hasSubSeq, beam.candidateScores, beam.seqStartPos, beam.subSeqStartPos); - genSelectedIndicesAndGroundtruth( - beamSize, - hasSubSeq ? beam.subSeqStartPos : beam.seqStartPos, - beam.selectedIndices); - return beam; + vector& selectedIndices) { + size_t selectedIdsCount = beamSize * (seqStartPos.size() - 1); + selectedIndices.resize(selectedIdsCount, -1.); + + for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + int n = min(seqLen, static_cast(beamSize)); + vector ids = randSampling(seqLen, n); + memcpy(selectedIndices.data() + i * beamSize, + ids.data(), + sizeof(real) * ids.size()); + } +} + +void genGroundTruth(vector& beamExpansions, + size_t beamSize) { + size_t seqNum = beamExpansions[1].seqStartPos.size() - 1; + for (size_t i = 2; i < beamExpansions.size(); ++i) + CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1); + + // srand(1); + srand((size_t)(time(NULL))); + + // initialize the first beam. + SingleBeamExpansion& beam = beamExpansions[1]; + beam.groundTruth.resize(seqNum, 0); + beam.inBeam.resize(seqNum, 0); + beam.rowIdxInBeam.resize(seqNum, -1); + + auto begPos = beam.selectedIndices.begin(); + for (size_t i = 0; i < seqNum; ++i) { + int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i]; + int label = rand() % seqLen; + auto endPos = begPos + beamSize; + beam.groundTruth[i] = label; + if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1; + begPos = endPos; + beam.rowIdxInBeam[i] = i; + } + + // iterate over each beam expansions + for (size_t i = 2; i < beamExpansions.size(); ++i) { + SingleBeamExpansion& curBeam = beamExpansions[i]; + SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; + + curBeam.groundTruth.resize(seqNum, 0); + curBeam.inBeam.resize(seqNum, 0); + curBeam.rowIdxInBeam.resize(seqNum, -1); + + // iterate over each sequence + for (size_t j = 0; j < seqNum; ++j) { + if (prevBeam.inBeam[j]) { + // gold sequence falls in the beam in previous search. + + auto begPos = prevBeam.selectedIndices.begin(); + auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize; + size_t totalExpansion = + prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.); + curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j]; + + CHECK_LE(curBeam.rowIdxInBeam[j] + 1, + curBeam.subSeqStartPos.size() - 1); + int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; + int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j] + 1]; + CHECK_GT(size_t(end), size_t(start)); + int label = rand() % (end - start); + + curBeam.groundTruth[j] = label; + auto findBeg = curBeam.selectedIndices.begin() + + curBeam.rowIdxInBeam[j] * beamSize; + auto findEnd = findBeg + beamSize; + if (find(findBeg, findEnd, real(label)) != findEnd) + curBeam.inBeam[j] = 1; + } else { + // in previous search, gold sequence has fallen off the beam, + // the beam search stops, here use -1 as a dummy label. + // It will not used in calculation the cost. + beamExpansions[i].groundTruth[j] = -1; + } + } + } +} + +void genOneBeam(size_t beamSize, + bool hasSubseq, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + genCandidateScores(hasSubseq, beamSize, prevBeam, curBeam); + genSelectedIndices(beamSize, + hasSubseq ? curBeam.subSeqStartPos : curBeam.seqStartPos, + curBeam.selectedIndices); } void genRandomBeamExpansion(size_t expansionCount, size_t beamSize, vector& beamExpansions) { beamExpansions.clear(); - for (size_t i = 0; i < expansionCount; ++i) { - beamExpansions.emplace_back(genOneBeam(beamSize, i)); - } + beamExpansions.resize(expansionCount + 1); + + // beamExpansions[0] is reserved. + for (size_t i = 1; i <= expansionCount; ++i) + genOneBeam(beamSize, bool(i - 1), beamExpansions[i - 1], beamExpansions[i]); + genGroundTruth(beamExpansions, beamSize); } void testCrossEntropyOverBeam(bool useGpu) { @@ -72,12 +235,12 @@ void testCrossEntropyOverBeam(bool useGpu) { config.layerConfig.set_type("cross_entropy_over_beam"); const size_t expansionCount = 3; - const size_t beamSize = 3; + const size_t beamSize = MAX_BEAM_SIZE; vector beams; genRandomBeamExpansion(expansionCount, beamSize, beams); size_t seqNum = 0; - for (size_t i = 0; i < beams.size(); ++i) { + for (size_t i = 1; i < beams.size(); ++i) { const SingleBeamExpansion& beam = beams[i]; // create scores for all the candidates MatrixPtr candidateScorePtr = @@ -88,7 +251,7 @@ void testCrossEntropyOverBeam(bool useGpu) { ostringstream paramName; paramName << "candidate_scores_" << i; - if (beam.subSeqStartPos.size()) { + if (beam.subSeqStartPos.size() > 1) { seqNum = beam.subSeqStartPos.size() - 1; config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, paramName.str(), @@ -118,10 +281,9 @@ void testCrossEntropyOverBeam(bool useGpu) { // create the ground truth paramName.clear(); paramName << "label_" << i; - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - paramName.str(), - beam.groundTruth, - beam.labelSeqStartPos}); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth}); + config.layerConfig.add_inputs(); } testLayerGrad(