diff --git a/paddle/gserver/layers/DetectionUtil.cpp b/paddle/gserver/layers/DetectionUtil.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3e61adc66e60c54250e4f323452aa13045310879 --- /dev/null +++ b/paddle/gserver/layers/DetectionUtil.cpp @@ -0,0 +1,576 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "DetectionUtil.h" + +namespace paddle { + +size_t appendWithPermute(const Matrix& inMatrix, + size_t height, + size_t width, + size_t outTotalSize, + size_t outOffset, + size_t batchSize, + Matrix& outMatrix, + PermMode permMode) { + CHECK_EQ(inMatrix.useGpu(), outMatrix.useGpu()); + bool useGpu = inMatrix.useGpu(); + if (permMode == kNCHWToNHWC) { + size_t inElementCnt = inMatrix.getElementCnt(); + size_t channels = inElementCnt / (height * width * batchSize); + size_t imgSize = height * width; + for (size_t i = 0; i < batchSize; ++i) { + size_t offset = i * (outTotalSize / batchSize) + outOffset; + const MatrixPtr inTmp = Matrix::create( + const_cast(inMatrix.getData()) + i * channels * imgSize, + channels, + imgSize, + false, + useGpu); + MatrixPtr outTmp = + Matrix::create(const_cast(outMatrix.getData()) + offset, + imgSize, + channels, + false, + useGpu); + inTmp->transpose(outTmp, false); + } + return channels * imgSize; + } else { + LOG(FATAL) << "Unkown permute mode"; + } +} + +size_t decomposeWithPermute(const Matrix& inMatrix, + size_t height, + size_t width, + size_t inTotalSize, + size_t inOffset, + size_t batchSize, + Matrix& outMatrix, + PermMode permMode) { + CHECK_EQ(inMatrix.useGpu(), outMatrix.useGpu()); + bool useGpu = inMatrix.useGpu(); + if (permMode == kNHWCToNCHW) { + size_t outElementCnt = outMatrix.getElementCnt(); + size_t channels = outElementCnt / (height * width * batchSize); + size_t imgSize = height * width; + for (size_t i = 0; i < batchSize; ++i) { + size_t offset = i * (inTotalSize / batchSize) + inOffset; + const MatrixPtr inTmp = + Matrix::create(const_cast(inMatrix.getData()) + offset, + imgSize, + channels, + false, + useGpu); + MatrixPtr outTmp = Matrix::create( + const_cast(outMatrix.getData()) + i * channels * imgSize, + channels, + imgSize, + false, + useGpu); + inTmp->transpose(outTmp, false); + } + return channels * imgSize; + } else { + LOG(FATAL) << "Unkown permute mode"; + } +} + +real jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2) { + if (bbox2.xMin > bbox1.xMax || bbox2.xMax < bbox1.xMin || + bbox2.yMin > bbox1.yMax || bbox2.yMax < bbox1.yMin) { + return 0.0; + } else { + real interXMin = std::max(bbox1.xMin, bbox2.xMin); + real interYMin = std::max(bbox1.yMin, bbox2.yMin); + real interXMax = std::min(bbox1.xMax, bbox2.xMax); + real interYMax = std::min(bbox1.yMax, bbox2.yMax); + + real interWidth = interXMax - interXMin; + real interHeight = interYMax - interYMin; + real interArea = interWidth * interHeight; + + real bboxArea1 = bbox1.getArea(); + real bboxArea2 = bbox2.getArea(); + + return interArea / (bboxArea1 + bboxArea2 - interArea); + } +} + +void encodeBBoxWithVar(const NormalizedBBox& priorBBox, + const vector& priorBBoxVar, + const NormalizedBBox& gtBBox, + vector& outVec) { + real priorBBoxWidth = priorBBox.getWidth(); + real priorBBoxHeight = priorBBox.getHeight(); + real priorBBoxCenterX = priorBBox.getCenterX(); + real priorBBoxCenterY = priorBBox.getCenterY(); + + real gtBBoxWidth = gtBBox.getWidth(); + real gtBBoxHeight = gtBBox.getHeight(); + real gtBBoxCenterX = gtBBox.getCenterX(); + real gtBBoxCenterY = gtBBox.getCenterY(); + + outVec.clear(); + outVec.push_back((gtBBoxCenterX - priorBBoxCenterX) / priorBBoxWidth / + priorBBoxVar[0]); + outVec.push_back((gtBBoxCenterY - priorBBoxCenterY) / priorBBoxHeight / + priorBBoxVar[1]); + outVec.push_back(std::log(std::fabs(gtBBoxWidth / priorBBoxWidth)) / + priorBBoxVar[2]); + outVec.push_back(std::log(std::fabs(gtBBoxHeight / priorBBoxHeight)) / + priorBBoxVar[3]); +} + +NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox, + const vector& priorBBoxVar, + const vector& locPredData) { + real priorBBoxWidth = priorBBox.getWidth(); + real priorBBoxHeight = priorBBox.getHeight(); + real priorBBoxCenterX = priorBBox.getCenterX(); + real priorBBoxCenterY = priorBBox.getCenterY(); + + real decodedBBoxCenterX = + priorBBoxVar[0] * locPredData[0] * priorBBoxWidth + priorBBoxCenterX; + real decodedBBoxCenterY = + priorBBoxVar[1] * locPredData[1] * priorBBoxHeight + priorBBoxCenterY; + real decodedBBoxWidth = + std::exp(priorBBoxVar[2] * locPredData[2]) * priorBBoxWidth; + real decodedBBoxHeight = + std::exp(priorBBoxVar[3] * locPredData[3]) * priorBBoxHeight; + + NormalizedBBox decodedBBox; + decodedBBox.xMin = decodedBBoxCenterX - decodedBBoxWidth / 2; + decodedBBox.yMin = decodedBBoxCenterY - decodedBBoxHeight / 2; + decodedBBox.xMax = decodedBBoxCenterX + decodedBBoxWidth / 2; + decodedBBox.yMax = decodedBBoxCenterY + decodedBBoxHeight / 2; + + return decodedBBox; +} + +void getBBoxFromPriorData(const real* priorData, + const size_t numBBoxes, + vector& bboxVec) { + size_t outOffset = bboxVec.size(); + bboxVec.resize(bboxVec.size() + numBBoxes); + for (size_t i = 0; i < numBBoxes; ++i) { + NormalizedBBox bbox; + bbox.xMin = *(priorData + i * 8); + bbox.yMin = *(priorData + i * 8 + 1); + bbox.xMax = *(priorData + i * 8 + 2); + bbox.yMax = *(priorData + i * 8 + 3); + bboxVec[outOffset + i] = bbox; + } +} + +void getBBoxVarFromPriorData(const real* priorData, + const size_t num, + vector>& varVec) { + size_t outOffset = varVec.size(); + varVec.resize(varVec.size() + num); + for (size_t i = 0; i < num; ++i) { + vector var; + var.push_back(*(priorData + i * 8 + 4)); + var.push_back(*(priorData + i * 8 + 5)); + var.push_back(*(priorData + i * 8 + 6)); + var.push_back(*(priorData + i * 8 + 7)); + varVec[outOffset + i] = var; + } +} + +void getBBoxFromLabelData(const real* labelData, + const size_t numBBoxes, + vector& bboxVec) { + size_t outOffset = bboxVec.size(); + bboxVec.resize(bboxVec.size() + numBBoxes); + for (size_t i = 0; i < numBBoxes; ++i) { + NormalizedBBox bbox; + bbox.xMin = *(labelData + i * 6 + 1); + bbox.yMin = *(labelData + i * 6 + 2); + bbox.xMax = *(labelData + i * 6 + 3); + bbox.yMax = *(labelData + i * 6 + 4); + real isDifficult = *(labelData + i * 6 + 5); + if (std::abs(isDifficult - 0.0) < 1e-6) + bbox.isDifficult = false; + else + bbox.isDifficult = true; + bboxVec[outOffset + i] = bbox; + } +} + +void getBBoxFromDetectData(const real* detectData, + const size_t numBBoxes, + vector& labelVec, + vector& scoreVec, + vector& bboxVec) { + size_t outOffset = bboxVec.size(); + labelVec.resize(outOffset + numBBoxes); + scoreVec.resize(outOffset + numBBoxes); + bboxVec.resize(outOffset + numBBoxes); + for (size_t i = 0; i < numBBoxes; ++i) { + labelVec[outOffset + i] = *(detectData + i * 7 + 1); + scoreVec[outOffset + i] = *(detectData + i * 7 + 2); + NormalizedBBox bbox; + bbox.xMin = *(detectData + i * 7 + 3); + bbox.yMin = *(detectData + i * 7 + 4); + bbox.xMax = *(detectData + i * 7 + 5); + bbox.yMax = *(detectData + i * 7 + 6); + bboxVec[outOffset + i] = bbox; + } +} + +void matchBBox(const vector& priorBBoxes, + const vector& gtBBoxes, + real overlapThreshold, + vector* matchIndices, + vector* matchOverlaps) { + map> overlaps; + size_t numPriors = priorBBoxes.size(); + size_t numGTs = gtBBoxes.size(); + + matchIndices->clear(); + matchIndices->resize(numPriors, -1); + matchOverlaps->clear(); + matchOverlaps->resize(numPriors, 0.0); + + // Store the positive overlap between predictions and ground truth + for (size_t i = 0; i < numPriors; ++i) { + for (size_t j = 0; j < numGTs; ++j) { + real overlap = jaccardOverlap(priorBBoxes[i], gtBBoxes[j]); + if (overlap > 1e-6) { + (*matchOverlaps)[i] = std::max((*matchOverlaps)[i], overlap); + overlaps[i][j] = overlap; + } + } + } + // Bipartite matching + vector gtPool; + for (size_t i = 0; i < numGTs; ++i) { + gtPool.push_back(i); + } + while (gtPool.size() > 0) { + // Find the most overlapped gt and corresponding predictions + int maxPriorIdx = -1; + int maxGTIdx = -1; + real maxOverlap = -1.0; + for (map>::iterator it = overlaps.begin(); + it != overlaps.end(); + ++it) { + size_t i = it->first; + if ((*matchIndices)[i] != -1) { + // The prediction already has matched ground truth or is ignored + continue; + } + for (size_t p = 0; p < gtPool.size(); ++p) { + int j = gtPool[p]; + if (it->second.find(j) == it->second.end()) { + // No overlap between the i-th prediction and j-th ground truth + continue; + } + // Find the maximum overlapped pair + if (it->second[j] > maxOverlap) { + maxPriorIdx = (int)i; + maxGTIdx = (int)j; + maxOverlap = it->second[j]; + } + } + } + if (maxPriorIdx == -1) { + break; + } else { + (*matchIndices)[maxPriorIdx] = maxGTIdx; + (*matchOverlaps)[maxPriorIdx] = maxOverlap; + gtPool.erase(std::find(gtPool.begin(), gtPool.end(), maxGTIdx)); + } + } + + // Get most overlaped for the rest prediction bboxes + for (map>::iterator it = overlaps.begin(); + it != overlaps.end(); + ++it) { + size_t i = it->first; + if ((*matchIndices)[i] != -1) { + // The prediction already has matched ground truth or is ignored + continue; + } + int maxGTIdx = -1; + real maxOverlap = -1; + for (size_t j = 0; j < numGTs; ++j) { + if (it->second.find(j) == it->second.end()) { + // No overlap between the i-th prediction and j-th ground truth + continue; + } + // Find the maximum overlapped pair + real overlap = it->second[j]; + if (overlap > maxOverlap && overlap >= overlapThreshold) { + maxGTIdx = j; + maxOverlap = overlap; + } + } + if (maxGTIdx != -1) { + (*matchIndices)[i] = maxGTIdx; + (*matchOverlaps)[i] = maxOverlap; + } + } +} + +pair generateMatchIndices( + const Matrix& priorValue, + const size_t numPriorBBoxes, + const Matrix& gtValue, + const int* gtStartPosPtr, + const size_t seqNum, + const vector>& maxConfScore, + const size_t batchSize, + const real overlapThreshold, + const real negOverlapThreshold, + const size_t negPosRatio, + vector>* matchIndicesVecPtr, + vector>* negIndicesVecPtr) { + vector priorBBoxes; // share same prior bboxes + getBBoxFromPriorData(priorValue.getData(), numPriorBBoxes, priorBBoxes); + size_t totalPos = 0; + size_t totalNeg = 0; + for (size_t n = 0; n < batchSize; ++n) { + vector matchIndices; + vector negIndices; + vector matchOverlaps; + matchIndices.resize(numPriorBBoxes, -1); + matchOverlaps.resize(numPriorBBoxes, 0.0); + size_t numGTBBoxes = 0; + if (n < seqNum) numGTBBoxes = gtStartPosPtr[n + 1] - gtStartPosPtr[n]; + if (!numGTBBoxes) { + matchIndicesVecPtr->push_back(matchIndices); + negIndicesVecPtr->push_back(negIndices); + continue; + } + vector gtBBoxes; + getBBoxFromLabelData( + gtValue.getData() + gtStartPosPtr[n] * 6, numGTBBoxes, gtBBoxes); + + matchBBox( + priorBBoxes, gtBBoxes, overlapThreshold, &matchIndices, &matchOverlaps); + + size_t numPos = 0; + size_t numNeg = 0; + for (size_t i = 0; i < matchIndices.size(); ++i) + if (matchIndices[i] != -1) ++numPos; + totalPos += numPos; + vector> scoresIndices; + for (size_t i = 0; i < matchIndices.size(); ++i) + if (matchIndices[i] == -1 && matchOverlaps[i] < negOverlapThreshold) { + scoresIndices.push_back(std::make_pair(maxConfScore[n][i], i)); + ++numNeg; + } + numNeg = std::min(static_cast(numPos * negPosRatio), numNeg); + std::sort(scoresIndices.begin(), + scoresIndices.end(), + sortScorePairDescend); + for (size_t i = 0; i < numNeg; ++i) + negIndices.push_back(scoresIndices[i].second); + totalNeg += numNeg; + matchIndicesVecPtr->push_back(matchIndices); + negIndicesVecPtr->push_back(negIndices); + } + return std::make_pair(totalPos, totalNeg); +} + +void getMaxConfidenceScores(const real* confData, + const size_t batchSize, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t backgroundId, + vector>* maxConfScoreVecPtr) { + maxConfScoreVecPtr->clear(); + for (size_t i = 0; i < batchSize; ++i) { + vector maxConfScore; + for (size_t j = 0; j < numPriorBBoxes; ++j) { + int offset = j * numClasses; + real maxVal = -FLT_MAX; + real maxPosVal = -FLT_MAX; + real maxScore = 0.0; + for (size_t c = 0; c < numClasses; ++c) { + maxVal = std::max(confData[offset + c], maxVal); + if (c != backgroundId) + maxPosVal = std::max(confData[offset + c], maxPosVal); + } + real sum = 0.0; + for (size_t c = 0; c < numClasses; ++c) + sum += std::exp(confData[offset + c] - maxVal); + maxScore = std::exp(maxPosVal - maxVal) / sum; + maxConfScore.push_back(maxScore); + } + confData += numPriorBBoxes * numClasses; + maxConfScoreVecPtr->push_back(maxConfScore); + } +} + +template +bool sortScorePairDescend(const pair& pair1, + const pair& pair2) { + return pair1.first > pair2.first; +} + +template <> +bool sortScorePairDescend(const pair& pair1, + const pair& pair2) { + return pair1.first > pair2.first; +} + +void applyNMSFast(const vector& bboxes, + const real* confScoreData, + size_t classIdx, + size_t topK, + real confThreshold, + real nmsThreshold, + size_t numPriorBBoxes, + size_t numClasses, + vector* indices) { + vector> scores; + for (size_t i = 0; i < numPriorBBoxes; ++i) { + size_t confOffset = i * numClasses + classIdx; + if (confScoreData[confOffset] > confThreshold) + scores.push_back(std::make_pair(confScoreData[confOffset], i)); + } + std::stable_sort(scores.begin(), scores.end(), sortScorePairDescend); + if (topK > 0 && topK < scores.size()) scores.resize(topK); + while (scores.size() > 0) { + const size_t idx = scores.front().second; + bool keep = true; + for (size_t i = 0; i < indices->size(); ++i) { + if (keep) { + const size_t savedIdx = (*indices)[i]; + real overlap = jaccardOverlap(bboxes[idx], bboxes[savedIdx]); + keep = overlap <= nmsThreshold; + } else { + break; + } + } + if (keep) indices->push_back(idx); + scores.erase(scores.begin()); + } +} + +size_t getDetectionIndices( + const real* confData, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t backgroundId, + const size_t batchSize, + const size_t confThreshold, + const size_t nmsTopK, + const real nmsThreshold, + const size_t keepTopK, + const vector>& allDecodedBBoxes, + vector>>* allDetectionIndices) { + size_t totalKeepNum = 0; + for (size_t n = 0; n < batchSize; ++n) { + const vector& decodedBBoxes = allDecodedBBoxes[n]; + size_t numDetected = 0; + map> indices; + size_t confOffset = n * numPriorBBoxes * numClasses; + for (size_t c = 0; c < numClasses; ++c) { + if (c == backgroundId) continue; + applyNMSFast(decodedBBoxes, + confData + confOffset, + c, + nmsTopK, + confThreshold, + nmsThreshold, + numPriorBBoxes, + numClasses, + &(indices[c])); + numDetected += indices[c].size(); + } + if (keepTopK > 0 && numDetected > keepTopK) { + vector>> scoreIndexPairs; + for (size_t c = 0; c < numClasses; ++c) { + const vector& labelIndices = indices[c]; + for (size_t i = 0; i < labelIndices.size(); ++i) { + size_t idx = labelIndices[i]; + scoreIndexPairs.push_back( + std::make_pair((confData + confOffset)[idx * numClasses + c], + std::make_pair(c, idx))); + } + } + std::sort(scoreIndexPairs.begin(), + scoreIndexPairs.end(), + sortScorePairDescend>); + scoreIndexPairs.resize(keepTopK); + map> newIndices; + for (size_t i = 0; i < scoreIndexPairs.size(); ++i) { + size_t label = scoreIndexPairs[i].second.first; + size_t idx = scoreIndexPairs[i].second.second; + newIndices[label].push_back(idx); + } + allDetectionIndices->push_back(newIndices); + totalKeepNum += keepTopK; + } else { + allDetectionIndices->push_back(indices); + totalKeepNum += numDetected; + } + } + return totalKeepNum; +} + +void getDetectionOutput(const real* confData, + const size_t numKept, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t batchSize, + const vector>>& allIndices, + const vector>& allDecodedBBoxes, + Matrix& out) { + MatrixPtr outBuffer; + Matrix::resizeOrCreate(outBuffer, numKept, 7, false, false); + real* bufferData = outBuffer->getData(); + size_t count = 0; + for (size_t n = 0; n < batchSize; ++n) { + for (map>::const_iterator it = allIndices[n].begin(); + it != allIndices[n].end(); + ++it) { + size_t label = it->first; + const vector& indices = it->second; + const vector& decodedBBoxes = allDecodedBBoxes[n]; + for (size_t i = 0; i < indices.size(); ++i) { + size_t idx = indices[i]; + size_t confOffset = n * numPriorBBoxes * numClasses + idx * numClasses; + bufferData[count * 7] = n; + bufferData[count * 7 + 1] = label; + bufferData[count * 7 + 2] = (confData + confOffset)[label]; + NormalizedBBox clippedBBox = clipBBox(decodedBBoxes[idx]); + bufferData[count * 7 + 3] = clippedBBox.xMin; + bufferData[count * 7 + 4] = clippedBBox.yMin; + bufferData[count * 7 + 5] = clippedBBox.xMax; + bufferData[count * 7 + 6] = clippedBBox.yMax; + ++count; + } + } + } + out.copyFrom(bufferData, numKept * 7); +} + +NormalizedBBox clipBBox(const NormalizedBBox& bbox) { + real realOne = static_cast(1.0); + real realZero = static_cast(0.0); + NormalizedBBox clippedBBox; + clippedBBox.xMin = std::max(std::min(bbox.xMin, realOne), realZero); + clippedBBox.yMin = std::max(std::min(bbox.yMin, realOne), realZero); + clippedBBox.xMax = std::max(std::min(bbox.xMax, realOne), realZero); + clippedBBox.yMax = std::max(std::min(bbox.yMax, realOne), realZero); + return clippedBBox; +} + +} // namespace paddle diff --git a/paddle/gserver/layers/DetectionUtil.h b/paddle/gserver/layers/DetectionUtil.h new file mode 100644 index 0000000000000000000000000000000000000000..fe4f9f075e4cf011c97f68f49598a828d62327b3 --- /dev/null +++ b/paddle/gserver/layers/DetectionUtil.h @@ -0,0 +1,307 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/math/Matrix.h" + +using std::vector; +using std::pair; +using std::map; + +namespace paddle { + +template +struct BBoxBase { + BBoxBase(T xMin, T yMin, T xMax, T yMax) + : xMin(xMin), yMin(yMin), xMax(xMax), yMax(yMax), isDifficult(false) {} + + BBoxBase() {} + + T getWidth() const { return xMax - xMin; } + + T getHeight() const { return yMax - yMin; } + + T getCenterX() const { return (xMin + xMax) / 2; } + + T getCenterY() const { return (yMin + yMax) / 2; } + + T getArea() const { return getWidth() * getHeight(); } + + // coordinate of bounding box + T xMin; + T yMin; + T xMax; + T yMax; + // whether difficult object (e.g. object with heavy occlusion is difficult) + bool isDifficult; +}; + +struct NormalizedBBox : BBoxBase { + NormalizedBBox() : BBoxBase() {} +}; + +enum PermMode { kNCHWToNHWC, kNHWCToNCHW }; + +/** + * @brief First permute input maxtrix then append to output matrix + */ +size_t appendWithPermute(const Matrix& inMatrix, + size_t height, + size_t width, + size_t outTotalSize, + size_t outOffset, + size_t batchSize, + Matrix& outMatrix, + PermMode permMode); + +/** + * @brief First permute input maxtrix then decompose to output + */ +size_t decomposeWithPermute(const Matrix& inMatrix, + size_t height, + size_t width, + size_t totalSize, + size_t offset, + size_t batchSize, + Matrix& outMatrix, + PermMode permMode); + +/** + * @brief Compute jaccard overlap between two bboxes. + * @param bbox1 The first bbox + * @param bbox2 The second bbox + */ +real jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2); + +/** + * @brief Compute offset parameters between prior bbox and ground truth bbox + * and variances of prior bbox are considered + * @param priorBBox Input prior bbox + * @param priorBBoxVar Variance parameters of prior bbox + * @param gtBBox Groundtruth bbox + * @param outVec Output vector + */ +void encodeBBoxWithVar(const NormalizedBBox& priorBBox, + const vector& priorBBoxVar, + const NormalizedBBox& gtBBox, + vector& outVec); + +/** + * @brief Decode prior bbox with offset parameters + * and variances of prior bbox are considered + * @param priorBBox Prior bbox to be decoded + * @param priorBBoxVar Variance parameters of prior bbox + * @param locPredData Offset parameters + */ +NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox, + const vector& priorBBoxVar, + const vector& locPredData); + +/** + * @brief Extract bboxes from prior matrix, the layout is + * xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ... + * @param priorData Matrix of prior value + * @param numBBoxes Number of bbox to be extracted + * @param bboxVec Append to the vector + */ +void getBBoxFromPriorData(const real* priorData, + const size_t numBBoxes, + vector& bboxVec); + +/** + * @brief Extract labels, scores and bboxes from detection matrix, the layout is + * imageId | label | score | xmin | ymin | xmax | ymax + * @param detectData Matrix of detection value + * @param numBBoxes Number of bbox to be extracted + * @param labelVec Label of bbox + * @param scoreVec Score of bbox + * @param bboxVec Append to the vector + */ +void getBBoxFromDetectData(const real* detectData, + const size_t numBBoxes, + vector& labelVec, + vector& scoreVec, + vector& bboxVec); + +/** + * @brief Extract variances from prior matrix, the layout is + * xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ... + * @param priorData Matrix of prior value + * @param num Number to be extracted + * @param varVec Append to the vector + */ +void getBBoxVarFromPriorData(const real* priorData, + const size_t num, + vector>& varVec); + +/** + * @brief Extract bboxes from label matrix, the layout is + * class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ... + * @param labelData Matrix of label value + * @param numBBoxes Number to be extracted + * @param bboxVec Append to the vector + */ +void getBBoxFromLabelData(const real* labelData, + const size_t numBBoxes, + vector& bboxVec); + +/** +* @brief Match prior bbox to groundtruth bbox, the strategy is: +1. Find the most overlaped bbox pair (prior and groundtruth) +2. For rest of prior bboxes find the most overlaped groundtruth bbox +* @param priorBBoxes prior bbox +* @param gtBBoxes groundtruth bbox +* @param overlapThreshold Low boundary of overlap (judge whether matched) +* @param matchIndices For each prior bbox, groundtruth bbox index if matched +otherwise -1 +* @param matchOverlaps For each prior bbox, overap with all groundtruth bboxes +*/ +void matchBBox(const vector& priorBBoxes, + const vector& gtBBoxes, + real overlapThreshold, + vector* matchIndices, + vector* matchOverlaps); + +/** +* @brief Generate positive bboxes and negative bboxes, +|positive bboxes|/|negative bboxes| is negPosRatio +* @param priorValue Prior value +* @param numPriorBBoxes Number of prior bbox +* @param gtValue Groundtruth value +* @param gtStartPosPtr Since groundtruth value stored as sequence type, +this parameter indicates start position of each record +* @param seqNum Number of sequence +* @param maxConfScore Classification score for prior bbox, used to mine +negative examples +* @param batchSize Image number +* @param overlapThreshold Low boundary of overap +* @param negOverlapThreshold Upper boundary of overap (judge negative example) +* @param negPosRatio Control number of negative bboxes +* @param matchIndicesVecPtr Save indices of matched prior bbox +* @param negIndicesVecPtr Save indices of negative prior bbox +*/ +pair generateMatchIndices( + const Matrix& priorValue, + const size_t numPriorBBoxes, + const Matrix& gtValue, + const int* gtStartPosPtr, + const size_t seqNum, + const vector>& maxConfScore, + const size_t batchSize, + const real overlapThreshold, + const real negOverlapThreshold, + const size_t negPosRatio, + vector>* matchIndicesVecPtr, + vector>* negIndicesVecPtr); + +/** + * @brief Get max confidence score for each prior bbox + * @param confData Confidence scores, layout is + * class1 score | class2 score | ... | classN score ... + * @param batchSize Image number + * @param numPriorBBoxes Prior bbox number + * @param numClasses Classes number + * @param backgroundId Background id + * @param maxConfScoreVecPtr Ouput + */ +void getMaxConfidenceScores(const real* confData, + const size_t batchSize, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t backgroundId, + vector>* maxConfScoreVecPtr); + +template +bool sortScorePairDescend(const pair& pair1, + const pair& pair2); + +template <> +bool sortScorePairDescend(const pair& pair1, + const pair& pair2); + +/** + * @brief Do NMS for bboxes to remove duplicated bboxes + * @param bboxes BBoxes to apply NMS + * @param confScoreData Confidence scores + * @param classIdx Class to do NMS + * @param topK Number to keep + * @param confThreshold Low boundary of confidence score + * @param nmsThreshold Threshold of overlap + * @param numPriorBBoxes Total number of prior bboxes + * @param numClasses Total class number + * @param indices Indices of high quality bboxes + */ +void applyNMSFast(const vector& bboxes, + const real* confScoreData, + size_t classIdx, + size_t topK, + real confThreshold, + real nmsThreshold, + size_t numPriorBBoxes, + size_t numClasses, + vector* indices); + +/** + * @brief Get detection results which satify requirements + * @param numPriorBBoxes Prior bbox number + * @param numClasses Class number + * @param backgroundId Background class + * @param batchSize Image number + * @param confThreshold Threshold of class confidence + * @param nmsTopK Used in NMS operation to keep top k bbox + * @param nmsThreshold Used in NMS, threshold of overlap + * @param keepTopK How many bboxes keeped in an image + * @param allDecodedBBoxes Decoded bboxes for all images + * @param allDetectionIndices Save detection bbox indices + */ +size_t getDetectionIndices( + const real* confData, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t backgroundId, + const size_t batchSize, + const size_t confThreshold, + const size_t nmsTopK, + const real nmsThreshold, + const size_t keepTopK, + const vector>& allDecodedBBoxes, + vector>>* allDetectionIndices); + +/** + * @brief Get detection results + * @param confData Confidence scores + * @param numPriorBBoxes Prior bbox number + * @param numClasses Class number + * @param batchSize Image number + * @param allIndices Indices of predicted bboxes + * @param allDecodedBBoxes BBoxes decoded + * @param out Output matrix + * image number | label | confidence score | xMin | yMin | xMax | yMax + */ +void getDetectionOutput(const real* confData, + const size_t numKept, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t batchSize, + const vector>>& allIndices, + const vector>& allDecodedBBoxes, + Matrix& out); + +NormalizedBBox clipBBox(const NormalizedBBox& bbox); + +} // namespace paddle