diff --git a/doc/release_notes.md b/doc/release_notes.md index 75f1d98d8958353b1b0a1ed2850f6721afdd9cf8..2ebae72b2faff4741daea875b5a62990deba01d2 100644 --- a/doc/release_notes.md +++ b/doc/release_notes.md @@ -370,13 +370,14 @@ OpenPose Library - Release Notes 1. Highly improved 3D triangulation for >3 cameras by fixing some small bugs. 2. Added community-based support for Nvidia NVCaffe. 3. Increased accuracy very lightly for CUDA version (about 0.01%) by adapting the threshold in `process()` in `bodyPartConnectorBase.cu` to `defaultNmsThreshold`. This also removes any posibility of future bugs in that function for using a default NMS threshold higher than 0.15 (which was the hard-coded value used previously). - 4. Increased mAP but reduced mAR (both about 0.01%) as well as reduction of false positives. Step 1: removed legs where only knee/ankle/feet are found. Step 2: If no people is found in an image, `removePeopleBelowThresholds` is re-run with `maximizePositives = true`. + 4. Increased mAP but reduced mAR (both about 0.01%), and reduced the number of false positives. Step 1: Removed legs where only knee/ankle/feet are found. Step 2: If no people are found in an image, `removePeopleBelowThresholdsAndFillFaces` is re-run with `maximizePositives = true`. 5. Number of maximum people is not limited by the maximum number of max peaks anymore. However, the number of body part candidates for a specific keypoint (e.g., nose) is still limited to the number of max peaks. + 6. Added more checks in the destructors of classes that own CUDA memory, and made the CUDA frees safer (pointers are only freed if they are not null, and they are reset to `nullptr` afterwards). 2. Functions or parameters renamed: 1. `--3d_min_views` default value (-1) no longer means that all camera views are required. Instead, it will be equal to max(2, min(4, #cameras-1)). This should provide a good trade-off between recall and precission. 3. Main bugs fixed: 1. Windows: Added back support for OpenGL and Spinnaker, as well as DLLs for debug compilation. - 2. 
`06_face_from_image.cpp` and `07_hand_from_image.cpp` working again, they stopped working in version 1.5.0 with the GPU image resize for the GUI. + 2. `06_face_from_image.cpp`, `07_hand_from_image.cpp`, and `09_keypoints_from_heatmaps` working again, they stopped working in version 1.5.0 with the GPU image resize for the GUI. 4. Changes/additions that affect the compatibility with the OpenPose Unity Plugin: diff --git a/src/openpose/core/cvMatToOpInput.cpp b/src/openpose/core/cvMatToOpInput.cpp index 308d77355aa8212afcad0df6f8828b85d659a502..846d36c3f180247295471b46195da5dcfb926cbb 100644 --- a/src/openpose/core/cvMatToOpInput.cpp +++ b/src/openpose/core/cvMatToOpInput.cpp @@ -34,9 +34,23 @@ namespace op if (mGpuResize) { // Free temporary memory - cudaFree(pInputImageCuda); - cudaFree(pOutputImageCuda); - cudaFree(pInputImageReorderedCuda); + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + if (pInputImageCuda != nullptr) + { + cudaFree(pInputImageCuda); + pInputImageCuda = nullptr; + } + if (pOutputImageCuda != nullptr) + { + cudaFree(pOutputImageCuda); + pOutputImageCuda = nullptr; + } + if (pInputImageReorderedCuda != nullptr) + { + cudaFree(pInputImageReorderedCuda); + pInputImageReorderedCuda = nullptr; + } + cudaCheck(__LINE__, __FUNCTION__, __FILE__); } #endif } diff --git a/src/openpose/core/cvMatToOpOutput.cpp b/src/openpose/core/cvMatToOpOutput.cpp index 8200aee6502571892b2cb42fcc8a43915809769e..d2fb7de68e5f17e4c113889d895a1840dc7803e3 100644 --- a/src/openpose/core/cvMatToOpOutput.cpp +++ b/src/openpose/core/cvMatToOpOutput.cpp @@ -38,8 +38,18 @@ namespace op if (mGpuResize) { // Free temporary memory - cudaFree(pInputImageCuda); - cudaFree(*spOutputImageCuda); + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + if (pInputImageCuda != nullptr) + { + cudaFree(pInputImageCuda); + pInputImageCuda = nullptr; + } + if (*spOutputImageCuda != nullptr) + { + cudaFree(*spOutputImageCuda); + *spOutputImageCuda = nullptr; + } + cudaCheck(__LINE__, __FUNCTION__, 
__FILE__); } #endif } diff --git a/src/openpose/core/gpuRenderer.cpp b/src/openpose/core/gpuRenderer.cpp index ec79a0631dbb50680191be046565f62ac66c25c7..b79ccd5115cabc1817421e925942f762ec68d8e4 100644 --- a/src/openpose/core/gpuRenderer.cpp +++ b/src/openpose/core/gpuRenderer.cpp @@ -46,7 +46,10 @@ namespace op { #ifdef USE_CUDA if (mIsLastRenderer && spGpuMemory != nullptr) + { cudaFree(*spGpuMemory); + *spGpuMemory = nullptr; + } #endif } catch (const std::exception& e) diff --git a/src/openpose/core/opOutputToCvMat.cpp b/src/openpose/core/opOutputToCvMat.cpp index ed2d92d2a9c2058131272c146d593fe4c1b7c3e3..b351c68953215e583acb4abf7d9bbbea846ba659 100644 --- a/src/openpose/core/opOutputToCvMat.cpp +++ b/src/openpose/core/opOutputToCvMat.cpp @@ -37,8 +37,18 @@ namespace op if (mGpuResize) { // Free temporary memory - cudaFree(*spOutputImageFloatCuda); - cudaFree(pOutputImageUCharCuda); + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + if (*spOutputImageFloatCuda != nullptr) + { + cudaFree(*spOutputImageFloatCuda); + *spOutputImageFloatCuda = nullptr; + } + if (pOutputImageUCharCuda != nullptr) + { + cudaFree(pOutputImageUCharCuda); + pOutputImageUCharCuda = nullptr; + } + cudaCheck(__LINE__, __FUNCTION__, __FILE__); } #endif } diff --git a/src/openpose/face/faceGpuRenderer.cpp b/src/openpose/face/faceGpuRenderer.cpp index e0bf4d38308ba7003c2887283f718fd20345d645..048604e5f44f78a89b7c3dd66618c2014c6a8e4e 100644 --- a/src/openpose/face/faceGpuRenderer.cpp +++ b/src/openpose/face/faceGpuRenderer.cpp @@ -24,14 +24,28 @@ namespace op { // Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed. 
#ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); if (pGpuFace != nullptr) + { cudaFree(pGpuFace); + pGpuFace = nullptr; + } if (pMaxPtr != nullptr) + { cudaFree(pMaxPtr); + pMaxPtr = nullptr; + } if (pMinPtr != nullptr) + { cudaFree(pMinPtr); + pMinPtr = nullptr; + } if (pScalePtr != nullptr) + { cudaFree(pScalePtr); + pScalePtr = nullptr; + } + cudaCheck(__LINE__, __FUNCTION__, __FILE__); #endif } catch (const std::exception& e) diff --git a/src/openpose/hand/handGpuRenderer.cpp b/src/openpose/hand/handGpuRenderer.cpp index f76cd107a6a7d140ddc67faa74cdd8f3eb652edf..f455fbcf0a7c137d0fbdf18c8fef2724e13e4881 100644 --- a/src/openpose/hand/handGpuRenderer.cpp +++ b/src/openpose/hand/handGpuRenderer.cpp @@ -24,14 +24,28 @@ namespace op { // Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed. #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); if (pGpuHand != nullptr) + { cudaFree(pGpuHand); + pGpuHand = nullptr; + } if (pMaxPtr != nullptr) + { cudaFree(pMaxPtr); + pMaxPtr = nullptr; + } if (pMinPtr != nullptr) + { cudaFree(pMinPtr); + pMinPtr = nullptr; + } if (pScalePtr != nullptr) + { cudaFree(pScalePtr); + pScalePtr = nullptr; + } + cudaCheck(__LINE__, __FUNCTION__, __FILE__); #endif } catch (const std::exception& e) diff --git a/src/openpose/net/bodyPartConnectorBase.cpp b/src/openpose/net/bodyPartConnectorBase.cpp index 491b5b31d03b34dc58afe4394ea851b4a28fbbc3..7b236120d81f71e69c1370b7a531e5d1309fd2aa 100644 --- a/src/openpose/net/bodyPartConnectorBase.cpp +++ b/src/openpose/net/bodyPartConnectorBase.cpp @@ -61,14 +61,14 @@ namespace op template void getKeypointCounter( int& personCounter, const std::vector, T>>& peopleVector, - const unsigned int index, const int indexFirst, const int indexLast, const int minimum) + const unsigned int part, const int partFirst, const int partLast, const int minimum) { try { // Count keypoints auto keypointCounter = 0; - for (auto i = indexFirst ; i < indexLast ; 
i++)
-                keypointCounter += (peopleVector[index].first.at(i) > 0);
+            for (auto i = partFirst ; i < partLast ; i++)
+                keypointCounter += (peopleVector[part].first.at(i) > 0);
             // If enough keypoints --> subtract them and keep them at least as big as minimum
             if (keypointCounter > minimum)
                 personCounter += minimum-keypointCounter; // personCounter = non-considered keypoints + minimum
@@ -79,6 +79,53 @@ namespace op
         }
     }
 
+    // ROI, diameter (fastMax of ROI width & height), and first/last found part within [partInit, partEnd)
+    template
+    void getRoiDiameterAndBounds(
+        Rectangle& roi, int& diameter, int& partFirstNon0, int& partLastNon0,
+        const std::vector& personVector, const T* const peaksPtr,
+        const int partInit, const int partEnd)
+    {
+        try
+        {
+            // No keypoint found --> roi stays as {0,0,0,0} and partFirstNon0 = partLastNon0 = -1
+            roi = Rectangle{0,0,0,0};
+            partFirstNon0 = -1;
+            partLastNon0 = -1;
+            for (auto part = partInit ; part < partEnd ; part++)
+            {
+                // x & y precede the score --> a valid index is >= 2 (getKeypointCounter only counts entries > 0)
+                const auto index = personVector[part];
+                if (index >= 2 && peaksPtr[index] > 0)
+                {
+                    const auto x = peaksPtr[index-2];
+                    const auto y = peaksPtr[index-1];
+                    // ROI as [xMin, yMin, xMax, yMax]: 1st found keypoint initializes it, the rest expand it
+                    const auto isFirst = (partFirstNon0 < 0);
+                    if (isFirst || roi.x > x)
+                        roi.x = x;
+                    if (isFirst || roi.y > y)
+                        roi.y = y;
+                    if (isFirst || roi.width < x)
+                        roi.width = x;
+                    if (isFirst || roi.height < y)
+                        roi.height = y;
+                    if (isFirst)
+                        partFirstNon0 = part;
+                    partLastNon0 = part;
+                }
+            }
+            // From [xMin, yMin, xMax, yMax] to [x, y, width, height], then diameter from width & height
+            roi.width -= roi.x;
+            roi.height -= roi.y;
+            diameter = fastMax(roi.width, roi.height);
+        }
+        catch (const std::exception& e)
+        {
+            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+        }
+    }
+
     template
     std::vector, T>> createPeopleVector(
         const T* const heatMapPtr, const T* const peaksPtr, const PoseModel poseModel, const Point& heatMapSize,
@@ -643,32 +690,6 @@ namespace op
         }
     }
 
-    template
-    void getRoiDiameterAndBounds(
-        Rectangle& roi, int& diameter, int& indexFirstNon0, int& indexLastNon0,
-        const std::vector& personVector, const T* const peaksPtr,
-        const int indexInit, const int indexEnd)
-    {
-        try
-        {
-            roi = Rectangle{0,0,0,0};
-            for (auto index = 0u ; index < personVector.size()-1 ; index++)
-            {
-                const auto x = peaksPtr[personVector[index]-2];
-                const auto y = peaksPtr[personVector[index]-1];
-                const auto score = peaksPtr[personVector[index]];
-                if (roi.x > x)
-                    roi.x = x;
-                if (roi.y > y)
-                    roi.y = y;
-            }
-        }
-        catch (const std::exception& e)
-        {
-            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
-        }
-    }
-
     template
     void removePeopleBelowThresholdsAndFillFaces(
         std::vector& validSubsetIndexes, int& numberPeople,
@@ -693,28 +714,28 @@ namespace op
             std::vector faceInvalidSubsetIndexes;
             faceInvalidSubsetIndexes.reserve(peopleVector.size());
             // For each person candidate
-            for (auto index = 0u ; index < peopleVector.size() ; index++)
+            for (auto person = 0u ; person < peopleVector.size() ; person++)
             {
-                auto personCounter = peopleVector[index].first.back();
+                auto personCounter = peopleVector[person].first.back();
                 // Analog for hand/face keypoints
                 if (numberBodyParts >= 135)
                 {
                     // No consider face keypoints for personCounter
                     const auto currentCounter = personCounter;
-                    getKeypointCounter(personCounter, peopleVector, index, 65, 135, 1);
+                    getKeypointCounter(personCounter, peopleVector, person, 65, 135, 1);
                     const auto newCounter = personCounter;
-                    if (personCounter == 0)
+                    
if (personCounter == 1) { - faceInvalidSubsetIndexes.emplace_back(index); + faceInvalidSubsetIndexes.emplace_back(person); continue; } // If body is still valid and facial points were removed, then add to valid faces else if (currentCounter != newCounter) - faceValidSubsetIndexes.emplace_back(index); + faceValidSubsetIndexes.emplace_back(person); // No consider right hand keypoints for personCounter - getKeypointCounter(personCounter, peopleVector, index, 45, 65, 1); + getKeypointCounter(personCounter, peopleVector, person, 45, 65, 1); // No consider left hand keypoints for personCounter - getKeypointCounter(personCounter, peopleVector, index, 25, 45, 1); + getKeypointCounter(personCounter, peopleVector, person, 25, 45, 1); } // Foot keypoints do not affect personCounter (too many false positives, // same foot usually appears as both left and right keypoints) @@ -724,7 +745,7 @@ namespace op if (!maximizePositives && (numberBodyParts == 25 || numberBodyParts > 70)) { const auto currentCounter = personCounter; - getKeypointCounter(personCounter, peopleVector, index, 19, 25, 0); + getKeypointCounter(personCounter, peopleVector, person, 19, 25, 0); const auto newCounter = personCounter; // Problem: Same leg/foot keypoints are considered for both left and right keypoints. // Solution: Remove legs that are duplicated and that do not have upper torso @@ -733,11 +754,11 @@ namespace op continue; } // Add only valid people - const auto personScore = peopleVector[index].second; + const auto personScore = peopleVector[person].second; if (personCounter >= minSubsetCnt && (personScore/personCounter) >= minSubsetScore) { numberPeople++; - validSubsetIndexes.emplace_back(index); + validSubsetIndexes.emplace_back(person); // // This is not required, it is OK if there are more people. No more GPU memory used. // if (numberPeople == maxPeaks) // break; @@ -747,25 +768,6 @@ namespace op error("Bad personCounter (" + std::to_string(personCounter) + "). 
Bug in this" " function if this happens.", __LINE__, __FUNCTION__, __FILE__); } -// // Random standalone facial keypoints --> Merge into a more complete face -// if (numberPeople > 0 && faceInvalidSubsetIndexes.size() > 0) -// { -// for (auto faceId = 0u ; faceId < faceInvalidSubsetIndexes.size() ; faceId++) -// { -// // Get ROI -// Rectangle roi; -// int diameter; -// int indexFirstNon0; -// int indexLastNon0; -// const auto index = faceValidSubsetIndexes[faceId]; -// getRoiDiameterAndBounds( -// roi, diameter, indexFirstNon0, indexLastNon0, peopleVector[index].first, peaksPtr, 65, 135); -// // const auto personCounter = peopleVector[index].first.back(); -// // const auto x = peaksPtr[peopleVector[index].first[part]-2]; -// // const auto y = peaksPtr[peopleVector[index].first[part]-1]; -// // const auto score = peaksPtr[peopleVector[index].first[part]]; -// } -// } // If no people found --> Repeat with maximizePositives = true // Result: Increased COCO mAP because we catch more foot-only images if (numberPeople == 0 && !maximizePositives) diff --git a/src/openpose/net/bodyPartConnectorCaffe.cpp b/src/openpose/net/bodyPartConnectorCaffe.cpp index 5eb2abb1cd8b4d17aefa9e5e9369d795fbaf3c23..1dcc57f854d72e620eb9e3973d1ccf18a2f92d78 100644 --- a/src/openpose/net/bodyPartConnectorCaffe.cpp +++ b/src/openpose/net/bodyPartConnectorCaffe.cpp @@ -3,6 +3,7 @@ #endif #ifdef USE_CUDA #include + #include #endif #ifdef USE_OPENCL #include @@ -41,14 +42,28 @@ namespace op try { #if defined USE_CAFFE && defined USE_CUDA - cudaFree(pBodyPartPairsGpuPtr); - cudaFree(pMapIdxGpuPtr); - cudaFree(pFinalOutputGpuPtr); + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + if (pBodyPartPairsGpuPtr != nullptr) + { + cudaFree(pBodyPartPairsGpuPtr); + pBodyPartPairsGpuPtr = nullptr; + } + if (pMapIdxGpuPtr != nullptr) + { + cudaFree(pMapIdxGpuPtr); + pMapIdxGpuPtr = nullptr; + } + if (pFinalOutputGpuPtr != nullptr) + { + cudaFree(pFinalOutputGpuPtr); + pFinalOutputGpuPtr = nullptr; + } + 
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
             #endif
         }
         catch (const std::exception& e)
         {
-            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            errorDestructor(e.what(), __LINE__, __FUNCTION__, __FILE__);
         }
     }
 
diff --git a/src/openpose/net/nmsCaffe.cpp b/src/openpose/net/nmsCaffe.cpp
index 13c3944869a360ff870da69bfb041333c4f61451..bc3ebd04c1b004e728daffc82380eeca90a6fc34 100644
--- a/src/openpose/net/nmsCaffe.cpp
+++ b/src/openpose/net/nmsCaffe.cpp
@@ -45,9 +45,9 @@ namespace op
         #if defined USE_CAFFE && defined USE_OPENCL
             try
             {
-                if(mKernelGpuPtr != nullptr)
+                if (mKernelGpuPtr != nullptr)
                     clReleaseMemObject((cl_mem)mKernelGpuPtr);
-                if(mKernelCpuPtr != nullptr)
+                if (mKernelCpuPtr != nullptr)
                     delete mKernelCpuPtr;
             }
             catch (const std::exception& e)
diff --git a/src/openpose/net/resizeAndMergeBase.cu b/src/openpose/net/resizeAndMergeBase.cu
index 8def34ed64f0698891138e0b091420ab39bb1da9..ac76f0ba18d0b60b5ffdd89562cb93b2e8e32038 100644
--- a/src/openpose/net/resizeAndMergeBase.cu
+++ b/src/openpose/net/resizeAndMergeBase.cu
@@ -4,8 +4,20 @@
 
 namespace op
 {
+    const auto THREADS_PER_BLOCK = 256u;
     const auto THREADS_PER_BLOCK_1D = 16u;
 
+    // Element-wise copy: targetPtr[x] = sourcePtr[x] for every x in [0, N), where x is the global 1-D thread
+    // index. The launch must provide at least N threads along x, and both buffers must hold >= N elements.
+    template
+    __global__ void fillKernel(
+        T* targetPtr, const T* const sourcePtr, const int N)
+    {
+        const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
+        if (x < N)
+            targetPtr[x] = sourcePtr[x];
+    }
+
     // template
     // __global__ void resizeKernelOld(
     //     T* targetPtr, const T* const sourcePtr, const int widthSource, const int heightSource, const int widthTarget,
@@ -329,18 +341,30 @@ namespace op
 
                 // Optimized function for 8x resize
                 // OP_CUDA_PROFILE_INIT(REPS);
-                if (widthTarget / widthSource != 8 || heightTarget / heightSource != 8)
-                    error("Kernel only implemented for 8x resize. Notify us if this error appears.",
-                          __LINE__, __FUNCTION__, __FILE__);
-                const auto rescaleFactor = (unsigned int) std::ceil(heightTarget / (float)(heightSource));
-                const dim3 threadsPerBlock{rescaleFactor, rescaleFactor, 1};
-                const dim3 numBlocks{
-                    getNumberCudaBlocks(widthTarget, threadsPerBlock.x),
-                    getNumberCudaBlocks(heightTarget, threadsPerBlock.y),
-                    getNumberCudaBlocks(num * channels, threadsPerBlock.z)};
-                resize8TimesKernel<<>>(
-                    targetPtr, sourcePtrs.at(0), widthSource, heightSource, widthTarget, heightTarget,
-                    rescaleFactor);
+                // Same size --> Plain copy. Sizes must match exactly: N (target size) elements are read from source
+                if (widthTarget == widthSource && heightTarget == heightSource)
+                {
+                    const auto N = widthTarget * heightTarget * num * channels;
+                    const dim3 threadsPerBlock{THREADS_PER_BLOCK};
+                    const dim3 numBlocks{getNumberCudaBlocks(N, threadsPerBlock.x)};
+                    fillKernel<<>>(
+                        targetPtr, sourcePtrs.at(0), N);
+                }
+                else
+                {
+                    if (widthTarget / widthSource != 8 || heightTarget / heightSource != 8)
+                        error("Kernel only implemented for 8x resize. Notify us if this error appears.",
+                              __LINE__, __FUNCTION__, __FILE__);
+                    const auto rescaleFactor = (unsigned int) std::ceil(heightTarget / (float)(heightSource));
+                    const dim3 threadsPerBlock{rescaleFactor, rescaleFactor, 1};
+                    const dim3 numBlocks{
+                        getNumberCudaBlocks(widthTarget, threadsPerBlock.x),
+                        getNumberCudaBlocks(heightTarget, threadsPerBlock.y),
+                        getNumberCudaBlocks(num * channels, threadsPerBlock.z)};
+                    resize8TimesKernel<<>>(
+                        targetPtr, sourcePtrs.at(0), widthSource, heightSource, widthTarget, heightTarget,
+                        rescaleFactor);
+                }
                 // OP_CUDA_PROFILE_END(timeNormalize3, 1e3, REPS);
 
                 // // Profiling code
@@ -482,10 +506,14 @@ namespace op
                         widthTarget, heightTarget, sourcePtrs[0], sourcePtrs[1], sourcePtrs[2], sourcePtrs[3],
                         sourcePtrs[4], sourcePtrs[5], sourcePtrs[6], sourcePtrs[7]);
                 // Free memory
-                cudaFree(widthSources);
-                cudaFree(heightSources);
-                cudaFree(scaleWidths);
-                cudaFree(scaleHeights);
+                if (widthSources != nullptr)
+                    cudaFree(widthSources);
+                if 
(heightSources != nullptr) + cudaFree(heightSources); + if (scaleWidths != nullptr) + cudaFree(scaleWidths); + if (scaleHeights != nullptr) + cudaFree(scaleHeights); // OP_CUDA_PROFILE_END(timeNormalize3, 1e3, REPS); // // Profiling code diff --git a/src/openpose/pose/poseGpuRenderer.cpp b/src/openpose/pose/poseGpuRenderer.cpp index 2c93d754933f3f60583e2028b92d3aa717c7bf4d..e14698b3f66e0fe2673b2bfc61e9872ac560ed98 100644 --- a/src/openpose/pose/poseGpuRenderer.cpp +++ b/src/openpose/pose/poseGpuRenderer.cpp @@ -34,16 +34,32 @@ namespace op try { // Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed. + log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__); #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); if (pGpuPose != nullptr) + { cudaFree(pGpuPose); + pGpuPose = nullptr; + } if (pMaxPtr != nullptr) + { cudaFree(pMaxPtr); + pMaxPtr = nullptr; + } if (pMinPtr != nullptr) + { cudaFree(pMinPtr); + pMinPtr = nullptr; + } if (pScalePtr != nullptr) + { cudaFree(pScalePtr); + pScalePtr = nullptr; + } + cudaCheck(__LINE__, __FUNCTION__, __FILE__); #endif + log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__); } catch (const std::exception& e) {