提交 c4369cba 编写于 作者: G Gines Hidalgo

Example 09_keypoints_from_heatmaps working for CUDA again

上级 3c9441ae
......@@ -370,13 +370,14 @@ OpenPose Library - Release Notes
1. Highly improved 3D triangulation for >3 cameras by fixing some small bugs.
2. Added community-based support for Nvidia NVCaffe.
3. Increased accuracy very slightly for CUDA version (about 0.01%) by adapting the threshold in `process()` in `bodyPartConnectorBase.cu` to `defaultNmsThreshold`. This also removes any possibility of future bugs in that function for using a default NMS threshold higher than 0.15 (which was the hard-coded value used previously).
4. Increased mAP but reduced mAR (both about 0.01%) as well as reduction of false positives. Step 1: removed legs where only knee/ankle/feet are found. Step 2: If no people is found in an image, `removePeopleBelowThresholds` is re-run with `maximizePositives = true`.
4. Increased mAP but reduced mAR (both about 0.01%) as well as reduction of false positives. Step 1: removed legs where only knee/ankle/feet are found. Step 2: If no people are found in an image, `removePeopleBelowThresholdsAndFillFaces` is re-run with `maximizePositives = true`.
5. Number of maximum people is not limited by the maximum number of max peaks anymore. However, the number of body part candidates for a specific keypoint (e.g., nose) is still limited to the number of max peaks.
6. Added more checks during destructors of CUDA-related functions and safer CUDA frees.
2. Functions or parameters renamed:
1. `--3d_min_views` default value (-1) no longer means that all camera views are required. Instead, it will be equal to max(2, min(4, #cameras-1)). This should provide a good trade-off between recall and precision.
3. Main bugs fixed:
1. Windows: Added back support for OpenGL and Spinnaker, as well as DLLs for debug compilation.
2. `06_face_from_image.cpp` and `07_hand_from_image.cpp` working again, they stopped working in version 1.5.0 with the GPU image resize for the GUI.
2. `06_face_from_image.cpp`, `07_hand_from_image.cpp`, and `09_keypoints_from_heatmaps` working again, they stopped working in version 1.5.0 with the GPU image resize for the GUI.
4. Changes/additions that affect the compatibility with the OpenPose Unity Plugin:
......
......@@ -34,9 +34,23 @@ namespace op
if (mGpuResize)
{
// Free temporary memory
cudaFree(pInputImageCuda);
cudaFree(pOutputImageCuda);
cudaFree(pInputImageReorderedCuda);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pInputImageCuda != nullptr)
{
cudaFree(pInputImageCuda);
pInputImageCuda = nullptr;
}
if (pOutputImageCuda != nullptr)
{
cudaFree(pOutputImageCuda);
pOutputImageCuda = nullptr;
}
if (pInputImageReorderedCuda != nullptr)
{
cudaFree(pInputImageReorderedCuda);
pInputImageReorderedCuda = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
}
#endif
}
......
......@@ -38,8 +38,18 @@ namespace op
if (mGpuResize)
{
// Free temporary memory
cudaFree(pInputImageCuda);
cudaFree(*spOutputImageCuda);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pInputImageCuda != nullptr)
{
cudaFree(pInputImageCuda);
pInputImageCuda = nullptr;
}
if (*spOutputImageCuda != nullptr)
{
cudaFree(*spOutputImageCuda);
*spOutputImageCuda = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
}
#endif
}
......
......@@ -46,7 +46,10 @@ namespace op
{
#ifdef USE_CUDA
if (mIsLastRenderer && spGpuMemory != nullptr)
{
cudaFree(*spGpuMemory);
*spGpuMemory = nullptr;
}
#endif
}
catch (const std::exception& e)
......
......@@ -37,8 +37,18 @@ namespace op
if (mGpuResize)
{
// Free temporary memory
cudaFree(*spOutputImageFloatCuda);
cudaFree(pOutputImageUCharCuda);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (*spOutputImageFloatCuda != nullptr)
{
cudaFree(*spOutputImageFloatCuda);
*spOutputImageFloatCuda = nullptr;
}
if (pOutputImageUCharCuda != nullptr)
{
cudaFree(pOutputImageUCharCuda);
pOutputImageUCharCuda = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
}
#endif
}
......
......@@ -24,14 +24,28 @@ namespace op
{
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pGpuFace != nullptr)
{
cudaFree(pGpuFace);
pGpuFace = nullptr;
}
if (pMaxPtr != nullptr)
{
cudaFree(pMaxPtr);
pMaxPtr = nullptr;
}
if (pMinPtr != nullptr)
{
cudaFree(pMinPtr);
pMinPtr = nullptr;
}
if (pScalePtr != nullptr)
{
cudaFree(pScalePtr);
pScalePtr = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
......
......@@ -24,14 +24,28 @@ namespace op
{
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pGpuHand != nullptr)
{
cudaFree(pGpuHand);
pGpuHand = nullptr;
}
if (pMaxPtr != nullptr)
{
cudaFree(pMaxPtr);
pMaxPtr = nullptr;
}
if (pMinPtr != nullptr)
{
cudaFree(pMinPtr);
pMinPtr = nullptr;
}
if (pScalePtr != nullptr)
{
cudaFree(pScalePtr);
pScalePtr = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
......
......@@ -61,14 +61,14 @@ namespace op
template <typename T>
void getKeypointCounter(
int& personCounter, const std::vector<std::pair<std::vector<int>, T>>& peopleVector,
const unsigned int index, const int indexFirst, const int indexLast, const int minimum)
const unsigned int part, const int partFirst, const int partLast, const int minimum)
{
try
{
// Count keypoints
auto keypointCounter = 0;
for (auto i = indexFirst ; i < indexLast ; i++)
keypointCounter += (peopleVector[index].first.at(i) > 0);
for (auto i = partFirst ; i < partLast ; i++)
keypointCounter += (peopleVector[part].first.at(i) > 0);
// If enough keypoints --> subtract them and keep them at least as big as minimum
if (keypointCounter > minimum)
personCounter += minimum-keypointCounter; // personCounter = non-considered keypoints + minimum
......@@ -79,6 +79,53 @@ namespace op
}
}
// Compute the bounding box (ROI), its largest side (diameter), and the first/last valid
// part indexes for the keypoints of one person within the part range [partInit, partEnd).
//
// Outputs:
//   roi           - Bounding box as {x, y, width, height} of all valid keypoints in the range.
//                   It is {0,0,0,0} if no valid keypoint is found.
//   diameter      - fastMax(roi.width, roi.height), i.e., 0 if no valid keypoint is found.
//   partFirstNon0 - First part in the range with a valid keypoint, or -1 if none.
//   partLastNon0  - Last part in the range with a valid keypoint, or -1 if none.
// Inputs:
//   personVector  - For each part, the index into peaksPtr where its score is stored. The
//                   x and y coordinates are read from the 2 previous positions.
//                   NOTE(review): presumably an index of 0 marks a part that was not
//                   detected - confirm against createPeopleVector.
//   peaksPtr      - Buffer with the (x, y, score) values addressed by personVector.
//   partInit      - First part to consider (inclusive).
//   partEnd       - Last part to consider (exclusive). The caller must guarantee that
//                   [partInit, partEnd) lies within personVector.
// Any std::exception is forwarded to error().
template <typename T>
void getRoiDiameterAndBounds(
    Rectangle<int>& roi, int& diameter, int& partFirstNon0, int& partLastNon0,
    const std::vector<int>& personVector, const T* const peaksPtr,
    const int partInit, const int partEnd)
{
    try
    {
        // Reset outputs so that they are well-defined even if no keypoint is found
        roi = Rectangle<int>{0,0,0,0};
        diameter = 0;
        partFirstNon0 = -1;
        partLastNon0 = -1;
        // Find ROI, partFirstNon0, and partLastNon0
        // Note: Until the final conversion, roi.width/roi.height temporarily hold the
        // maximum x/y coordinates (i.e., the ROI is stored as [p1, p2]).
        for (auto part = partInit ; part < partEnd ; part++)
        {
            const auto peakIndex = personVector[part];
            // x and y are read from peakIndex-2 and peakIndex-1, so any index smaller than
            // 2 cannot address a valid peak and would read out of bounds
            if (peakIndex < 2)
                continue;
            const auto score = peaksPtr[peakIndex];
            if (score > 0)
            {
                const auto x = static_cast<int>(peaksPtr[peakIndex-2]);
                const auto y = static_cast<int>(peaksPtr[peakIndex-1]);
                // First keypoint? --> It defines the initial ROI. Otherwise, the {0,0}
                // initialization would always be kept as the minimum corner.
                if (partFirstNon0 < 0)
                {
                    roi.x = x;
                    roi.y = y;
                    roi.width = x;
                    roi.height = y;
                    partFirstNon0 = part;
                }
                // Remaining keypoints --> Expand ROI
                else
                {
                    if (roi.x > x)
                        roi.x = x;
                    if (roi.y > y)
                        roi.y = y;
                    if (roi.width < x)
                        roi.width = x;
                    if (roi.height < y)
                        roi.height = y;
                }
                // Last keypoint?
                partLastNon0 = part;
            }
        }
        // From [p1, p2] to [p1, width, height]
        roi.width -= roi.x;
        roi.height -= roi.y;
        // diameter
        diameter = fastMax(roi.width, roi.height);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
template <typename T>
std::vector<std::pair<std::vector<int>, T>> createPeopleVector(
const T* const heatMapPtr, const T* const peaksPtr, const PoseModel poseModel, const Point<int>& heatMapSize,
......@@ -643,32 +690,6 @@ namespace op
}
}
// Earlier, partial version of getRoiDiameterAndBounds: it only resets `roi` and lowers
// roi.x / roi.y while walking over the person's keypoints.
// NOTE(review): `diameter`, `indexFirstNon0`, `indexLastNon0`, `indexInit`, and `indexEnd`
// are never read nor written in this body, and roi.width / roi.height stay at 0.
template <typename T>
void getRoiDiameterAndBounds(
Rectangle<int>& roi, int& diameter, int& indexFirstNon0, int& indexLastNon0,
const std::vector<int>& personVector, const T* const peaksPtr,
const int indexInit, const int indexEnd)
{
try
{
// Start from an all-zero rectangle
roi = Rectangle<int>{0,0,0,0};
// Visit every element of personVector except the last one
// NOTE(review): size() is unsigned, so size()-1 wraps around if personVector is empty
for (auto index = 0u ; index < personVector.size()-1 ; index++)
{
// personVector[index] addresses the score inside peaksPtr; x and y are read from the
// 2 previous positions
const auto x = peaksPtr[personVector[index]-2];
const auto y = peaksPtr[personVector[index]-1];
// NOTE(review): score is read but not used below
const auto score = peaksPtr[personVector[index]];
// Only ever lowers roi.x / roi.y, which start at 0
if (roi.x > x)
roi.x = x;
if (roi.y > y)
roi.y = y;
}
}
// Forward any standard exception to error()
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
template <typename T>
void removePeopleBelowThresholdsAndFillFaces(
std::vector<int>& validSubsetIndexes, int& numberPeople,
......@@ -693,28 +714,28 @@ namespace op
std::vector<int> faceInvalidSubsetIndexes;
faceInvalidSubsetIndexes.reserve(peopleVector.size());
// For each person candidate
for (auto index = 0u ; index < peopleVector.size() ; index++)
for (auto person = 0u ; person < peopleVector.size() ; person++)
{
auto personCounter = peopleVector[index].first.back();
auto personCounter = peopleVector[person].first.back();
// Analog for hand/face keypoints
if (numberBodyParts >= 135)
{
// No consider face keypoints for personCounter
const auto currentCounter = personCounter;
getKeypointCounter(personCounter, peopleVector, index, 65, 135, 1);
getKeypointCounter(personCounter, peopleVector, person, 65, 135, 1);
const auto newCounter = personCounter;
if (personCounter == 0)
if (personCounter == 1)
{
faceInvalidSubsetIndexes.emplace_back(index);
faceInvalidSubsetIndexes.emplace_back(person);
continue;
}
// If body is still valid and facial points were removed, then add to valid faces
else if (currentCounter != newCounter)
faceValidSubsetIndexes.emplace_back(index);
faceValidSubsetIndexes.emplace_back(person);
// No consider right hand keypoints for personCounter
getKeypointCounter(personCounter, peopleVector, index, 45, 65, 1);
getKeypointCounter(personCounter, peopleVector, person, 45, 65, 1);
// No consider left hand keypoints for personCounter
getKeypointCounter(personCounter, peopleVector, index, 25, 45, 1);
getKeypointCounter(personCounter, peopleVector, person, 25, 45, 1);
}
// Foot keypoints do not affect personCounter (too many false positives,
// same foot usually appears as both left and right keypoints)
......@@ -724,7 +745,7 @@ namespace op
if (!maximizePositives && (numberBodyParts == 25 || numberBodyParts > 70))
{
const auto currentCounter = personCounter;
getKeypointCounter(personCounter, peopleVector, index, 19, 25, 0);
getKeypointCounter(personCounter, peopleVector, person, 19, 25, 0);
const auto newCounter = personCounter;
// Problem: Same leg/foot keypoints are considered for both left and right keypoints.
// Solution: Remove legs that are duplicated and that do not have upper torso
......@@ -733,11 +754,11 @@ namespace op
continue;
}
// Add only valid people
const auto personScore = peopleVector[index].second;
const auto personScore = peopleVector[person].second;
if (personCounter >= minSubsetCnt && (personScore/personCounter) >= minSubsetScore)
{
numberPeople++;
validSubsetIndexes.emplace_back(index);
validSubsetIndexes.emplace_back(person);
// // This is not required, it is OK if there are more people. No more GPU memory used.
// if (numberPeople == maxPeaks)
// break;
......@@ -747,25 +768,6 @@ namespace op
error("Bad personCounter (" + std::to_string(personCounter) + "). Bug in this"
" function if this happens.", __LINE__, __FUNCTION__, __FILE__);
}
// // Random standalone facial keypoints --> Merge into a more complete face
// if (numberPeople > 0 && faceInvalidSubsetIndexes.size() > 0)
// {
// for (auto faceId = 0u ; faceId < faceInvalidSubsetIndexes.size() ; faceId++)
// {
// // Get ROI
// Rectangle<int> roi;
// int diameter;
// int indexFirstNon0;
// int indexLastNon0;
// const auto index = faceValidSubsetIndexes[faceId];
// getRoiDiameterAndBounds(
// roi, diameter, indexFirstNon0, indexLastNon0, peopleVector[index].first, peaksPtr, 65, 135);
// // const auto personCounter = peopleVector[index].first.back();
// // const auto x = peaksPtr[peopleVector[index].first[part]-2];
// // const auto y = peaksPtr[peopleVector[index].first[part]-1];
// // const auto score = peaksPtr[peopleVector[index].first[part]];
// }
// }
// If no people found --> Repeat with maximizePositives = true
// Result: Increased COCO mAP because we catch more foot-only images
if (numberPeople == 0 && !maximizePositives)
......
......@@ -3,6 +3,7 @@
#endif
#ifdef USE_CUDA
#include <openpose/gpu/cuda.hpp>
#include <openpose/gpu/cuda.hu>
#endif
#ifdef USE_OPENCL
#include <openpose/gpu/opencl.hcl>
......@@ -41,14 +42,28 @@ namespace op
try
{
#if defined USE_CAFFE && defined USE_CUDA
cudaFree(pBodyPartPairsGpuPtr);
cudaFree(pMapIdxGpuPtr);
cudaFree(pFinalOutputGpuPtr);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pBodyPartPairsGpuPtr != nullptr)
{
cudaFree(pBodyPartPairsGpuPtr);
pBodyPartPairsGpuPtr = nullptr;
}
if (pMapIdxGpuPtr != nullptr)
{
cudaFree(pMapIdxGpuPtr);
pMapIdxGpuPtr = nullptr;
}
if (pFinalOutputGpuPtr != nullptr)
{
cudaFree(pFinalOutputGpuPtr);
pFinalOutputGpuPtr = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
errorDestructor(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
......
......@@ -45,9 +45,9 @@ namespace op
#if defined USE_CAFFE && defined USE_OPENCL
try
{
if(mKernelGpuPtr != nullptr)
if (mKernelGpuPtr != nullptr)
clReleaseMemObject((cl_mem)mKernelGpuPtr);
if(mKernelCpuPtr != nullptr)
if (mKernelCpuPtr != nullptr)
delete mKernelCpuPtr;
}
catch (const std::exception& e)
......
......@@ -4,8 +4,18 @@
namespace op
{
const auto THREADS_PER_BLOCK = 256u;
const auto THREADS_PER_BLOCK_1D = 16u;
// Element-wise copy kernel: the thread with flat x-index i writes sourcePtr[i] into
// targetPtr[i]. Only the x dimension of the launch configuration is used, and threads
// whose index is not smaller than N do nothing.
//   targetPtr - Destination buffer (written).
//   sourcePtr - Source buffer (read only).
//   N         - Number of elements to copy.
template <typename T>
__global__ void fillKernel(
    T* targetPtr, const T* const sourcePtr, const int N)
{
    // Flat index of this thread along x
    const auto globalIndex = (blockIdx.x * blockDim.x) + threadIdx.x;
    // Threads beyond the last element have no work to do
    if (!(globalIndex < N))
        return;
    targetPtr[globalIndex] = sourcePtr[globalIndex];
}
// template <typename T>
// __global__ void resizeKernelOld(
// T* targetPtr, const T* const sourcePtr, const int widthSource, const int heightSource, const int widthTarget,
......@@ -329,18 +339,29 @@ namespace op
// Optimized function for 8x resize
// OP_CUDA_PROFILE_INIT(REPS);
if (widthTarget / widthSource != 8 || heightTarget / heightSource != 8)
error("Kernel only implemented for 8x resize. Notify us if this error appears.",
__LINE__, __FUNCTION__, __FILE__);
const auto rescaleFactor = (unsigned int) std::ceil(heightTarget / (float)(heightSource));
const dim3 threadsPerBlock{rescaleFactor, rescaleFactor, 1};
const dim3 numBlocks{
getNumberCudaBlocks(widthTarget, threadsPerBlock.x),
getNumberCudaBlocks(heightTarget, threadsPerBlock.y),
getNumberCudaBlocks(num * channels, threadsPerBlock.z)};
resize8TimesKernel<<<numBlocks, threadsPerBlock>>>(
targetPtr, sourcePtrs.at(0), widthSource, heightSource, widthTarget, heightTarget,
rescaleFactor);
if (widthTarget / widthSource == 1 && heightTarget / heightSource == 1)
{
const auto N = widthTarget * heightTarget * num * channels;
const dim3 threadsPerBlock{THREADS_PER_BLOCK};
const dim3 numBlocks{getNumberCudaBlocks(N, threadsPerBlock.x)};
fillKernel<<<numBlocks, threadsPerBlock>>>(
targetPtr, sourcePtrs.at(0), N);
}
else
{
if (widthTarget / widthSource != 8 || heightTarget / heightSource != 8)
error("Kernel only implemented for 8x resize. Notify us if this error appears.",
__LINE__, __FUNCTION__, __FILE__);
const auto rescaleFactor = (unsigned int) std::ceil(heightTarget / (float)(heightSource));
const dim3 threadsPerBlock{rescaleFactor, rescaleFactor, 1};
const dim3 numBlocks{
getNumberCudaBlocks(widthTarget, threadsPerBlock.x),
getNumberCudaBlocks(heightTarget, threadsPerBlock.y),
getNumberCudaBlocks(num * channels, threadsPerBlock.z)};
resize8TimesKernel<<<numBlocks, threadsPerBlock>>>(
targetPtr, sourcePtrs.at(0), widthSource, heightSource, widthTarget, heightTarget,
rescaleFactor);
}
// OP_CUDA_PROFILE_END(timeNormalize3, 1e3, REPS);
// // Profiling code
......@@ -482,10 +503,14 @@ namespace op
widthTarget, heightTarget, sourcePtrs[0], sourcePtrs[1], sourcePtrs[2], sourcePtrs[3],
sourcePtrs[4], sourcePtrs[5], sourcePtrs[6], sourcePtrs[7]);
// Free memory
cudaFree(widthSources);
cudaFree(heightSources);
cudaFree(scaleWidths);
cudaFree(scaleHeights);
if (widthSources != nullptr)
cudaFree(widthSources);
if (heightSources != nullptr)
cudaFree(heightSources);
if (scaleWidths != nullptr)
cudaFree(scaleWidths);
if (scaleHeights != nullptr)
cudaFree(scaleHeights);
// OP_CUDA_PROFILE_END(timeNormalize3, 1e3, REPS);
// // Profiling code
......
......@@ -34,16 +34,32 @@ namespace op
try
{
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pGpuPose != nullptr)
{
cudaFree(pGpuPose);
pGpuPose = nullptr;
}
if (pMaxPtr != nullptr)
{
cudaFree(pMaxPtr);
pMaxPtr = nullptr;
}
if (pMinPtr != nullptr)
{
cudaFree(pMinPtr);
pMinPtr = nullptr;
}
if (pScalePtr != nullptr)
{
cudaFree(pScalePtr);
pScalePtr = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
catch (const std::exception& e)
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册