提交 cbcf27f8 编写于 作者: G gineshidalgo99

Improved doc

上级 f49e1842
......@@ -26,7 +26,7 @@ See [doc/quick_start.md#maximum-accuracy-configuration](./quick_start.md#maximum
In general, there are 3 ways to reduce the latency (with some drawbacks each one):
- Reducing `--output_resolution`: It will slightly reduce the latency and increase the FPS. But the quality of the displayed image will deteriorate.
- Reducing `--net_resolution` and/or `--face_net_resolution` and/or `--hand_net_resolution`: It will increase the FPS and reduce the latency. But the accuracy will drop, specially for small people in the image.
- Reducing `--net_resolution` and/or `--face_net_resolution` and/or `--hand_net_resolution`: It will increase the FPS and reduce the latency. But the accuracy will drop, especially for small people in the image. Note: For maximum accuracy, follow [doc/quick_start.md#maximum-accuracy-configuration](./quick_start.md#maximum-accuracy-configuration).
- Enabling `--disable_multi_thread`: The latency should be reduced. But the speed will drop to 1-GPU speed (as it will only use 1 GPU). Note that it is only practical for body; if hands and face are also extracted, it's usually not worth it.
......
......@@ -31,7 +31,7 @@ OpenPose - Frequently Asked Question (FAQ)
**A**: Check the [OpenPose Benchmark](https://docs.google.com/spreadsheets/d/1-DynFGvoScvfWDA1P4jDInCkbD4lg0IKOYbXgEq0sK0/edit#gid=0) to discover the approximate speed of your graphics card. Some speed tips:
1. Use cuDNN 5.1 (cuDNN 6 is ~10% slower).
2. Reduce the `--net_resolution` (e.g. to 320x176) (lower accuracy).
2. Reduce the `--net_resolution` (e.g. to 320x176) (lower accuracy). Note: For maximum accuracy, follow [doc/quick_start.md#maximum-accuracy-configuration](./quick_start.md#maximum-accuracy-configuration).
3. For face, reduce the `--face_net_resolution`. The resolution 320x320 usually works pretty decently.
4. Use the `MPI_4_layers` model (lower accuracy and lower number of parts).
5. Replace GPU rendering with CPU rendering to get approximately +0.5 FPS (`--render_pose 1`).
......
......@@ -82,7 +82,11 @@ build\x64\Release\OpenPoseDemo.exe --image_dir examples\media\ --face --hand
### Maximum Accuracy Configuration
This command provides the most accurate results we have been able to achieve for body, hand and face keypoint detection. However, this command will need ~10.5 GB of GPU memory (6.7 GB for COCO model) and runs at ~2 FPS on a Titan X for the body-foot model (1 FPS for COCO). **Note: Do not use this configuration for MPII model**, its accuracy might be harmed by this multi-scale setting. This configuration is optimal only for COCO and COCO-extended (e.g., the default BODY_25) models.
This command provides the most accurate results we have been able to achieve for body, hand and face keypoint detection. However, this command will need ~10.5 GB of GPU memory (6.7 GB for COCO model) and runs at ~2 FPS on a Titan X for the body-foot model (1 FPS for COCO).
- **Note 1:** Increasing `--net_resolution` will highly reduce the frame rate and increase latency, while it might increase the accuracy. However, this accuracy increase is not guaranteed in all scenarios, requiring a more detailed analysis for each particular scenario. E.g., it will work better for images with very small people, but usually worse for people taking up a large portion of the image. Thus, we recommend following the commands below for maximum accuracy in most cases for both big and small-size people.
- **Note 2: Do not use this configuration for the MPII model**, as its accuracy might be harmed by this multi-scale setting. This configuration is optimal only for COCO and COCO-extended (e.g., the default BODY_25) models.
```
# Ubuntu and Mac: Body
./build/examples/openpose/openpose.bin --net_resolution "1312x736" --scale_number 4 --scale_gap 0.25
......
......@@ -19,28 +19,29 @@ namespace op
const float alphaBlending = POSE_DEFAULT_ALPHA_KEYPOINT);
OP_API void renderPoseHeatMapGpu(float* frame, const PoseModel poseModel, const Point<int>& frameSize,
const float* const heatmap, const Point<int>& heatmapSize,
const float* const heatMapPtr, const Point<int>& heatMapSize,
const float scaleToKeepRatio, const int part,
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
OP_API void renderPoseHeatMapsGpu(float* frame, const PoseModel poseModel, const Point<int>& frameSize,
const float* const heatmap, const Point<int>& heatmapSize,
const float* const heatMapPtr, const Point<int>& heatMapSize,
const float scaleToKeepRatio,
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
OP_API void renderPosePAFGpu(float* framePtr, const PoseModel poseModel, const Point<int>& frameSize,
const float* const heatmapPtr, const Point<int>& heatmapSize,
const float* const heatMapPtr, const Point<int>& heatMapSize,
const float scaleToKeepRatio, const int part,
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
OP_API void renderPosePAFsGpu(float* framePtr, const PoseModel poseModel, const Point<int>& frameSize,
const float* const heatmapPtr, const Point<int>& heatmapSize,
const float* const heatMapPtr, const Point<int>& heatMapSize,
const float scaleToKeepRatio,
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
OP_API void renderPoseDistance(float* framePtr, const Point<int>& frameSize, const float* const heatMapPtr,
const Point<int>& heatMapSize, const float scaleToKeepRatio, const int part,
const int numberBodyParts, const int numberBodyPAFs, const float alphaBlending);
OP_API void renderPoseDistance(float* framePtr, const PoseModel poseModel, const Point<int>& frameSize,
const float* const heatMapPtr, const Point<int>& heatMapSize,
const float scaleToKeepRatio, const int part,
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
}
#endif // OPENPOSE_POSE_RENDER_POSE_HPP
......@@ -3,8 +3,10 @@
namespace op
{
// Use op::round/max/min for basic types (int, char, long, float, double, etc). Never with classes! std:: alternatives uses 'const T&' instead of 'const T' as argument.
// E.g. std::round is really slow (~300 ms vs ~10 ms when I individually apply it to each element of a whole image array (e.g. in floatPtrToUCharCvMat)
    // Use op::round/max/min for basic types (int, char, long, float, double, etc). Never with classes!
    // `std::` alternatives use 'const T&' instead of 'const T' as argument.
    // E.g. std::round is really slow (~300 ms vs ~10 ms) when individually applied to each element of a whole
    // image array.
// Round functions
// Signed
......
......@@ -472,17 +472,17 @@ namespace op
void removeSubsetsBelowThresholds(std::vector<int>& validSubsetIndexes, int& numberPeople,
const std::vector<std::pair<std::vector<int>, double>>& subsets,
const unsigned int subsetCounterIndex, const unsigned int numberBodyParts,
const int minSubsetCnt, const T minSubsetScore)
const int minSubsetCnt, const T minSubsetScore, const int maxPeaks)
{
try
{
// Delete people below the following thresholds:
// a) minSubsetCnt: removed if less than minSubsetCnt body parts
// b) minSubsetScore: removed if global score smaller than this
// c) POSE_MAX_PEOPLE: keep first POSE_MAX_PEOPLE people above thresholds
// c) maxPeaks (POSE_MAX_PEOPLE): keep first maxPeaks people above thresholds
numberPeople = 0;
validSubsetIndexes.clear();
validSubsetIndexes.reserve(fastMin((size_t)POSE_MAX_PEOPLE, subsets.size()));
validSubsetIndexes.reserve(fastMin((size_t)maxPeaks, subsets.size()));
for (auto index = 0u ; index < subsets.size() ; index++)
{
auto subsetCounter = subsets[index].first[subsetCounterIndex];
......@@ -501,7 +501,7 @@ namespace op
{
numberPeople++;
validSubsetIndexes.emplace_back(index);
if (numberPeople == POSE_MAX_PEOPLE)
if (numberPeople == maxPeaks)
break;
}
else if ((subsetCounter < 1 && numberBodyParts != 25) || subsetCounter < 0)
......@@ -587,16 +587,122 @@ namespace op
// Delete people below the following thresholds:
// a) minSubsetCnt: removed if less than minSubsetCnt body parts
// b) minSubsetScore: removed if global score smaller than this
// c) POSE_MAX_PEOPLE: keep first POSE_MAX_PEOPLE people above thresholds
// c) maxPeaks (POSE_MAX_PEOPLE): keep first maxPeaks people above thresholds
int numberPeople;
std::vector<int> validSubsetIndexes;
validSubsetIndexes.reserve(fastMin((size_t)POSE_MAX_PEOPLE, subsets.size()));
validSubsetIndexes.reserve(fastMin((size_t)maxPeaks, subsets.size()));
removeSubsetsBelowThresholds(validSubsetIndexes, numberPeople, subsets, subsetCounterIndex,
numberBodyParts, minSubsetCnt, minSubsetScore);
numberBodyParts, minSubsetCnt, minSubsetScore, maxPeaks);
// Fill and return poseKeypoints
subsetsToPoseKeypointsAndScores(poseKeypoints, poseScores, scaleFactor, subsets, validSubsetIndexes,
peaksPtr, numberPeople, numberBodyParts, numberBodyPartPairs);
// poseKeypoints from neck-part distances
if (poseModel == PoseModel::BODY_25D)
{
// TODO: Get scaleDownFactor as parameter, NOT HARD-CODED!
const auto scaleDownFactor = 8;
Array<T> poseKeypoints2 = poseKeypoints.clone();
const auto rootIndex = 1;
const auto rootNumberIndex = rootIndex*(maxPeaks+1)*3;
numberPeople = intRound(peaksPtr[rootNumberIndex]);
// const auto numberPeople = intRound(peaksPtr[rootNumberIndex]);
poseKeypoints.reset({numberPeople, (int)numberBodyParts, 3}, 0);
poseScores.reset(numberPeople, 0);
const std::vector<float> multiplier{3.99662, 2.83036, 6.70038, 7.98778, 2.83036, 6.6937, 7.98559, 9.03892, // BODY_25D
9.25788, 13.5094, 18.4561, 9.25499, 13.5129, 18.452,
4.45502, 4.45194, 4.06345, 4.0489,
18.452, 18.452, 18.452, 18.4561, 18.4561, 18.4561};
// To get ideal distance
const auto numberBodyPartsAndBkgAndPAFChannels = numberBodyParts + 1 + bodyPartPairs.size();
const auto heatMapOffset = heatMapSize.area();
// For each person
for (auto p = 0 ; p < numberPeople ; p++)
{
// For root (neck) position
// bpOrig == rootIndex
const auto rootXYSIndex = rootNumberIndex+3*(1+p);
// Set (x,y,score)
const auto rootX = scaleFactor*peaksPtr[rootXYSIndex];
const auto rootY = scaleFactor*peaksPtr[rootXYSIndex+1];
poseKeypoints[{p,rootIndex,0}] = rootX;
poseKeypoints[{p,rootIndex,1}] = rootY;
poseKeypoints[{p,rootIndex,2}] = peaksPtr[rootXYSIndex+2];
// For each body part
for (auto bpOrig = 0 ; bpOrig < (int)numberBodyParts ; bpOrig++)
{
if (bpOrig != rootIndex)
{
const auto bpChannel = (bpOrig < rootIndex ? bpOrig : bpOrig-1);
// Get ideal distance
const auto offsetIndex = numberBodyPartsAndBkgAndPAFChannels + 2*bpChannel;
const auto* mapX = heatMapPtr + offsetIndex * heatMapOffset;
const auto* mapY = heatMapPtr + (offsetIndex+1) * heatMapOffset;
const auto increaseRatio = multiplier[bpChannel]*scaleDownFactor*scaleFactor;
// Set (x,y) coordinates from the distance
// const auto index = intRound(rootY/scaleFactor)*heatMapSize.x + intRound(rootX/scaleFactor);
// const Point<T> neckPartDist{increaseRatio*mapX[index], increaseRatio*mapY[index]};
// poseKeypoints[{p,bpOrig,0}] = rootX + neckPartDist.x;
// poseKeypoints[{p,bpOrig,1}] = rootY + neckPartDist.y;
// Refinement
Point<T> neckPartDistRefined{0, 0};
for (auto y = intRound(rootY/scaleFactor) - 3 ; y < intRound(rootY/scaleFactor) + 4 ; y++)
{
for (auto x = intRound(rootX/scaleFactor) - 3 ; x < intRound(rootX/scaleFactor) + 4 ; x++)
{
const auto index = y*heatMapSize.x + x;
neckPartDistRefined.x += mapX[index];
neckPartDistRefined.y += mapY[index];
}
}
neckPartDistRefined *= increaseRatio/49;
const auto partX = rootX + neckPartDistRefined.x;
const auto partY = rootY + neckPartDistRefined.y;
poseKeypoints[{p,bpOrig,0}] = partX;
poseKeypoints[{p,bpOrig,1}] = partY;
// Set (temporary) body part score
poseKeypoints[{p,bpOrig,2}] = T(0.0501);
// Associate estimated keypoint with closest one
const auto candidateNumberIndex = bpOrig*(maxPeaks+1)*3;
const auto numberCandidates = intRound(peaksPtr[candidateNumberIndex]);
int closestIndex = -1;
T closetValue = std::numeric_limits<T>::max();
for (auto i = 0 ; i < numberCandidates ; i++)
{
const auto candidateXYSIndex = candidateNumberIndex+3*(1+i);
const auto diffX = partX-scaleFactor*peaksPtr[candidateXYSIndex];
const auto diffY = partY-scaleFactor*peaksPtr[candidateXYSIndex+1];
const auto dist = (diffX*diffX + diffY*diffY);
if (closetValue > dist)
{
closetValue = dist;
closestIndex = candidateXYSIndex;
}
}
if (closestIndex != -1)
{
const auto estimatedDist = neckPartDistRefined.x*neckPartDistRefined.x
+ neckPartDistRefined.y*neckPartDistRefined.y;
const auto x = scaleFactor*peaksPtr[closestIndex];
const auto y = scaleFactor*peaksPtr[closestIndex+1];
const auto candidateDist = (rootX-x)*(rootX-x)+(rootY-y)*(rootY-y);
if (estimatedDist/candidateDist < 1.1 && estimatedDist/candidateDist > 0.9)
{
poseKeypoints[{p,bpOrig,0}] = x;
poseKeypoints[{p,bpOrig,1}] = y;
// Set body part score
const auto s = peaksPtr[closestIndex+2];
// poseKeypoints[{p,bpOrig,2}] = s -;
poseKeypoints[{p,bpOrig,2}] = s;
}
}
// Set poseScore
poseScores[p] += poseKeypoints[{p,bpOrig,2}];
}
}
}
}
}
catch (const std::exception& e)
{
......
#ifdef USE_CUDA
#include <cuda_runtime_api.h>
#include <openpose/gpu/cuda.hpp>
#endif
#include <openpose/core/enumClasses.hpp>
#include <openpose/utilities/fastMath.hpp>
......@@ -102,6 +103,9 @@ namespace op
Array<float> heatMaps;
if (!mHeatMapTypes.empty())
{
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
// Get heatmaps size
const auto heatMapSize = getHeatMapSize();
......@@ -207,6 +211,9 @@ namespace op
// cudaMemcpy(heatMaps.getPtr(), getHeatMapGpuConstPtr(), heatMaps.getVolume() * sizeof(float),
// cudaMemcpyDeviceToHost);
}
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
return heatMaps;
}
catch (const std::exception& e)
......
......@@ -134,9 +134,9 @@ namespace op
else if (elementRendered <= numberBodyPartsPlusBkg+2)
{
const auto realElementRendered = (elementRendered == 1
? numberBodyPartsPlusBkg
: elementRendered - 3);
elementRenderedName = mPartIndexToName.at(realElementRendered-1);
? numberBodyParts
: elementRendered - 4);
elementRenderedName = mPartIndexToName.at(realElementRendered);
renderPoseHeatMapGpu(*spGpuMemory, mPoseModel, frameSize,
spPoseExtractorNet->getHeatMapGpuConstPtr(),
heatMapSize, scaleNetToOutput * scaleInputToOutput, realElementRendered,
......@@ -161,14 +161,13 @@ namespace op
if (mPoseModel != PoseModel::BODY_25D)
error("Neck-part distance channel only for BODY_25D.",
__LINE__, __FUNCTION__, __FILE__);
const auto distancePart = (elementRendered - lastPAFChannel - 1)*2;
const auto distancePart = (elementRendered - lastPAFChannel - 1);
const auto distancePartMapped = numberBodyPartsPlusBkg + numberBodyPAFChannels
+ distancePart;
elementRenderedName = mPartIndexToName.at(distancePartMapped);
elementRenderedName = elementRenderedName.substr(0, elementRenderedName.find("("));
renderPoseDistance(*spGpuMemory, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(),
heatMapSize, scaleNetToOutput * scaleInputToOutput, distancePart,
numberBodyParts, numberBodyPAFChannels,
renderPoseDistance(*spGpuMemory, mPoseModel, frameSize,
spPoseExtractorNet->getHeatMapGpuConstPtr(),
heatMapSize, scaleNetToOutput * scaleInputToOutput, distancePartMapped,
(mBlendOriginalFrame ? getAlphaHeatMap() : 1.f));
}
}
......
......@@ -96,7 +96,7 @@ namespace op
return (unsigned int)(getPoseBodyPartMapping(poseModel).size()
+ getPosePartPairs(poseModel).size()/2 + 3
+ (poseModel == PoseModel::BODY_25D
? getPoseNumberBodyParts(poseModel) - 1 : 0)
? 2*(getPoseNumberBodyParts(poseModel) - 1) : 0)
);
}
catch (const std::exception& e)
......
......@@ -368,7 +368,7 @@ namespace op
__global__ void renderBodyPartHeatMap(float* targetPtr, const int targetWidth, const int targetHeight,
const float* const heatMapPtr, const int widthHeatMap,
const int heightHeatMap, const float scaleToKeepRatio, const int part,
const int numberBodyParts, const float alphaColorToAdd)
const float alphaColorToAdd, const bool absValue = false)
{
const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;
......@@ -377,13 +377,15 @@ namespace op
{
const auto xSource = (x + 0.5f) / scaleToKeepRatio - 0.5f;
const auto ySource = (y + 0.5f) / scaleToKeepRatio - 0.5f;
const auto heatMapOffset = part * widthHeatMap * heightHeatMap;
const auto* const heatMapPtrOffsetted = heatMapPtr + heatMapOffset;
const auto* const heatMapPtrOffsetted = heatMapPtr + part * widthHeatMap * heightHeatMap;
const auto interpolatedValue = bicubicInterpolate(heatMapPtrOffsetted, xSource, ySource, widthHeatMap,
heightHeatMap, widthHeatMap);
float rgbColor[3];
getColorHeatMap(rgbColor, interpolatedValue, 0.f, 1.f);
if (absValue)
getColorHeatMap(rgbColor, fabsf(interpolatedValue), 0.f, 1.f);
else
getColorHeatMap(rgbColor, interpolatedValue, 0.f, 1.f);
const auto blueIndex = 3*(y * targetWidth + x);
addColorWeighted(targetPtr[blueIndex+2], targetPtr[blueIndex+1], targetPtr[blueIndex], rgbColor,
......@@ -437,6 +439,15 @@ namespace op
}
float3 rgbColor2;
// if (forceNorm1)
// {
// const auto norm = std::sqrt(valueX*valueX + valueY*valueY);
// if (norm > 0.05f)
// getColorXYAffinity(rgbColor2, valueX/norm, valueY/norm);
// else
// getColorXYAffinity(rgbColor2, valueX, valueY);
// }
// else
getColorXYAffinity(rgbColor2, valueX, valueY);
rgbColor[0] += rgbColor2.x;
rgbColor[1] += rgbColor2.y;
......@@ -594,12 +605,10 @@ namespace op
dim3 threadsPerBlock;
dim3 numBlocks;
getNumberCudaThreadsAndBlocks(threadsPerBlock, numBlocks, frameSize);
const auto numberBodyParts = getPoseNumberBodyParts(poseModel);
const auto heatMapOffset = numberBodyParts * heatMapSize.area();
renderBodyPartHeatMap<<<threadsPerBlock, numBlocks>>>(
framePtr, frameSize.x, frameSize.y, heatMapPtr, heatMapSize.x, heatMapSize.y, scaleToKeepRatio,
part-1, numberBodyParts, alphaBlending
part, alphaBlending
);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
}
......@@ -667,19 +676,29 @@ namespace op
}
}
void renderPoseDistance(float* framePtr, const Point<int>& frameSize, const float* const heatMapPtr,
const Point<int>& heatMapSize, const float scaleToKeepRatio, const int part,
const int numberBodyParts, const int numberBodyPAFChannels, const float alphaBlending)
void renderPoseDistance(float* framePtr, const PoseModel poseModel, const Point<int>& frameSize,
const float* const heatMapPtr, const Point<int>& heatMapSize, const float scaleToKeepRatio,
const int part, const float alphaBlending)
{
try
{
// // As PAF
// const bool forceNorm1 = true;
// renderPosePAFGpuAux(framePtr, poseModel, frameSize, heatMapPtr, heatMapSize, scaleToKeepRatio, part, 1,
// alphaBlending, forceNorm1);
// As body part
// framePtr = width * height * 3
// heatMapPtr = heatMapSize.x * heatMapSize.y * #body parts
checkAlpha(alphaBlending);
dim3 threadsPerBlock;
dim3 numBlocks;
getNumberCudaThreadsAndBlocks(threadsPerBlock, numBlocks, frameSize);
renderDistance<<<threadsPerBlock, numBlocks>>>(
const auto absValue = true;
renderBodyPartHeatMap<<<threadsPerBlock, numBlocks>>>(
framePtr, frameSize.x, frameSize.y, heatMapPtr, heatMapSize.x, heatMapSize.y, scaleToKeepRatio,
part, numberBodyParts, numberBodyPAFChannels, alphaBlending);
part, alphaBlending, absValue);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
}
catch (const std::exception& e)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册