提交 98e53de1 编写于 作者: G Gines Hidalgo

All/some skeletons not properly displayed or missing when many people are present

上级 5002ebd0
......@@ -407,6 +407,7 @@ OpenPose Library - Release Notes
4. Natural sort now works properly with filenames containing numbers longer than the limit of an int.
5. Optionally auto-generated bin folder only contains the required DLLs (depending on the CMake configuration), instead of all of them.
6. When WrapperStructFace and WrapperStructHand are not called and configured for Wrapper, setting body to CPU rendering was not working.
7. Skeleton rendering: All or some skeletons were not properly displayed, or were completely missing, on images with many people.
4. Changes/additions that affect the compatibility with the OpenPose Unity Plugin:
......
......@@ -8,18 +8,14 @@ namespace op
const float* const keypointsPtr, const int numberPeople, const int numberParts, const float threshold)
{
const auto globalIdx = threadIdx.x;
// const auto xIndex = 2*globalIdx;
// const auto yIndex = xIndex+1;
const auto xIndex = globalIdx;
const auto yIndex = numberPeople+globalIdx;
// Fill shared parameters
// if (globalIdx < numberPeople)
if (globalIdx < numberPeople)
{
auto minValueX = (float)targetWidth;
auto minValueY = (float)targetHeight;
auto maxValueX = 0.f;
auto maxValueY = 0.f;
float minValueX = (float)targetWidth;
float minValueY = (float)targetHeight;
float maxValueX = 0.f;
float maxValueY = 0.f;
for (auto part = 0 ; part < numberParts ; part++)
{
const auto index = 3 * (globalIdx*numberParts + part);
......@@ -51,6 +47,10 @@ namespace op
minValueY -= constantToAdd;
}
// const auto xIndex = 2*globalIdx;
// const auto yIndex = xIndex+1;
const auto xIndex = globalIdx;
const auto yIndex = numberPeople+globalIdx;
minPtr[xIndex] = minValueX;
minPtr[yIndex] = minValueY;
maxPtr[xIndex] = maxValueX;
......@@ -58,6 +58,7 @@ namespace op
}
}
// Note: renderKeypoints does not work for videos with many people; renderKeypointsOld is used instead, and its speed was slightly improved.
__inline__ __device__ void renderKeypoints(
float* targetPtr, float* sharedMaxs, float* sharedMins, float* sharedScaleF, const float* const maxPtr,
const float* const minPtr, const float* const scalePtr, const int globalIdx, const int x, const int y,
......@@ -82,9 +83,9 @@ namespace op
if (x < targetWidth && y < targetHeight)
{
const auto baseIndex = 3*(y * targetWidth + x);
auto b = targetPtr[baseIndex];
auto g = targetPtr[baseIndex+1];
auto r = targetPtr[baseIndex+2];
float b = targetPtr[baseIndex];
float g = targetPtr[baseIndex+1];
float r = targetPtr[baseIndex+2];
if (!blendOriginalFrame)
{
b = 0.f;
......@@ -104,7 +105,7 @@ namespace op
const auto xIndex = person;
const auto yIndex = numberPeople+person;
if (x <= sharedMaxs[xIndex] && x >= sharedMins[xIndex]
&& y <= sharedMaxs[yIndex] && y >= sharedMins[yIndex])
&& y <= sharedMaxs[yIndex] && y >= sharedMins[yIndex])
{
// Part pair connections
for (auto partPair = 0; partPair < numberPartPairs; partPair++)
......@@ -195,7 +196,6 @@ namespace op
if (minr2 <= dist2 && dist2 <= maxr2)
addColorWeighted(r, g, b, &rgbColorsPtr[(part%numberColors)*3], alphaColorToAdd);
}
}
}
}
......@@ -218,10 +218,10 @@ namespace op
// Fill shared parameters
if (globalIdx < numberPeople)
{
sharedMins[globalIdx].x = targetWidth;
sharedMins[globalIdx].y = targetHeight;
sharedMaxs[globalIdx].x = 0.f;
sharedMaxs[globalIdx].y = 0.f;
float minValueX = (float)targetWidth;
float minValueY = (float)targetHeight;
float maxValueX = 0.f;
float maxValueY = 0.f;
for (auto part = 0 ; part < numberParts ; part++)
{
const auto index = 3 * (globalIdx*numberParts + part);
......@@ -230,28 +230,33 @@ namespace op
const auto score = keypointsPtr[index+2];
if (score > threshold)
{
if (x < sharedMins[globalIdx].x)
sharedMins[globalIdx].x = x;
if (x > sharedMaxs[globalIdx].x)
sharedMaxs[globalIdx].x = x;
if (y < sharedMins[globalIdx].y)
sharedMins[globalIdx].y = y;
if (y > sharedMaxs[globalIdx].y)
sharedMaxs[globalIdx].y = y;
if (x < minValueX)
minValueX = x;
if (x > maxValueX)
maxValueX = x;
if (y < minValueY)
minValueY = y;
if (y > maxValueY)
maxValueY = y;
}
}
if (sharedMaxs[globalIdx].x != 0.f && sharedMaxs[globalIdx].y != 0.f)
if (maxValueX != 0.f && maxValueY != 0.f)
{
const auto averageX = sharedMaxs[globalIdx].x - sharedMins[globalIdx].x;
const auto averageY = sharedMaxs[globalIdx].y - sharedMins[globalIdx].y;
const auto averageX = maxValueX - minValueX;
const auto averageY = maxValueY - minValueY;
// (averageX + averageY) / 2.f / 400.f
sharedScaleF[globalIdx] = fastTruncateCuda((averageX + averageY) / 400.f, 0.33f, 1.f);
const auto constantToAdd = 50.f;
sharedMaxs[globalIdx].x += constantToAdd;
sharedMaxs[globalIdx].y += constantToAdd;
sharedMins[globalIdx].x -= constantToAdd;
sharedMins[globalIdx].y -= constantToAdd;
maxValueX += constantToAdd;
maxValueY += constantToAdd;
minValueX -= constantToAdd;
minValueY -= constantToAdd;
}
sharedMins[globalIdx].x = minValueX;
sharedMins[globalIdx].y = minValueY;
sharedMaxs[globalIdx].x = maxValueX;
sharedMaxs[globalIdx].y = maxValueY;
}
__syncthreads();
......@@ -259,9 +264,9 @@ namespace op
if (x < targetWidth && y < targetHeight)
{
const auto baseIndex = 3*(y * targetWidth + x);
auto& b = targetPtr[baseIndex];
auto& g = targetPtr[baseIndex+1];
auto& r = targetPtr[baseIndex+2];
float b = targetPtr[baseIndex];
float g = targetPtr[baseIndex+1];
float r = targetPtr[baseIndex+2];
if (!blendOriginalFrame)
{
b = 0.f;
......@@ -372,6 +377,9 @@ namespace op
}
}
}
targetPtr[baseIndex] = b;
targetPtr[baseIndex+1] = g;
targetPtr[baseIndex+2] = r;
}
}
}
......
......@@ -74,8 +74,8 @@ namespace op
opLog("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// GPU memory allocation for rendering
#ifdef USE_CUDA
cudaMalloc((void**)(&pGpuPose),
POSE_MAX_PEOPLE * getPoseNumberBodyParts(mPoseModel) * 3 * sizeof(float));
const auto gpuPoseVolume = POSE_MAX_PEOPLE * getPoseNumberBodyParts(mPoseModel) * 3 * sizeof(float);
cudaMalloc((void**)(&pGpuPose), gpuPoseVolume);
cudaMalloc((void**)&pMaxPtr, sizeof(float) * 2 * POSE_MAX_PEOPLE);
cudaMalloc((void**)&pMinPtr, sizeof(float) * 2 * POSE_MAX_PEOPLE);
cudaMalloc((void**)&pScalePtr, sizeof(float) * POSE_MAX_PEOPLE);
......@@ -119,9 +119,11 @@ namespace op
scaleKeypoints(poseKeypointsRescaled, scaleInputToOutput);
// Render keypoints
if (!poseKeypoints.empty())
{
const auto gpuPoseVolume = numberPeople * numberBodyParts * 3 * sizeof(float);
cudaMemcpy(
pGpuPose, poseKeypointsRescaled.getConstPtr(),
numberPeople * numberBodyParts * 3 * sizeof(float), cudaMemcpyHostToDevice);
pGpuPose, poseKeypointsRescaled.getConstPtr(), gpuPoseVolume, cudaMemcpyHostToDevice);
}
renderPoseKeypointsGpu(
*spGpuMemory, pMaxPtr, pMinPtr, pScalePtr, mPoseModel, numberPeople, frameSize, pGpuPose,
mRenderThreshold, mShowGooglyEyes, mBlendOriginalFrame, getAlphaKeypoint());
......
......@@ -136,8 +136,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -148,8 +150,9 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight, posePtr, COCO_PAIRS_GPU, numberPeople, 18, numberPartPairs,
COCO_COLORS, numberColors, radius, lineWidth, COCO_SCALES, numberScales, threshold, alphaColorToAdd,
blendOriginalFrame, (googlyEyes ? 14 : -1), (googlyEyes ? 15 : -1));
......@@ -165,8 +168,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -177,8 +182,9 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight, posePtr, BODY_19_PAIRS_GPU, numberPeople, 19, numberPartPairs,
BODY_19_COLORS, numberColors, radius, lineWidth, BODY_19_SCALES, numberScales, threshold, alphaColorToAdd,
blendOriginalFrame, (googlyEyes ? 15 : -1),
......@@ -195,8 +201,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -207,42 +215,14 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight, posePtr, BODY_23_PAIRS_GPU, numberPeople, 23, numberPartPairs,
BODY_23_COLORS, numberColors, radius, lineWidth, BODY_23_SCALES, numberScales, threshold, alphaColorToAdd,
blendOriginalFrame, (googlyEyes ? 13 : -1), (googlyEyes ? 14 : -1));
}
// __global__ void renderPoseBody25Old(
// float* targetPtr, const int targetWidth, const int targetHeight, const float* const posePtr,
// const int numberPeople, const float threshold, const bool googlyEyes, const bool blendOriginalFrame,
// const float alphaColorToAdd)
// {
// const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
// const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;
// const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// // Shared parameters
// __shared__ float2 sharedMins[POSE_MAX_PEOPLE];
// __shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// // Other parameters
// const auto numberPartPairs = sizeof(BODY_25_PAIRS_GPU) / (2*sizeof(BODY_25_PAIRS_GPU[0]));
// const auto numberScales = sizeof(BODY_25_SCALES) / sizeof(BODY_25_SCALES[0]);
// const auto numberColors = sizeof(BODY_25_COLORS) / (3*sizeof(BODY_25_COLORS[0]));
// const auto radius = fastMinCuda(targetWidth, targetHeight) / 100.f;
// const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// // Render key points
// renderKeypointsOld(
// targetPtr, sharedMaxs, sharedMins, sharedScaleF, globalIdx, x, y, targetWidth, targetHeight, posePtr,
// BODY_25_PAIRS_GPU, numberPeople, 25, numberPartPairs, BODY_25_COLORS, numberColors, radius, lineWidth,
// BODY_25_SCALES, numberScales, threshold, alphaColorToAdd, blendOriginalFrame, (googlyEyes ? 15 : -1),
// (googlyEyes ? 16 : -1));
// }
__global__ void renderPoseBody25(
float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth,
const int targetHeight, const float* const posePtr, const int numberPeople, const float threshold,
......@@ -253,8 +233,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -265,8 +247,9 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight,
posePtr, BODY_25_PAIRS_GPU, numberPeople, 25, numberPartPairs, BODY_25_COLORS, numberColors,
radius, lineWidth, BODY_25_SCALES, numberScales, threshold, alphaColorToAdd,
......@@ -283,8 +266,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -295,42 +280,14 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight, posePtr, BODY_25B_PAIRS_GPU, numberPeople, 25, numberPartPairs,
BODY_25B_COLORS, numberColors, radius, lineWidth, BODY_25B_SCALES, numberScales, threshold, alphaColorToAdd,
blendOriginalFrame, (googlyEyes ? 1 : -1), (googlyEyes ? 2 : -1));
}
// __global__ void renderPoseBody135Old(
// float* targetPtr, const int targetWidth, const int targetHeight, const float* const posePtr,
// const int numberPeople, const float threshold, const bool googlyEyes, const bool blendOriginalFrame,
// const float alphaColorToAdd)
// {
// const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
// const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;
// const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// // Shared parameters
// __shared__ float2 sharedMins[POSE_MAX_PEOPLE];
// __shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// // Other parameters
// const auto numberPartPairs = sizeof(BODY_135_PAIRS_GPU) / (2*sizeof(BODY_135_PAIRS_GPU[0]));
// const auto numberScales = sizeof(BODY_135_SCALES) / sizeof(BODY_135_SCALES[0]);
// const auto numberColors = sizeof(BODY_135_COLORS) / (3*sizeof(BODY_135_COLORS[0]));
// const auto radius = fastMinCuda(targetWidth, targetHeight) / 100.f;
// const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// // Render key points
// renderKeypointsOld(
// targetPtr, sharedMaxs, sharedMins, sharedScaleF, globalIdx, x, y, targetWidth, targetHeight, posePtr,
// BODY_135_PAIRS_GPU, numberPeople, 135, numberPartPairs, BODY_135_COLORS, numberColors, radius, lineWidth,
// BODY_135_SCALES, numberScales, threshold, alphaColorToAdd, blendOriginalFrame, (googlyEyes ? 1 : -1),
// (googlyEyes ? 2 : -1));
// }
__global__ void renderPoseBody135(
float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
const float* const posePtr, const int numberPeople, const float threshold, const bool googlyEyes,
......@@ -341,8 +298,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -353,11 +312,12 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr, globalIdx, x, y, targetWidth,
targetHeight, posePtr, BODY_135_PAIRS_GPU, numberPeople, 135, numberPartPairs, BODY_135_COLORS,
numberColors, radius, lineWidth, BODY_135_SCALES, numberScales, threshold, alphaColorToAdd,
blendOriginalFrame, (googlyEyes ? 1 : -1), (googlyEyes ? 2 : -1));
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight, posePtr, BODY_135_PAIRS_GPU, numberPeople, 135,
numberPartPairs, BODY_135_COLORS, numberColors, radius, lineWidth, BODY_135_SCALES, numberScales,
threshold, alphaColorToAdd, blendOriginalFrame, (googlyEyes ? 1 : -1), (googlyEyes ? 2 : -1));
}
__global__ void renderPoseMpi29Parts(
......@@ -370,8 +330,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -382,10 +344,12 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr, globalIdx, x, y, targetWidth,
targetHeight, posePtr, MPI_PAIRS_GPU, numberPeople, 15, numberPartPairs, MPI_COLORS, numberColors,
radius, lineWidth, COCO_SCALES, numberScales, threshold, alphaColorToAdd, blendOriginalFrame);
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight, posePtr, MPI_PAIRS_GPU, numberPeople, 15, numberPartPairs,
MPI_COLORS, numberColors, radius, lineWidth, COCO_SCALES, numberScales, threshold, alphaColorToAdd,
blendOriginalFrame);
}
__global__ void renderPoseCar12(
......@@ -398,8 +362,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -410,11 +376,12 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr, globalIdx, x, y, targetWidth,
targetHeight, posePtr, CAR_12_PAIRS_GPU, numberPeople, 12, numberPartPairs, CAR_12_COLORS, numberColors,
radius, lineWidth, CAR_12_SCALES, numberScales, threshold, alphaColorToAdd, blendOriginalFrame,
(googlyEyes ? 4 : -1), (googlyEyes ? 5 : -1));
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight, posePtr, CAR_12_PAIRS_GPU, numberPeople, 12, numberPartPairs,
CAR_12_COLORS, numberColors, radius, lineWidth, CAR_12_SCALES, numberScales, threshold, alphaColorToAdd,
blendOriginalFrame, (googlyEyes ? 4 : -1), (googlyEyes ? 5 : -1));
}
__global__ void renderPoseCar22(
......@@ -427,8 +394,10 @@ namespace op
const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;
// Shared parameters
__shared__ float sharedMins[2*POSE_MAX_PEOPLE];
__shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float2 sharedMins[POSE_MAX_PEOPLE];
__shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
// __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
// __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
__shared__ float sharedScaleF[POSE_MAX_PEOPLE];
// Other parameters
......@@ -439,11 +408,12 @@ namespace op
const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;
// Render key points
renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr, globalIdx, x, y, targetWidth,
targetHeight, posePtr, CAR_22_PAIRS_GPU, numberPeople, 22, numberPartPairs, CAR_22_COLORS, numberColors,
radius, lineWidth, CAR_22_SCALES, numberScales, threshold, alphaColorToAdd, blendOriginalFrame,
(googlyEyes ? 6 : -1), (googlyEyes ? 7 : -1));
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
renderKeypointsOld( // renderKeypoints(
targetPtr, sharedMaxs, sharedMins, sharedScaleF, // maxPtr, minPtr, scalePtr,
globalIdx, x, y, targetWidth, targetHeight, posePtr, CAR_22_PAIRS_GPU, numberPeople, 22, numberPartPairs,
CAR_22_COLORS, numberColors, radius, lineWidth, CAR_22_SCALES, numberScales, threshold, alphaColorToAdd,
blendOriginalFrame, (googlyEyes ? 6 : -1), (googlyEyes ? 7 : -1));
}
__global__ void renderBodyPartHeatMaps(float* targetPtr, const int targetWidth, const int targetHeight,
......@@ -654,16 +624,17 @@ namespace op
error("Rendering assumes that numberPeople <= POSE_MAX_PEOPLE = " + std::to_string(POSE_MAX_PEOPLE)
+ ".", __LINE__, __FUNCTION__, __FILE__);
//// Get bounding box per person
//const dim3 threadsPerBlockBoundBox = {1, 1, 1};
//const dim3 numBlocksBox{getNumberCudaBlocks(POSE_MAX_PEOPLE, threadsPerBlockBoundBox.x)};
//getBoundingBoxPerPersonPose<<<threadsPerBlockBoundBox, numBlocksBox>>>(
// maxPtr, minPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
// getPoseNumberBodyParts(poseModel), renderThreshold);
// Body pose
dim3 threadsPerBlock;
dim3 numBlocks;
getNumberCudaThreadsAndBlocks(threadsPerBlock, numBlocks, frameSize);
// Body pose
const dim3 threadsPerBlockBoundBox = {1, 1, 1};
const dim3 numBlocksBox{getNumberCudaBlocks(POSE_MAX_PEOPLE, threadsPerBlockBoundBox.x)};
getBoundingBoxPerPersonPose<<<threadsPerBlockBoundBox, numBlocksBox>>>(
maxPtr, minPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
getPoseNumberBodyParts(poseModel), renderThreshold);
if (poseModel == PoseModel::BODY_25 || poseModel == PoseModel::BODY_25D
|| poseModel == PoseModel::BODY_25E)
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册