提交 27903fb3 编写于 作者: G gineshidalgo99

Part candidates at the same scale as keypoints

上级 29ea7e24
...@@ -74,7 +74,7 @@ In order to verify that the camera parameters introduced by the user are sorted ...@@ -74,7 +74,7 @@ In order to verify that the camera parameters introduced by the user are sorted
## Installing the OpenPose 3-D Reconstruction Module ## Installing the OpenPose 3-D Reconstruction Module
Check the [doc/installation.md#3d-reconstruction-module](./quick_start.md#3d-reconstruction-module) for installation steps. Check the [doc/installation.md#3d-reconstruction-module](./installation.md#3d-reconstruction-module) for installation steps.
......
...@@ -218,6 +218,7 @@ OpenPose Library - Release Notes ...@@ -218,6 +218,7 @@ OpenPose Library - Release Notes
1. Removed scale parameter from hand and face rectangle extractor (causing wrong results if custom `--output_resolution`). 1. Removed scale parameter from hand and face rectangle extractor (causing wrong results if custom `--output_resolution`).
3. Main bugs fixed: 3. Main bugs fixed:
1. Hand and face work properly again with any `--output_resolution`. 1. Hand and face work properly again with any `--output_resolution`.
2. Part candidates (`--part_candidates`) are saved at the same scale as the final keypoints themselves.
......
...@@ -17,6 +17,9 @@ namespace op ...@@ -17,6 +17,9 @@ namespace op
void scale(std::vector<Array<float>>& arraysToScale, const double scaleInputToOutput, void scale(std::vector<Array<float>>& arraysToScale, const double scaleInputToOutput,
const double scaleNetToOutput, const Point<int>& producerSize) const; const double scaleNetToOutput, const Point<int>& producerSize) const;
void scale(std::vector<std::vector<std::array<float,3>>>& poseCandidates, const double scaleInputToOutput,
const double scaleNetToOutput, const Point<int>& producerSize) const;
private: private:
const ScaleMode mScaleMode; const ScaleMode mScaleMode;
}; };
......
...@@ -59,6 +59,9 @@ namespace op ...@@ -59,6 +59,9 @@ namespace op
tDatum.handKeypoints[1], tDatum.faceKeypoints}; tDatum.handKeypoints[1], tDatum.faceKeypoints};
spKeypointScaler->scale(arraysToScale, tDatum.scaleInputToOutput, tDatum.scaleNetToOutput, spKeypointScaler->scale(arraysToScale, tDatum.scaleInputToOutput, tDatum.scaleNetToOutput,
Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows}); Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows});
// Rescale part candidates
spKeypointScaler->scale(tDatum.poseCandidates, tDatum.scaleInputToOutput, tDatum.scaleNetToOutput,
Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows});
} }
// Profiling speed // Profiling speed
Profiler::timerEnd(profilerKey); Profiler::timerEnd(profilerKey);
......
...@@ -7,16 +7,18 @@ namespace op ...@@ -7,16 +7,18 @@ namespace op
{ {
template <typename T> template <typename T>
OP_API void nmsCpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, OP_API void nmsCpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize); const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<T>& offset);
template <typename T> template <typename T>
OP_API void nmsGpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, OP_API void nmsGpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize); const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<T>& offset);
template <typename T> template <typename T>
OP_API void nmsOcl(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, OP_API void nmsOcl(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const int gpuID = 0); const Point<T>& offset, const int gpuID = 0);
} }
#endif // OPENPOSE_NET_NMS_BASE_HPP #endif // OPENPOSE_NET_NMS_BASE_HPP
...@@ -25,6 +25,9 @@ namespace op ...@@ -25,6 +25,9 @@ namespace op
void setThreshold(const T threshold); void setThreshold(const T threshold);
// Empirically gives better results (copied from Matlab original code)
void setOffset(const Point<T>& offset);
virtual void Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top); virtual void Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top); virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
...@@ -39,6 +42,7 @@ namespace op ...@@ -39,6 +42,7 @@ namespace op
private: private:
T mThreshold; T mThreshold;
Point<T> mOffset;
int mGpuID; int mGpuID;
// PIMPL idiom // PIMPL idiom
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
namespace op namespace op
{ {
OP_API void putTextOnCvMat(cv::Mat& cvMat, const std::string& textToDisplay, const Point<int>& position, OP_API void putTextOnCvMat(cv::Mat& cvMat, const std::string& textToDisplay, const Point<int>& position,
const cv::Scalar& color, const bool normalizeWidth, const int imageWidth); const cv::Scalar& color, const bool normalizeWidth, const int imageWidth);
OP_API void floatPtrToUCharCvMat(cv::Mat& uCharCvMat, const float* const floatPtrImage, OP_API void floatPtrToUCharCvMat(cv::Mat& uCharCvMat, const float* const floatPtrImage,
const std::array<int, 3> resolutionSize); const std::array<int, 3> resolutionSize);
......
...@@ -3,6 +3,40 @@ ...@@ -3,6 +3,40 @@
namespace op namespace op
{ {
// Computes the per-axis scale and additive offset associated with a ScaleMode.
// The result is packed into a Rectangle<float>:
//   - x, y          -> offset added to the x / y coordinate
//   - width, height -> factor multiplying the x / y coordinate
// @param scaleMode          Target coordinate convention.
// @param scaleInputToOutput Factor used for ScaleMode::OutputResolution.
// @param scaleNetToOutput   Factor whose inverse is used for ScaleMode::NetOutputResolution.
// @param producerSize       Size used to normalize for ScaleMode::ZeroToOne / PlusMinusOne.
// @return The scale/offset rectangle, or a default-constructed Rectangle<float> once error()
//         has been invoked (unknown mode or caught exception).
Rectangle<float> getScaleAndOffset(const ScaleMode scaleMode, const double scaleInputToOutput,
                                   const double scaleNetToOutput, const Point<int>& producerSize)
{
    try
    {
        switch (scaleMode)
        {
            // OutputResolution: uniform scaling, no offset
            case ScaleMode::OutputResolution:
            {
                const auto factor = float(scaleInputToOutput);
                return Rectangle<float>{0.f, 0.f, factor, factor};
            }
            // NetOutputResolution: uniform inverse scaling, no offset
            case ScaleMode::NetOutputResolution:
            {
                const auto factor = float(1./scaleNetToOutput);
                return Rectangle<float>{0.f, 0.f, factor, factor};
            }
            // [0,1]: divide by (size - 1) on each axis
            case ScaleMode::ZeroToOne:
            {
                const auto lastX = (float)producerSize.x - 1.f;
                const auto lastY = (float)producerSize.y - 1.f;
                return Rectangle<float>{0.f, 0.f, 1.f / lastX, 1.f / lastY};
            }
            // [-1,1]: scale to a span of 2, then shift by -1
            case ScaleMode::PlusMinusOne:
            {
                const auto lastX = (float)producerSize.x - 1.f;
                const auto lastY = (float)producerSize.y - 1.f;
                return Rectangle<float>{-1.f, -1.f, 2.f / lastX, 2.f / lastY};
            }
            // InputResolution: identity mapping
            case ScaleMode::InputResolution:
                return Rectangle<float>{0.f, 0.f, 1.f, 1.f};
            // Anything else is reported below
            default:
                break;
        }
        // Unknown
        error("Unknown ScaleMode selected.", __LINE__, __FUNCTION__, __FILE__);
        return Rectangle<float>{};
    }
    catch (const std::exception& caught)
    {
        error(caught.what(), __LINE__, __FUNCTION__, __FILE__);
        return Rectangle<float>{};
    }
}
KeypointScaler::KeypointScaler(const ScaleMode scaleMode) : KeypointScaler::KeypointScaler(const ScaleMode scaleMode) :
mScaleMode{scaleMode} mScaleMode{scaleMode}
{ {
...@@ -29,38 +63,61 @@ namespace op ...@@ -29,38 +63,61 @@ namespace op
{ {
if (mScaleMode != ScaleMode::InputResolution) if (mScaleMode != ScaleMode::InputResolution)
{ {
// OutputResolution // Get scale and offset
if (mScaleMode == ScaleMode::OutputResolution) const auto scaleAndOffset = getScaleAndOffset(mScaleMode, scaleInputToOutput, scaleNetToOutput,
{ producerSize);
// Only scaling
if (scaleAndOffset.x == 0 && scaleAndOffset.y == 0)
for (auto& arrayToScale : arrayToScalesToScale) for (auto& arrayToScale : arrayToScalesToScale)
scaleKeypoints(arrayToScale, float(scaleInputToOutput)); scaleKeypoints(arrayToScale, scaleAndOffset.width, scaleAndOffset.height);
} // Scaling + offset
// NetOutputResolution else
else if (mScaleMode == ScaleMode::NetOutputResolution)
{
for (auto& arrayToScale : arrayToScalesToScale) for (auto& arrayToScale : arrayToScalesToScale)
scaleKeypoints(arrayToScale, float(1./scaleNetToOutput)); scaleKeypoints(arrayToScale, scaleAndOffset.width, scaleAndOffset.height,
} scaleAndOffset.x, scaleAndOffset.y);
// [0,1] }
else if (mScaleMode == ScaleMode::ZeroToOne) }
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
void KeypointScaler::scale(std::vector<std::vector<std::array<float,3>>>& poseCandidates,
const double scaleInputToOutput, const double scaleNetToOutput,
const Point<int>& producerSize) const
{
try
{
if (mScaleMode != ScaleMode::InputResolution)
{
// Get scale and offset
const auto scaleAndOffset = getScaleAndOffset(mScaleMode, scaleInputToOutput, scaleNetToOutput,
producerSize);
// Only scaling
if (scaleAndOffset.x == 0 && scaleAndOffset.y == 0)
{ {
const auto scaleX = 1.f / ((float)producerSize.x - 1.f); for (auto& partCandidates : poseCandidates)
const auto scaleY = 1.f / ((float)producerSize.y - 1.f); {
for (auto& arrayToScale : arrayToScalesToScale) for (auto& candidate : partCandidates)
scaleKeypoints(arrayToScale, scaleX, scaleY); {
candidate[0] *= scaleAndOffset.width;
candidate[1] *= scaleAndOffset.height;
}
}
} }
// [-1,1] // Scaling + offset
else if (mScaleMode == ScaleMode::PlusMinusOne) else
{ {
const auto scaleX = (2.f / ((float)producerSize.x - 1.f)); for (auto& partCandidates : poseCandidates)
const auto scaleY = (2.f / ((float)producerSize.y - 1.f)); {
const auto offset = -1.f; for (auto& candidate : partCandidates)
for (auto& arrayToScale : arrayToScalesToScale) {
scaleKeypoints(arrayToScale, scaleX, scaleY, offset, offset); candidate[0] = candidate[0]*scaleAndOffset.width + scaleAndOffset.x;
candidate[1] = candidate[1]*scaleAndOffset.height + scaleAndOffset.y;
}
}
} }
// Unknown
else
error("Unknown ScaleMode selected.", __LINE__, __FUNCTION__, __FILE__);
} }
} }
catch (const std::exception& e) catch (const std::exception& e)
......
...@@ -68,8 +68,8 @@ namespace op ...@@ -68,8 +68,8 @@ namespace op
} }
template <typename T> template <typename T>
void nmsAccuratePeakPosition(const T* const sourcePtr, const int& peakLocX, const int& peakLocY, void nmsAccuratePeakPosition(T* output, const T* const sourcePtr, const int& peakLocX, const int& peakLocY,
const int& width, const int& height, T* output) const int& width, const int& height, const Point<T>& offset)
{ {
T xAcc = 0.f; T xAcc = 0.f;
T yAcc = 0.f; T yAcc = 0.f;
...@@ -98,14 +98,18 @@ namespace op ...@@ -98,14 +98,18 @@ namespace op
} }
} }
output[0] = xAcc / scoreAcc; // Offset to keep Matlab format (empirically higher acc)
output[1] = yAcc / scoreAcc; // Best results for 1 scale: x + 0, y + 0.5
// +0.5 to both to keep Matlab format
output[0] = xAcc / scoreAcc + offset.x;
output[1] = yAcc / scoreAcc + offset.y;
output[2] = sourcePtr[peakLocY*width + peakLocX]; output[2] = sourcePtr[peakLocY*width + peakLocX];
} }
template <typename T> template <typename T>
void nmsCpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, void nmsCpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize) const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<T>& offset)
{ {
try try
{ {
...@@ -149,8 +153,8 @@ namespace op ...@@ -149,8 +153,8 @@ namespace op
if (currKernelPtr[index] == 1) if (currKernelPtr[index] == 1)
{ {
// Accurate Peak Position // Accurate Peak Position
nmsAccuratePeakPosition(currSourcePtr, x, y, sourceWidth, sourceHeight, nmsAccuratePeakPosition(&currTargetPtr[currentPeakCount*3], currSourcePtr, x, y,
&currTargetPtr[currentPeakCount*3]); sourceWidth, sourceHeight, offset);
currentPeakCount++; currentPeakCount++;
} }
} }
...@@ -167,7 +171,9 @@ namespace op ...@@ -167,7 +171,9 @@ namespace op
} }
template void nmsCpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold, template void nmsCpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize); const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<float>& offset);
template void nmsCpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold, template void nmsCpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize); const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<double>& offset);
} }
...@@ -48,7 +48,8 @@ namespace op ...@@ -48,7 +48,8 @@ namespace op
template <typename T> template <typename T>
__global__ void writeResultKernel(T* output, const int length, const int* const kernelPtr, __global__ void writeResultKernel(T* output, const int length, const int* const kernelPtr,
const T* const sourcePtr, const int width, const int height, const int maxPeaks) const T* const sourcePtr, const int width, const int height, const int maxPeaks,
const T offsetX, const T offsetY)
{ {
__shared__ int local[THREADS_PER_BLOCK+1]; // one more __shared__ int local[THREADS_PER_BLOCK+1]; // one more
const auto globalIdx = blockIdx.x * blockDim.x + threadIdx.x; const auto globalIdx = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -101,9 +102,12 @@ namespace op ...@@ -101,9 +102,12 @@ namespace op
} }
} }
// Offset to keep Matlab format (empirically higher acc)
// Best results for 1 scale: x + 0, y + 0.5
// +0.5 to both to keep Matlab format
const auto outputIndex = (peakIndex + 1) * 3; const auto outputIndex = (peakIndex + 1) * 3;
output[outputIndex] = xAcc / scoreAcc; output[outputIndex] = xAcc / scoreAcc + offsetX;
output[outputIndex + 1] = yAcc / scoreAcc; output[outputIndex + 1] = yAcc / scoreAcc + offsetY;
output[outputIndex + 2] = sourcePtr[peakLocY*width + peakLocX]; output[outputIndex + 2] = sourcePtr[peakLocY*width + peakLocX];
} }
} }
...@@ -115,7 +119,7 @@ namespace op ...@@ -115,7 +119,7 @@ namespace op
template <typename T> template <typename T>
void nmsGpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, void nmsGpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize) const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const Point<T>& offset)
{ {
try try
{ {
...@@ -177,7 +181,7 @@ namespace op ...@@ -177,7 +181,7 @@ namespace op
// This returns targetPtrOffsetted, with the NMS applied over it // This returns targetPtrOffsetted, with the NMS applied over it
writeResultKernel<<<numBlocks1D, threadsPerBlock1D>>>(targetPtrOffsetted, imageOffset, writeResultKernel<<<numBlocks1D, threadsPerBlock1D>>>(targetPtrOffsetted, imageOffset,
kernelPtrOffsetted, sourcePtrOffsetted, kernelPtrOffsetted, sourcePtrOffsetted,
width, height, maxPeaks); width, height, maxPeaks, offset.x, offset.y);
} }
} }
cudaCheck(__LINE__, __FUNCTION__, __FILE__); cudaCheck(__LINE__, __FUNCTION__, __FILE__);
...@@ -189,7 +193,9 @@ namespace op ...@@ -189,7 +193,9 @@ namespace op
} }
template void nmsGpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold, template void nmsGpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize); const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<float>& offset);
template void nmsGpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold, template void nmsGpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize); const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<double>& offset);
} }
...@@ -13,8 +13,9 @@ namespace op ...@@ -13,8 +13,9 @@ namespace op
{ {
#ifdef USE_OPENCL #ifdef USE_OPENCL
const std::string nmsOclCommonFunctions = MULTI_LINE_STRING( const std::string nmsOclCommonFunctions = MULTI_LINE_STRING(
void nmsAccuratePeakPosition(__global const Type* sourcePtr, const int peakLocX, const int peakLocY, void nmsAccuratePeakPosition(__global const Type* sourcePtr, Type* fx, Type* fy, Type* fscore,
const int width, const int height, Type* fx, Type* fy, Type* fscore) const int peakLocX, const int peakLocY, const int width, const int height,
const T offsetX, const T offsetY)
{ {
Type xAcc = 0.f; Type xAcc = 0.f;
Type yAcc = 0.f; Type yAcc = 0.f;
...@@ -43,8 +44,11 @@ namespace op ...@@ -43,8 +44,11 @@ namespace op
} }
} }
*fx = xAcc / scoreAcc; // Offset to keep Matlab format (empirically higher acc)
*fy = yAcc / scoreAcc; // Best results for 1 scale: x + 0, y + 0.5
// +0.5 to both to keep Matlab format
*fx = xAcc / scoreAcc + offsetX;
*fy = yAcc / scoreAcc + offsetY;
*fscore = sourcePtr[peakLocY*width + peakLocX]; *fscore = sourcePtr[peakLocY*width + peakLocX];
} }
...@@ -85,10 +89,7 @@ namespace op ...@@ -85,10 +89,7 @@ namespace op
&& value > left && value > right && value > left && value > right
&& value > bottomLeft && value > bottom && value > bottomRight) && value > bottomLeft && value > bottom && value > bottomRight)
{ {
//Type fx = 0; Type fy = 0; Type fscore = 0;
//nmsAccuratePeakPosition(sourcePtr, x, y, w, h, &fx, &fy, &fscore);
kernelPtr[index] = 1; kernelPtr[index] = 1;
//if(debug) printf("%d %d \n", x,y);
} }
else else
kernelPtr[index] = 0; kernelPtr[index] = 0;
...@@ -104,7 +105,8 @@ namespace op ...@@ -104,7 +105,8 @@ namespace op
typedef cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, int, int, int, int> NMSWriteKernelFunctor; typedef cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, int, int, int, int> NMSWriteKernelFunctor;
const std::string nmsWriteKernel = MULTI_LINE_STRING( const std::string nmsWriteKernel = MULTI_LINE_STRING(
__kernel void nmsWriteKernel(__global Type* targetPtr, __global int* kernelPtr, __global const Type* sourcePtr, __kernel void nmsWriteKernel(__global Type* targetPtr, __global int* kernelPtr, __global const Type* sourcePtr,
const int w, const int h, const int maxPeaks, const int debug) const int w, const int h, const int maxPeaks, const int debug,
const T offsetX, const T offsetY)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
...@@ -118,7 +120,7 @@ namespace op ...@@ -118,7 +120,7 @@ namespace op
if (prev - curr) if (prev - curr)
{ {
Type fx = 0; Type fy = 0; Type fscore = 0; Type fx = 0; Type fy = 0; Type fscore = 0;
nmsAccuratePeakPosition(sourcePtr, x, y, w, h, &fx, &fy, &fscore); nmsAccuratePeakPosition(sourcePtr, &fx, &fy, &fscore, x, y, w, h, offsetX, offsetY);
//if (debug) printf("C %d %d %d \n", x,y,kernelPtr[index]); //if (debug) printf("C %d %d %d \n", x,y,kernelPtr[index]);
__global Type* output = &targetPtr[curr*3]; __global Type* output = &targetPtr[curr*3];
output[0] = fx; output[1] = fy; output[2] = fscore; output[0] = fx; output[1] = fy; output[2] = fscore;
...@@ -144,7 +146,8 @@ namespace op ...@@ -144,7 +146,8 @@ namespace op
template <typename T> template <typename T>
void nmsOcl(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, void nmsOcl(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const int gpuID) const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const Point<T>& offset,
const int gpuID)
{ {
try try
{ {
...@@ -229,7 +232,8 @@ namespace op ...@@ -229,7 +232,8 @@ namespace op
sizeof(int) * width * height, &kernelCPU[0]); sizeof(int) * width * height, &kernelCPU[0]);
// Write Kernel // Write Kernel
nmsWriteKernel(cl::EnqueueArgs(op::OpenCL::getInstance(gpuID)->getQueue(), cl::NDRange(width, height)), nmsWriteKernel(cl::EnqueueArgs(op::OpenCL::getInstance(gpuID)->getQueue(), cl::NDRange(width, height)),
targetBuffer, kernelBuffer, sourceBuffer, width, height, targetPeaks-1, debug); targetBuffer, kernelBuffer, sourceBuffer, width, height, targetPeaks-1, debug,
offset.x, offset.y);
} }
} }
#else #else
...@@ -239,6 +243,7 @@ namespace op ...@@ -239,6 +243,7 @@ namespace op
UNUSED(threshold); UNUSED(threshold);
UNUSED(targetSize); UNUSED(targetSize);
UNUSED(sourceSize); UNUSED(sourceSize);
UNUSED(offset);
UNUSED(gpuID); UNUSED(gpuID);
error("OpenPose must be compiled with the `USE_OPENCL` macro definition in order to use this" error("OpenPose must be compiled with the `USE_OPENCL` macro definition in order to use this"
" functionality.", __LINE__, __FUNCTION__, __FILE__); " functionality.", __LINE__, __FUNCTION__, __FILE__);
...@@ -258,7 +263,9 @@ namespace op ...@@ -258,7 +263,9 @@ namespace op
} }
template void nmsOcl(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold, template void nmsOcl(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, int gpuID); const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<float>& offset, const int gpuID);
template void nmsOcl(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold, template void nmsOcl(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold,
const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, int gpuID); const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const Point<double>& offset, const int gpuID);
} }
...@@ -127,6 +127,19 @@ namespace op ...@@ -127,6 +127,19 @@ namespace op
} }
} }
// Stores the (x, y) offset that this layer later passes to nmsCpu / nmsGpu / nmsOcl from its
// Forward_* methods, where it is added to each refined peak position.
// @param offset Offset to keep; it is copied into mOffset.
template <typename T>
void NmsCaffe<T>::setOffset(const Point<T>& offset)
{
try
{
mOffset = {offset};
}
// Any std::exception is forwarded to error() together with the source location.
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
template <typename T> template <typename T>
void NmsCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top) void NmsCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
{ {
...@@ -134,7 +147,7 @@ namespace op ...@@ -134,7 +147,7 @@ namespace op
{ {
#ifdef USE_CAFFE #ifdef USE_CAFFE
nmsCpu(top.at(0)->mutable_cpu_data(), upImpl->mKernelBlob.mutable_cpu_data(), bottom.at(0)->cpu_data(), nmsCpu(top.at(0)->mutable_cpu_data(), upImpl->mKernelBlob.mutable_cpu_data(), bottom.at(0)->cpu_data(),
mThreshold, upImpl->mTopSize, upImpl->mBottomSize); mThreshold, upImpl->mTopSize, upImpl->mBottomSize, mOffset);
#else #else
UNUSED(bottom); UNUSED(bottom);
UNUSED(top); UNUSED(top);
...@@ -153,7 +166,7 @@ namespace op ...@@ -153,7 +166,7 @@ namespace op
{ {
#if defined USE_CAFFE && defined USE_CUDA #if defined USE_CAFFE && defined USE_CUDA
nmsGpu(top.at(0)->mutable_gpu_data(), upImpl->mKernelBlob.mutable_gpu_data(), nmsGpu(top.at(0)->mutable_gpu_data(), upImpl->mKernelBlob.mutable_gpu_data(),
bottom.at(0)->gpu_data(), mThreshold, upImpl->mTopSize, upImpl->mBottomSize); bottom.at(0)->gpu_data(), mThreshold, upImpl->mTopSize, upImpl->mBottomSize, mOffset);
#else #else
UNUSED(bottom); UNUSED(bottom);
UNUSED(top); UNUSED(top);
...@@ -174,7 +187,8 @@ namespace op ...@@ -174,7 +187,8 @@ namespace op
{ {
#if defined USE_CAFFE && defined USE_OPENCL #if defined USE_CAFFE && defined USE_OPENCL
nmsOcl(top.at(0)->mutable_gpu_data(), upImpl->mKernelBlobT->mutable_gpu_data(), nmsOcl(top.at(0)->mutable_gpu_data(), upImpl->mKernelBlobT->mutable_gpu_data(),
bottom.at(0)->gpu_data(), mThreshold, upImpl->mTopSize, upImpl->mBottomSize, mGpuID); bottom.at(0)->gpu_data(), mThreshold, upImpl->mTopSize, upImpl->mBottomSize, mOffset,
mGpuID);
#else #else
UNUSED(bottom); UNUSED(bottom);
UNUSED(top); UNUSED(top);
......
...@@ -332,10 +332,8 @@ namespace op ...@@ -332,10 +332,8 @@ namespace op
const auto bodyPartIndex = subsetI[bodyPart]; const auto bodyPartIndex = subsetI[bodyPart];
if (bodyPartIndex > 0) if (bodyPartIndex > 0)
{ {
// Best results for 1 scale: x + 0, y + 0.5 poseKeypoints[baseOffset] = peaksPtr[bodyPartIndex-2] * scaleFactor;
// +0.5 to both to keep Matlab format poseKeypoints[baseOffset + 1] = peaksPtr[bodyPartIndex-1] * scaleFactor;
poseKeypoints[baseOffset] = peaksPtr[bodyPartIndex-2] * scaleFactor + 0.5f;
poseKeypoints[baseOffset + 1] = peaksPtr[bodyPartIndex-1] * scaleFactor + 0.5f;
poseKeypoints[baseOffset + 2] = peaksPtr[bodyPartIndex]; poseKeypoints[baseOffset + 2] = peaksPtr[bodyPartIndex];
} }
else else
......
...@@ -334,10 +334,8 @@ namespace op ...@@ -334,10 +334,8 @@ namespace op
const auto bodyPartIndex = subsetI[bodyPart]; const auto bodyPartIndex = subsetI[bodyPart];
if (bodyPartIndex > 0) if (bodyPartIndex > 0)
{ {
// Best results for 1 scale: x + 0, y + 0.5 poseKeypoints[baseOffset] = peaksPtr[bodyPartIndex-2] * scaleFactor;
// +0.5 to both to keep Matlab format poseKeypoints[baseOffset + 1] = peaksPtr[bodyPartIndex-1] * scaleFactor;
poseKeypoints[baseOffset] = peaksPtr[bodyPartIndex-2] * scaleFactor + 0.5f;
poseKeypoints[baseOffset + 1] = peaksPtr[bodyPartIndex-1] * scaleFactor + 0.5f;
poseKeypoints[baseOffset + 2] = peaksPtr[bodyPartIndex]; poseKeypoints[baseOffset + 2] = peaksPtr[bodyPartIndex];
} }
else else
......
...@@ -238,8 +238,8 @@ namespace op ...@@ -238,8 +238,8 @@ namespace op
candidates[part].resize(numberPartCandidates); candidates[part].resize(numberPartCandidates);
const auto* partCandidatesPtr = &candidatesCpuPtr[part*peaksArea+3]; const auto* partCandidatesPtr = &candidatesCpuPtr[part*peaksArea+3];
for (auto candidate = 0 ; candidate < numberPartCandidates ; candidate++) for (auto candidate = 0 ; candidate < numberPartCandidates ; candidate++)
candidates[part][candidate] = {partCandidatesPtr[3*candidate], candidates[part][candidate] = {partCandidatesPtr[3*candidate] * mScaleNetToOutput,
partCandidatesPtr[3*candidate+1], partCandidatesPtr[3*candidate+1] * mScaleNetToOutput,
partCandidatesPtr[3*candidate+2]}; partCandidatesPtr[3*candidate+2]};
} }
} }
......
...@@ -261,8 +261,18 @@ namespace op ...@@ -261,8 +261,18 @@ namespace op
upImpl->spResizeAndMergeCaffe->Forward_cpu(caffeNetOutputBlobs, {upImpl->spHeatMapsBlob.get()}); // ~20ms upImpl->spResizeAndMergeCaffe->Forward_cpu(caffeNetOutputBlobs, {upImpl->spHeatMapsBlob.get()}); // ~20ms
#endif #endif
// Get scale net to output (i.e. image input)
// Note: In order to resize to input size, (un)comment the following lines
const auto scaleProducerToNetInput = resizeGetScaleFactor(inputDataSize, mNetOutputSize);
const Point<int> netSize{intRound(scaleProducerToNetInput*inputDataSize.x),
intRound(scaleProducerToNetInput*inputDataSize.y)};
mScaleNetToOutput = {(float)resizeGetScaleFactor(netSize, inputDataSize)};
// mScaleNetToOutput = 1.f;
// 3. Get peaks by Non-Maximum Suppression // 3. Get peaks by Non-Maximum Suppression
upImpl->spNmsCaffe->setThreshold((float)get(PoseProperty::NMSThreshold)); upImpl->spNmsCaffe->setThreshold((float)get(PoseProperty::NMSThreshold));
const auto nmsOffset = float(0.5/double(mScaleNetToOutput));
upImpl->spNmsCaffe->setOffset(Point<float>{nmsOffset, nmsOffset});
#ifdef USE_CUDA #ifdef USE_CUDA
//upImpl->spNmsCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()}); // ~ 7ms //upImpl->spNmsCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()}); // ~ 7ms
upImpl->spNmsCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});// ~2ms upImpl->spNmsCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});// ~2ms
...@@ -274,14 +284,6 @@ namespace op ...@@ -274,14 +284,6 @@ namespace op
upImpl->spNmsCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()}); // ~ 7ms upImpl->spNmsCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()}); // ~ 7ms
#endif #endif
// Get scale net to output (i.e. image input)
// Note: In order to resize to input size, (un)comment the following lines
const auto scaleProducerToNetInput = resizeGetScaleFactor(inputDataSize, mNetOutputSize);
const Point<int> netSize{intRound(scaleProducerToNetInput*inputDataSize.x),
intRound(scaleProducerToNetInput*inputDataSize.y)};
mScaleNetToOutput = {(float)resizeGetScaleFactor(netSize, inputDataSize)};
// mScaleNetToOutput = 1.f;
// 4. Connecting body parts // 4. Connecting body parts
// Get scale net to output (i.e. image input) // Get scale net to output (i.e. image input)
upImpl->spBodyPartConnectorCaffe->setScaleNetToOutput(mScaleNetToOutput); upImpl->spBodyPartConnectorCaffe->setScaleNetToOutput(mScaleNetToOutput);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册