Commit 3b15986a authored by gineshidalgo99

Bug fixed: multi-scale was introducing offset

Parent 204615ef
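In short, this commit stops assuming a fixed `scale_gap` inside the resize-and-merge step and instead records the actual per-scale resize ratios (`Datum::scaleRatios`) when the input is formatted, passing them through to the pose extractor. A minimal sketch of that idea in plain C++ (hypothetical helper, loosely mirroring the `CvMatToOpInput::format` hunk further down; not the library API):

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// Sketch: compute per-scale ratios relative to the first (largest) scale.
// Simplification: assumes the width is the limiting dimension; the real code
// also accounts for the input image size via resizeGetScaleFactor.
std::vector<float> computeRelativeScaleRatios(const int numScales, const float scaleGap,
                                              const int netInputWidth, const int imageWidth)
{
    std::vector<float> scaleRatios(numScales, 1.f);
    for (auto i = 0; i < numScales; i++)
    {
        const auto currentScale = 1.f - i * scaleGap;                 // e.g. 1.0, 0.85, 0.70, ...
        // Snap the target width down to a multiple of 16 (the net stride)
        const auto targetWidth = std::max(16, int(std::round(netInputWidth * currentScale)) / 16 * 16);
        // Absolute resize factor from the original image to this scale
        scaleRatios[i] = targetWidth / float(imageWidth);
    }
    // Express each ratio relative to the first scale, so scaleRatios[0] == 1.f
    for (auto i = numScales - 1; i >= 0; i--)
        scaleRatios[i] /= scaleRatios[0];
    return scaleRatios;
}
```

Because the merge kernel now receives these measured ratios instead of re-deriving each scale's extent from `scale_gap`, the rounding to multiples of 16 no longer introduces an offset between scales.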
......@@ -23,7 +23,7 @@ OpenPose is freely available for free non-commercial use, and may be redistribut
Library main functionality:
* Multi-person 15 or **18-keypoint body pose** estimation and rendering. **Running time invariant of number of people** on the image.
* Multi-person 15 or **18-keypoint body pose** estimation and rendering. **Running time invariant to number of people** on the image.
* Multi-person **2x21-keypoint hand** estimation and rendering. Note: In this initial version, **running time** linearly **depends** on the **number of people** on the image. **Coming soon (in around 1-5 weeks)!**
......@@ -76,8 +76,6 @@ The pose estimation work is based on the C++ code from [the ECCV 2016 demo](http
2. [OpenPose Wrapper](#openpose-wrapper)
3. [OpenPose Library](#openpose-library)
4. [Output](#output)
1. [Output Format](#output-format)
2. [Reading Saved Results](#reading-saved-results)
5. [OpenPose Benchmark](#openpose-benchmark)
6. [Send Us Your Feedback!](#send-us-your-feedback)
7. [Citation](#citation)
......
......@@ -41,5 +41,16 @@ OpenPose Library - Release Notes
3. Main bugs fixed:
1. All visualization functions moved to same thread, so it works with most OpenCV custom compiled versions.
2. Fixed error on debug mode: `Too many resources requested for launch`.
3. Bug in Array::getConstCvMat() if mVolume=0, now returning empty cv::Mat.
4. Bug: `--process_real_time` threw error with webcam.
## Current version (future OpenPose 1.0.0rc4)
1. Main improvements:
1. Check() functions give more feedback.
2. Improved documentation.
2. Functions or parameters renamed:
1. `Datum::scaleRatios` to save the relative scale ratio when multi-scale.
3. Main bugs fixed:
1. Fixed bug in Array::getConstCvMat() if mVolume=0, now returning empty cv::Mat.
2. Fixed bug: `--process_real_time` threw error with webcam.
3. Fixed bug: Face not working with output resolution different to input.
......@@ -28,7 +28,7 @@ DEFINE_int32(logging_level, 3, "The logging level. Inte
DEFINE_string(image_path, "examples/media/COCO_val2014_000000000192.jpg", "Process the desired image.");
// OpenPose
DEFINE_string(model_pose, "COCO", "Model to be used (e.g. COCO, MPI, MPI_4_layers).");
DEFINE_string(model_folder, "models/", "Folder where the pose models (COCO and MPI) are located.");
DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located.");
DEFINE_string(net_resolution, "656x368", "Multiples of 16. If it is increased, the accuracy usually increases. If it is decreased, the speed increases.");
DEFINE_string(resolution, "1280x720", "The image resolution (display). Use \"-1x-1\" to force the program to use the default images resolution.");
DEFINE_int32(num_gpu_start, 0, "GPU device start number.");
......@@ -98,7 +98,7 @@ int openPoseTutorialPose1()
// Step 3 - Initialize all required classes
op::CvMatToOpInput cvMatToOpInput{netInputSize, FLAGS_num_scales, (float)FLAGS_scale_gap};
op::CvMatToOpOutput cvMatToOpOutput{outputSize};
op::PoseExtractorCaffe poseExtractorCaffe{netInputSize, netOutputSize, outputSize, FLAGS_num_scales, (float)FLAGS_scale_gap, poseModel,
op::PoseExtractorCaffe poseExtractorCaffe{netInputSize, netOutputSize, outputSize, FLAGS_num_scales, poseModel,
FLAGS_model_folder, FLAGS_num_gpu_start};
op::PoseRenderer poseRenderer{netOutputSize, outputSize, poseModel, nullptr, (float)FLAGS_alpha_pose};
op::OpOutputToCvMat opOutputToCvMat{outputSize};
......@@ -114,12 +114,14 @@ int openPoseTutorialPose1()
if(inputImage.empty())
op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
// Step 2 - Format input image to OpenPose input and output formats
const auto netInputArray = cvMatToOpInput.format(inputImage);
op::Array<float> netInputArray;
std::vector<float> scaleRatios;
std::tie(netInputArray, scaleRatios) = cvMatToOpInput.format(inputImage);
double scaleInputToOutput;
op::Array<float> outputArray;
std::tie(scaleInputToOutput, outputArray) = cvMatToOpOutput.format(inputImage);
// Step 3 - Estimate poseKeypoints
poseExtractorCaffe.forwardPass(netInputArray, {inputImage.cols, inputImage.rows});
poseExtractorCaffe.forwardPass(netInputArray, {inputImage.cols, inputImage.rows}, scaleRatios);
const auto poseKeypoints = poseExtractorCaffe.getPoseKeypoints();
// Step 4 - Render poseKeypoints
poseRenderer.renderPose(outputArray, poseKeypoints);
......
......@@ -28,7 +28,7 @@ DEFINE_int32(logging_level, 3, "The logging level. Inte
DEFINE_string(image_path, "examples/media/COCO_val2014_000000000192.jpg", "Process the desired image.");
// OpenPose
DEFINE_string(model_pose, "COCO", "Model to be used (e.g. COCO, MPI, MPI_4_layers).");
DEFINE_string(model_folder, "models/", "Folder where the pose models (COCO and MPI) are located.");
DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located.");
DEFINE_string(net_resolution, "656x368", "Multiples of 16. If it is increased, the accuracy usually increases. If it is decreased, the speed increases.");
DEFINE_string(resolution, "1280x720", "The image resolution (display). Use \"-1x-1\" to force the program to use the default images resolution.");
DEFINE_int32(num_gpu_start, 0, "GPU device start number.");
......@@ -101,8 +101,7 @@ int openPoseTutorialPose2()
op::CvMatToOpInput cvMatToOpInput{netInputSize, FLAGS_num_scales, (float)FLAGS_scale_gap};
op::CvMatToOpOutput cvMatToOpOutput{outputSize};
std::shared_ptr<op::PoseExtractor> poseExtractorPtr = std::make_shared<op::PoseExtractorCaffe>(netInputSize, netOutputSize, outputSize, FLAGS_num_scales,
(float)FLAGS_scale_gap, poseModel,
FLAGS_model_folder, FLAGS_num_gpu_start);
poseModel, FLAGS_model_folder, FLAGS_num_gpu_start);
op::PoseRenderer poseRenderer{netOutputSize, outputSize, poseModel, poseExtractorPtr, (float)FLAGS_alpha_pose, (float)FLAGS_alpha_heatmap};
poseRenderer.setElementToRender(FLAGS_part_to_show);
op::OpOutputToCvMat opOutputToCvMat{outputSize};
......@@ -118,12 +117,14 @@ int openPoseTutorialPose2()
if(inputImage.empty())
op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
// Step 2 - Format input image to OpenPose input and output formats
const auto netInputArray = cvMatToOpInput.format(inputImage);
op::Array<float> netInputArray;
std::vector<float> scaleRatios;
std::tie(netInputArray, scaleRatios) = cvMatToOpInput.format(inputImage);
double scaleInputToOutput;
op::Array<float> outputArray;
std::tie(scaleInputToOutput, outputArray) = cvMatToOpOutput.format(inputImage);
// Step 3 - Estimate poseKeypoints
poseExtractorPtr->forwardPass(netInputArray, {inputImage.cols, inputImage.rows});
poseExtractorPtr->forwardPass(netInputArray, {inputImage.cols, inputImage.rows}, scaleRatios);
const auto poseKeypoints = poseExtractorPtr->getPoseKeypoints();
const auto scaleNetToOutput = poseExtractorPtr->getScaleNetToOutput();
// Step 4 - Render pose
......
#ifndef OPENPOSE_CORE_CV_MAT_TO_OP_INPUT_HPP
#define OPENPOSE_CORE_CV_MAT_TO_OP_INPUT_HPP
#include <utility> // std::pair
#include <vector>
#include <opencv2/core/core.hpp> // cv::Mat
#include "array.hpp"
......@@ -13,7 +14,7 @@ namespace op
public:
CvMatToOpInput(const Point<int>& netInputResolution, const int scaleNumber = 1, const float scaleGap = 0.25);
Array<float> format(const cv::Mat& cvInputData) const;
std::pair<Array<float>, std::vector<float>> format(const cv::Mat& cvInputData) const;
private:
const int mScaleNumber;
......
......@@ -74,7 +74,7 @@ namespace op
/**
* Face detection locations (x,y,width,height) for each person in the image.
* It has been resized to the same resolution as `poseKeypoints`.
* It is resized to cvInputData.size().
* Size: #people
*/
std::vector<Rectangle<float>> faceRectangles;
......@@ -86,6 +86,13 @@ namespace op
*/
Array<float> faceKeypoints;
/**
* Hand detection locations (x,y,width,height) for each person in the image.
* It is resized to cvInputData.size().
* Size: #people
*/
std::vector<std::array<Rectangle<float>, 2>> handRectangles;
/**
* Experimental (NOT IMPLEMENTED YET)
* Hands code is in development phase. Not included in this version.
......@@ -98,6 +105,8 @@ namespace op
float scaleNetToOutput; /**< Scale ratio between the net output and the final output Datum::cvOutputData. */
std::vector<float> scaleRatios; /**< Scale ratios between each scale (e.g. flag `num_scales`). Used to resize the different scales. */
std::pair<int, std::string> elementRendered; /**< Pair with the element key id POSE_BODY_PART_MAPPING on `pose/poseParameters.hpp` and its mapped value (e.g. 1 and "Neck"). */
......@@ -167,7 +176,7 @@ namespace op
* @param datum Datum to be compared.
* @result Whether the instance satisfies the condition with respect to datum.
*/
inline bool operator <(const Datum& datum) const
inline bool operator<(const Datum& datum) const
{
return id < datum.id;
}
......@@ -176,7 +185,7 @@ namespace op
* @param datum Datum to be compared.
* @result Whether the instance satisfies the condition with respect to datum.
*/
inline bool operator >(const Datum& datum) const
inline bool operator>(const Datum& datum) const
{
return id > datum.id;
}
......@@ -185,7 +194,7 @@ namespace op
* @param datum Datum to be compared.
* @result Whether the instance satisfies the condition with respect to datum.
*/
inline bool operator <=(const Datum& datum) const
inline bool operator<=(const Datum& datum) const
{
return id <= datum.id;
}
......@@ -194,7 +203,7 @@ namespace op
* @param datum Datum to be compared.
* @result Whether the instance satisfies the condition with respect to datum.
*/
inline bool operator >=(const Datum& datum) const
inline bool operator>=(const Datum& datum) const
{
return id >= datum.id;
}
......@@ -203,7 +212,7 @@ namespace op
* @param datum Datum to be compared.
* @result Whether the instance satisfies the condition with respect to datum.
*/
inline bool operator ==(const Datum& datum) const
inline bool operator==(const Datum& datum) const
{
return id == datum.id;
}
......@@ -212,7 +221,7 @@ namespace op
* @param datum Datum to be compared.
* @result Whether the instance satisfies the condition with respect to datum.
*/
inline bool operator !=(const Datum& datum) const
inline bool operator!=(const Datum& datum) const
{
return id != datum.id;
}
......
......@@ -2,14 +2,17 @@
#define OPENPOSE_CORE_RESIZE_AND_MERGE_BASE_HPP
#include <array>
#include <vector>
namespace op
{
template <typename T>
void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const T scaleGap = 0.f);
void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const std::vector<T>& scaleRatios = {1});
template <typename T>
void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const T scaleGap = 0.f);
void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const std::vector<T>& scaleRatios = {1});
}
#endif // OPENPOSE_CORE_RESIZE_AND_MERGE_BASE_HPP
......@@ -3,12 +3,14 @@
#define OPENPOSE_CORE_RESIZE_AND_MERGE_CAFFE_HPP
#include <array>
#include <vector>
#include <caffe/blob.hpp>
#include <openpose/utilities/macros.hpp>
namespace op
{
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep the compatibility with any generic Caffe version,
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep the
// compatibility with any generic Caffe version,
// we keep this 'layer' inside our library rather than in the Caffe code.
template <typename T>
class ResizeAndMergeCaffe
......@@ -18,22 +20,25 @@ namespace op
virtual void LayerSetUp(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top, const float factor, const bool mergeFirstDimension = true);
virtual void Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top,
const float factor, const bool mergeFirstDimension = true);
virtual inline const char* type() const { return "ResizeAndMerge"; }
void setScaleGap(const T scaleGap);
void setScaleRatios(const std::vector<T>& scaleRatios);
virtual void Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom);
private:
T mScaleGap;
std::vector<T> mScaleRatios;
std::array<int, 4> mBottomSize;
std::array<int, 4> mTopSize;
......
......@@ -60,7 +60,7 @@ namespace op
const auto profilerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__);
// cv::Mat -> float*
for (auto& tDatum : *tDatums)
tDatum.inputNetData = spCvMatToOpInput->format(tDatum.cvInputData);
std::tie(tDatum.inputNetData, tDatum.scaleRatios) = spCvMatToOpInput->format(tDatum.cvInputData);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
......
......@@ -22,7 +22,7 @@ namespace op
void initializationOnThread();
virtual void forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize) = 0;
virtual void forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize, const std::vector<float>& scaleRatios = {1.f}) = 0;
virtual const float* getHeatMapCpuConstPtr() const = 0;
......
......@@ -20,14 +20,14 @@ namespace op
{
public:
PoseExtractorCaffe(const Point<int>& netInputSize, const Point<int>& netOutputSize, const Point<int>& outputSize, const int scaleNumber,
const float scaleGap, const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector<HeatMapType>& heatMapTypes = {},
const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector<HeatMapType>& heatMapTypes = {},
const ScaleMode heatMapScale = ScaleMode::ZeroToOne);
virtual ~PoseExtractorCaffe();
void netInitializationOnThread();
void forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize);
void forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize, const std::vector<float>& scaleRatios = {1.f});
const float* getHeatMapCpuConstPtr() const;
......@@ -36,6 +36,7 @@ namespace op
const float* getPoseGpuConstPtr() const;
private:
const float mResizeScale;
std::shared_ptr<Net> spNet;
std::shared_ptr<ResizeAndMergeCaffe<float>> spResizeAndMergeCaffe;
std::shared_ptr<NmsCaffe<float>> spNmsCaffe;
......
......@@ -61,7 +61,7 @@ namespace op
// Extract people pose
for (auto& tDatum : *tDatums)
{
spPoseExtractor->forwardPass(tDatum.inputNetData, Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows});
spPoseExtractor->forwardPass(tDatum.inputNetData, Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows}, tDatum.scaleRatios);
tDatum.poseHeatMaps = spPoseExtractor->getHeatMaps();
tDatum.poseKeypoints = spPoseExtractor->getPoseKeypoints();
tDatum.scaleNetToOutput = spPoseExtractor->getScaleNetToOutput();
......
......@@ -81,7 +81,8 @@ namespace op
// Cubic interpolation
template <typename T>
inline __device__ void cubicSequentialData(int* xIntArray, int* yIntArray, T& dx, T& dy, const T xSource, const T ySource, const int width, const int height)
inline __device__ void cubicSequentialData(int* xIntArray, int* yIntArray, T& dx, T& dy, const T xSource, const T ySource,
const int width, const int height)
{
xIntArray[1] = fastTruncate(int(xSource + 1e-5), 0, width - 1);
xIntArray[0] = fastMax(0, xIntArray[1] - 1);
......@@ -97,7 +98,7 @@ namespace op
}
template <typename T>
inline __device__ T cubicInterpolation(const T v0, const T v1, const T v2, const T v3, const T dx)
inline __device__ T cubicInterpolate(const T v0, const T v1, const T v2, const T v3, const T dx)
{
// http://www.paulinternet.nl/?page=bicubic
// const auto a = (-0.5f * v0 + 1.5f * v1 - 1.5f * v2 + 0.5f * v3);
......@@ -108,10 +109,12 @@ namespace op
+ (v0 - 2.5f * v1 + 2.f * v2 - 0.5f * v3) * dx * dx
- 0.5f * (v0 - v2) * dx // + (-0.5f * v0 + 0.5f * v2) * dx
+ v1;
// return v1 + 0.5f * dx * (v2 - v0 + dx * (2.f * v0 - 5.f * v1 + 4.f * v2 - v3 + dx * (3.f * (v1 - v2) + v3 - v0)));
}
template <typename T>
inline __device__ T cubicResize(const T* const sourcePtr, const T xSource, const T ySource, const int widthSource, const int heightSource, const int widthSourcePtr)
inline __device__ T bicubicInterpolate(const T* const sourcePtr, const T xSource, const T ySource, const int widthSource,
const int heightSource, const int widthSourcePtr)
{
int xIntArray[4];
int yIntArray[4];
......@@ -122,16 +125,17 @@ namespace op
T temp[4];
for (unsigned char i = 0; i < 4; i++)
{
const int offset = yIntArray[i]*widthSourcePtr;
temp[i] = cubicInterpolation(sourcePtr[offset + xIntArray[0]], sourcePtr[offset + xIntArray[1]], sourcePtr[offset + xIntArray[2]], sourcePtr[offset + xIntArray[3]], dx);
const auto offset = yIntArray[i]*widthSourcePtr;
temp[i] = cubicInterpolate(sourcePtr[offset + xIntArray[0]], sourcePtr[offset + xIntArray[1]],
sourcePtr[offset + xIntArray[2]], sourcePtr[offset + xIntArray[3]], dx);
}
return cubicInterpolation(temp[0], temp[1], temp[2], temp[3], dy);
return cubicInterpolate(temp[0], temp[1], temp[2], temp[3], dy);
}
template <typename T>
inline __device__ T addWeighted(const T value1, const T value2, const T alphaValue2)
{
return (1 - alphaValue2) * value1 + alphaValue2 * value2;
return (1.f - alphaValue2) * value1 + alphaValue2 * value2;
}
template <typename T>
......
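A quick sanity check on the renamed `cubicInterpolate` (the same Catmull-Rom polynomial as the commented nested form in the hunk above): at `dx = 0` the result must be `v1`, and at `dx = 1` it must be `v2`. Rewritten as a host-side function purely for checking (hypothetical standalone code, not the `__device__` version):

```cpp
#include <cassert>
#include <cmath>

// Same cubic polynomial as in the kernel header above, as a plain host function.
float cubicInterpolateHost(const float v0, const float v1, const float v2, const float v3, const float dx)
{
    return (-0.5f * v0 + 1.5f * v1 - 1.5f * v2 + 0.5f * v3) * dx * dx * dx
           + (v0 - 2.5f * v1 + 2.f * v2 - 0.5f * v3) * dx * dx
           - 0.5f * (v0 - v2) * dx
           + v1;
}

int main()
{
    // The interpolant passes through the two middle control points.
    assert(std::fabs(cubicInterpolateHost(0.f, 1.f, 2.f, 3.f, 0.f) - 1.f) < 1e-6f);  // dx = 0 -> v1
    assert(std::fabs(cubicInterpolateHost(0.f, 1.f, 2.f, 3.f, 1.f) - 2.f) < 1e-6f);  // dx = 1 -> v2
    return 0;
}
```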
......@@ -552,13 +552,13 @@ namespace op
wDatumProducer = nullptr;
// Pose estimators
const Point<int>& netOutputSize = wrapperStructPose.netInputSize;
const Point<int>& poseNetOutputSize = wrapperStructPose.netInputSize;
std::vector<std::shared_ptr<PoseExtractor>> poseExtractors;
for (auto gpuId = 0; gpuId < gpuNumber; gpuId++)
poseExtractors.emplace_back(std::make_shared<PoseExtractorCaffe>(
wrapperStructPose.netInputSize, netOutputSize, finalOutputSize, wrapperStructPose.scalesNumber,
wrapperStructPose.scaleGap, wrapperStructPose.poseModel, wrapperStructPose.modelFolder,
gpuId + gpuNumberStart, wrapperStructPose.heatMapTypes, wrapperStructPose.heatMapScale
wrapperStructPose.netInputSize, poseNetOutputSize, finalOutputSize, wrapperStructPose.scalesNumber,
wrapperStructPose.poseModel, wrapperStructPose.modelFolder, gpuId + gpuNumberStart,
wrapperStructPose.heatMapTypes, wrapperStructPose.heatMapScale
));
// Pose renderers
......@@ -572,7 +572,7 @@ namespace op
for (auto gpuId = 0; gpuId < poseExtractors.size(); gpuId++)
{
poseRenderers.emplace_back(std::make_shared<PoseRenderer>(
netOutputSize, finalOutputSize, wrapperStructPose.poseModel, poseExtractors[gpuId],
poseNetOutputSize, finalOutputSize, wrapperStructPose.poseModel, poseExtractors[gpuId],
wrapperStructPose.blendOriginalFrame, alphaKeypoint,
alphaHeatMap, wrapperStructPose.defaultPartToRender
));
......@@ -678,7 +678,7 @@ namespace op
// Re-scale pose if desired
if (wrapperStructPose.keypointScale != ScaleMode::OutputResolution
&& (wrapperStructPose.keypointScale != ScaleMode::InputResolution || (finalOutputSize != producerSize))
&& (wrapperStructPose.keypointScale != ScaleMode::NetOutputResolution || (finalOutputSize != netOutputSize)))
&& (wrapperStructPose.keypointScale != ScaleMode::NetOutputResolution || (finalOutputSize != poseNetOutputSize)))
{
auto keypointScaler = std::make_shared<KeypointScaler>(wrapperStructPose.keypointScale);
mPostProcessingWs.emplace_back(std::make_shared<WKeypointScaler<TDatumsPtr>>(keypointScaler));
......
......@@ -10,9 +10,19 @@ namespace op
mScaleGap{scaleGap},
mInputNetSize4D{{mScaleNumber, 3, netInputResolution.y, netInputResolution.x}}
{
try
{
// Security checks
if (netInputResolution.x % 16 != 0 || netInputResolution.y % 16 != 0)
error("Net input resolution must be multiples of 16.", __LINE__, __FUNCTION__, __FILE__);
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
Array<float> CvMatToOpInput::format(const cv::Mat& cvInputData) const
std::pair<Array<float>, std::vector<float>> CvMatToOpInput::format(const cv::Mat& cvInputData) const
{
try
{
......@@ -22,29 +32,35 @@ namespace op
// inputNetData - Reescale keeping aspect ratio and transform to float the input deep net image
Array<float> inputNetData{mInputNetSize4D};
std::vector<float> scaleRatios(mScaleNumber, 1.f);
const auto inputNetDataOffset = inputNetData.getVolume(1, 3);
for (auto i = 0; i < mScaleNumber; i++)
{
const auto requestedScale = 1.f - i*mScaleGap;
if (requestedScale > 1.f)
error("All scales must be <= 1, i.e. 1-num_scales*scale_gap <= 1", __LINE__, __FUNCTION__, __FILE__);
const auto currentScale = 1.f - i*mScaleGap;
if (currentScale < 0.f || 1.f < currentScale)
error("All scales must be in the range [0, 1], i.e. 0 <= 1-num_scales*scale_gap <= 1", __LINE__, __FUNCTION__, __FILE__);
const auto netInputWidth = inputNetData.getSize(3);
const auto targetWidth = fastTruncate(16 * intRound(netInputWidth * requestedScale / 16.), 1, netInputWidth/16*16);
const auto targetWidth = fastTruncate(intRound(netInputWidth * currentScale) / 16 * 16, 1, netInputWidth);
const auto netInputHeight = inputNetData.getSize(2);
const auto targetHeight = fastTruncate(16 * intRound(netInputHeight * requestedScale / 16.), 1, netInputHeight/16*16);
const auto targetHeight = fastTruncate(intRound(netInputHeight * currentScale) / 16 * 16, 1, netInputHeight);
const Point<int> targetSize{targetWidth, targetHeight};
const auto scale = resizeGetScaleFactor(Point<int>{cvInputData.cols, cvInputData.rows}, targetSize);
const cv::Mat frameWithNetSize = resizeFixedAspectRatio(cvInputData, scale, Point<int>{netInputWidth, netInputHeight});
// Fill inputNetData
uCharCvMatToFloatPtr(inputNetData.getPtr() + i * inputNetDataOffset, frameWithNetSize, true);
// Fill scaleRatios
scaleRatios[i] = scale;
if (i > 0)
scaleRatios[i] /= scaleRatios[0];
}
return inputNetData;
scaleRatios.at(0) /= scaleRatios[0];
return std::make_pair(inputNetData, scaleRatios);
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
return Array<float>{};
return std::make_pair(Array<float>{}, std::vector<float>{});
}
}
}
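To see why a single `scale_gap` was not enough, here is a rough illustration (hypothetical standalone program; it skips the per-image `resizeGetScaleFactor` step and only shows the effect of snapping each scaled width to a multiple of 16, as in the hunk above):

```cpp
#include <cmath>
#include <cstdio>

int main()
{
    const int numScales = 3;
    const float scaleGap = 0.15f;
    const int netInputWidth = 656;   // e.g. from --net_resolution 656x368
    float firstWidth = 0.f;
    for (int i = 0; i < numScales; i++)
    {
        const float nominalScale = 1.f - i * scaleGap;
        // Width actually used by the net for this scale (multiple of 16)
        const float snappedWidth = float(int(std::round(netInputWidth * nominalScale)) / 16 * 16);
        if (i == 0)
            firstWidth = snappedWidth;
        std::printf("scale %d: nominal %.3f, actual relative ratio %.3f\n",
                    i, nominalScale, snappedWidth / firstWidth);
    }
    return 0;
}
```

The nominal scales (1.00, 0.85, 0.70) and the ratios actually produced after snapping (1.00, ~0.83, ~0.68) disagree; the old merge kernel used the nominal values, which is consistent with the offset described in the commit message.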
......@@ -22,10 +22,12 @@ namespace op
poseHeatMaps{datum.poseHeatMaps},
faceRectangles{datum.faceRectangles},
faceKeypoints{datum.faceKeypoints},
handRectangles{datum.handRectangles},
handKeypoints{datum.handKeypoints},
// Other parameters
scaleInputToOutput{datum.scaleInputToOutput},
scaleNetToOutput{datum.scaleNetToOutput},
scaleRatios{datum.scaleRatios},
elementRendered{datum.elementRendered}
{
}
......@@ -48,10 +50,12 @@ namespace op
poseHeatMaps = datum.poseHeatMaps,
faceRectangles = datum.faceRectangles,
faceKeypoints = datum.faceKeypoints,
handRectangles = datum.handRectangles,
handKeypoints = datum.handKeypoints,
// Other parameters
scaleInputToOutput = datum.scaleInputToOutput;
scaleNetToOutput = datum.scaleNetToOutput;
scaleRatios = datum.scaleRatios;
elementRendered = datum.elementRendered;
// Return
return *this;
......@@ -85,8 +89,10 @@ namespace op
std::swap(poseHeatMaps, datum.poseHeatMaps);
std::swap(faceRectangles, datum.faceRectangles);
std::swap(faceKeypoints, datum.faceKeypoints);
std::swap(handRectangles, datum.handRectangles);
std::swap(handKeypoints, datum.handKeypoints);
// Other parameters
std::swap(scaleRatios, datum.scaleRatios);
std::swap(elementRendered, datum.elementRendered);
}
catch (const std::exception& e)
......@@ -113,10 +119,12 @@ namespace op
std::swap(poseHeatMaps, datum.poseHeatMaps);
std::swap(faceRectangles, datum.faceRectangles);
std::swap(faceKeypoints, datum.faceKeypoints);
std::swap(handRectangles, datum.handRectangles);
std::swap(handKeypoints, datum.handKeypoints);
// Other parameters
scaleInputToOutput = datum.scaleInputToOutput;
scaleNetToOutput = datum.scaleNetToOutput;
std::swap(scaleRatios, datum.scaleRatios);
std::swap(elementRendered, datum.elementRendered);
// Return
return *this;
......@@ -151,10 +159,12 @@ namespace op
datum.poseHeatMaps = poseHeatMaps.clone();
datum.faceRectangles = faceRectangles;
datum.faceKeypoints = faceKeypoints.clone();
datum.handRectangles = handRectangles;
datum.handKeypoints = handKeypoints.clone();
// Other parameters
datum.scaleInputToOutput = scaleInputToOutput;
datum.scaleNetToOutput = scaleNetToOutput;
datum.scaleRatios = scaleRatios;
datum.elementRendered = elementRendered;
// Return
return std::move(datum);
......
......@@ -6,13 +6,14 @@
namespace op
{
template <typename T>
void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const T scaleGap)
void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<T>& scaleRatios)
{
try
{
UNUSED(targetPtr);
UNUSED(sourcePtr);
UNUSED(scaleGap);
UNUSED(scaleRatios);
UNUSED(targetSize);
UNUSED(sourceSize);
error("CPU version not completely implemented.", __LINE__, __FUNCTION__, __FILE__);
......@@ -61,6 +62,8 @@ namespace op
}
}
template void resizeAndMergeCpu(float* targetPtr, const float* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const float scaleGap);
template void resizeAndMergeCpu(double* targetPtr, const double* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const double scaleGap);
template void resizeAndMergeCpu(float* targetPtr, const float* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<float>& scaleRatios);
template void resizeAndMergeCpu(double* targetPtr, const double* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<double>& scaleRatios);
}
......@@ -8,7 +8,8 @@ namespace op
const auto THREADS_PER_BLOCK_1D = 16u;
template <typename T>
__global__ void resizeKernel(T* targetPtr, const T* const sourcePtr, const int sourceWidth, const int sourceHeight, const int targetWidth, const int targetHeight)
__global__ void resizeKernel(T* targetPtr, const T* const sourcePtr, const int sourceWidth, const int sourceHeight, const int targetWidth,
const int targetHeight)
{
const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;
......@@ -20,12 +21,12 @@ namespace op
const T xSource = (x + 0.5f) / scaleWidth - 0.5f;
const T ySource = (y + 0.5f) / scaleHeight - 0.5f;
targetPtr[y*targetWidth+x] = cubicResize(sourcePtr, xSource, ySource, sourceWidth, sourceHeight, sourceWidth);
targetPtr[y*targetWidth+x] = bicubicInterpolate(sourcePtr, xSource, ySource, sourceWidth, sourceHeight, sourceWidth);
}
}
template <typename T>
__global__ void resizeKernelAndMerge(T* targetPtr, const T* const sourcePtr, const int sourceNumOffset, const int num, const T scaleGap,
__global__ void resizeKernelAndMerge(T* targetPtr, const T* const sourcePtr, const int sourceNumOffset, const int num, const T* scaleRatios,
const int sourceWidth, const int sourceHeight, const int targetWidth, const int targetHeight)
{
const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
......@@ -38,17 +39,17 @@ namespace op
// targetPixel = -1000.f; // For fastMax
for (auto n = 0; n < num; n++)
{
const auto numberScale = 1 - n * scaleGap;
const auto widthPaddedSource = int(sourceWidth * numberScale);
const auto heightPaddedSource = int(sourceHeight * numberScale);
const auto currentWidth = sourceWidth * scaleRatios[n];
const auto currentHeight = sourceHeight * scaleRatios[n];
const auto scaleWidth = targetWidth / T(widthPaddedSource);
const auto scaleHeight = targetHeight / T(heightPaddedSource);
const auto scaleWidth = targetWidth / currentWidth;
const auto scaleHeight = targetHeight / currentHeight;
const T xSource = (x + 0.5f) / scaleWidth - 0.5f;
const T ySource = (y + 0.5f) / scaleHeight - 0.5f;
const T* const sourcePtrN = sourcePtr + n * sourceNumOffset;
const auto interpolated = cubicResize(sourcePtrN, xSource, ySource, widthPaddedSource, heightPaddedSource, sourceWidth);
const auto interpolated = bicubicInterpolate(sourcePtrN, xSource, ySource, intRound(currentWidth),
intRound(currentHeight), sourceWidth);
targetPixel += interpolated;
// targetPixel = fastMax(targetPixel, interpolated);
}
......@@ -57,7 +58,8 @@ namespace op
}
template <typename T>
void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const T scaleGap)
void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<T>& scaleRatios)
{
try
{
......@@ -73,21 +75,42 @@ namespace op
const auto sourceChannelOffset = sourceHeight * sourceWidth;
const auto targetChannelOffset = targetWidth * targetHeight;
// No multi-scale merging
if (targetSize[0] > 1)
{
for (auto n = 0; n < num; n++)
for (auto c = 0; c < channels; c++)
resizeKernel<<<numBlocks, threadsPerBlock>>>(targetPtr + (n*channels + c) * targetChannelOffset, sourcePtr + (n*channels + c) * sourceChannelOffset,
{
const auto offsetBase = n*channels;
for (auto c = 0 ; c < channels ; c++)
{
const auto offset = offsetBase + c;
resizeKernel<<<numBlocks, threadsPerBlock>>>(targetPtr + offset * targetChannelOffset,
sourcePtr + offset * sourceChannelOffset,
sourceWidth, sourceHeight, targetWidth, targetHeight);
}
}
}
// Multi-scale merging
else
{
if (scaleGap <= 0.f && num != targetSize[0])
error("The scale gap must be greater than 0.", __LINE__, __FUNCTION__, __FILE__);
// If num_scales > 1 --> scaleRatios must be set
if (scaleRatios.size() != num)
error("The scale ratios size must be equal to the number of scales.", __LINE__, __FUNCTION__, __FILE__);
const auto maxScales = 10;
if (scaleRatios.size() > maxScales)
error("The maximum number of scales is " + std::to_string(maxScales) + ".", __LINE__, __FUNCTION__, __FILE__);
// Copy scaleRatios
T* scaleRatiosGpuPtr;
cudaMalloc((void**)&scaleRatiosGpuPtr, maxScales * sizeof(T));
cudaMemcpy(scaleRatiosGpuPtr, scaleRatios.data(), scaleRatios.size() * sizeof(T), cudaMemcpyHostToDevice);
// Perform resize + merging
const auto sourceNumOffset = channels * sourceChannelOffset;
for (auto c = 0; c < channels; c++)
resizeKernelAndMerge<<<numBlocks, threadsPerBlock>>>(targetPtr + c * targetChannelOffset, sourcePtr + c * sourceChannelOffset, sourceNumOffset,
num, scaleGap, sourceWidth, sourceHeight, targetWidth, targetHeight);
for (auto c = 0 ; c < channels ; c++)
resizeKernelAndMerge<<<numBlocks, threadsPerBlock>>>(targetPtr + c * targetChannelOffset,
sourcePtr + c * sourceChannelOffset, sourceNumOffset,
num, scaleRatiosGpuPtr, sourceWidth, sourceHeight, targetWidth, targetHeight);
// Free memory
cudaFree(scaleRatiosGpuPtr);
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
......@@ -98,6 +121,8 @@ namespace op
}
}
template void resizeAndMergeGpu(float* targetPtr, const float* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const float scaleGap);
template void resizeAndMergeGpu(double* targetPtr, const double* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize, const double scaleGap);
template void resizeAndMergeGpu(float* targetPtr, const float* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<float>& scaleRatios);
template void resizeAndMergeGpu(double* targetPtr, const double* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<double>& scaleRatios);
}
......@@ -8,7 +8,8 @@
namespace op
{
template <typename T>
ResizeAndMergeCaffe<T>::ResizeAndMergeCaffe()
ResizeAndMergeCaffe<T>::ResizeAndMergeCaffe() :
mScaleRatios{1}
{
}
......@@ -29,7 +30,8 @@ namespace op
}
template <typename T>
void ResizeAndMergeCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top, const float factor, const bool mergeFirstDimension)
void ResizeAndMergeCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top,
const float factor, const bool mergeFirstDimension)
{
try
{
......@@ -54,11 +56,11 @@ namespace op
}
template <typename T>
void ResizeAndMergeCaffe<T>::setScaleGap(const T scaleGap)
void ResizeAndMergeCaffe<T>::setScaleRatios(const std::vector<T>& scaleRatios)
{
try
{
mScaleGap = {scaleGap};
mScaleRatios = {scaleRatios};
}
catch (const std::exception& e)
{
......@@ -71,7 +73,7 @@ namespace op
{
try
{
resizeAndMergeCpu(top.at(0)->mutable_cpu_data(), bottom.at(0)->cpu_data(), mTopSize, mBottomSize, mScaleGap);
resizeAndMergeCpu(top.at(0)->mutable_cpu_data(), bottom.at(0)->cpu_data(), mTopSize, mBottomSize, mScaleRatios);
}
catch (const std::exception& e)
{
......@@ -84,7 +86,7 @@ namespace op
{
try
{
resizeAndMergeGpu(top.at(0)->mutable_gpu_data(), bottom.at(0)->gpu_data(), mTopSize, mBottomSize, mScaleGap);
resizeAndMergeGpu(top.at(0)->mutable_gpu_data(), bottom.at(0)->gpu_data(), mTopSize, mBottomSize, mScaleRatios);
}
catch (const std::exception& e)
{
......@@ -93,7 +95,8 @@ namespace op
}
template <typename T>
void ResizeAndMergeCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom)
void ResizeAndMergeCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom)
{
try
{
......@@ -109,7 +112,8 @@ namespace op
}
template <typename T>
void ResizeAndMergeCaffe<T>::Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom)
void ResizeAndMergeCaffe<T>::Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom)
{
try
{
......
......@@ -11,9 +11,10 @@
namespace op
{
PoseExtractorCaffe::PoseExtractorCaffe(const Point<int>& netInputSize, const Point<int>& netOutputSize, const Point<int>& outputSize, const int scaleNumber,
const float scaleGap, const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector<HeatMapType>& heatMapTypes,
const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector<HeatMapType>& heatMapTypes,
const ScaleMode heatMapScale) :
PoseExtractor{netOutputSize, outputSize, poseModel, heatMapTypes, heatMapScale},
mResizeScale{mNetOutputSize.x / (float)netInputSize.x},
spNet{std::make_shared<NetCaffe>(std::array<int,4>{scaleNumber, 3, (int)netInputSize.y, (int)netInputSize.x},
modelFolder + POSE_PROTOTXT[(int)poseModel], modelFolder + POSE_TRAINED_MODEL[(int)poseModel], gpuId)},
spResizeAndMergeCaffe{std::make_shared<ResizeAndMergeCaffe<float>>()},
......@@ -22,9 +23,10 @@ namespace op
{
try
{
checkE(netOutputSize.x, netInputSize.x, "Net input and output size must be equal.", __LINE__, __FUNCTION__, __FILE__);
checkE(netOutputSize.y, netInputSize.y, "Net input and output size must be equal.", __LINE__, __FUNCTION__, __FILE__);
spResizeAndMergeCaffe->setScaleGap(scaleGap);
const auto resizeScale = mNetOutputSize.x / (float)netInputSize.x;
const auto resizeScaleCheck = resizeScale / (mNetOutputSize.y/(float)netInputSize.y);
if (1+1e-6 < resizeScaleCheck || resizeScaleCheck < 1-1e-6)
error("Net input and output size must be proportional. resizeScaleCheck = " + std::to_string(resizeScaleCheck), __LINE__, __FUNCTION__, __FILE__);
}
catch (const std::exception& e)
{
......@@ -49,7 +51,7 @@ namespace op
// HeatMaps extractor blob and layer
spHeatMapsBlob = {std::make_shared<caffe::Blob<float>>(1,1,1,1)};
spResizeAndMergeCaffe->Reshape({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()}, POSE_CCN_DECREASE_FACTOR[(int)mPoseModel]);
spResizeAndMergeCaffe->Reshape({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()}, mResizeScale * POSE_CCN_DECREASE_FACTOR[(int)mPoseModel]);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// Pose extractor blob and layer
......@@ -71,7 +73,7 @@ namespace op
}
}
void PoseExtractorCaffe::forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize)
void PoseExtractorCaffe::forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize, const std::vector<float>& scaleRatios)
{
try
{
......@@ -83,6 +85,7 @@ namespace op
spNet->forwardPass(inputNetData.getConstPtr()); // ~79.3836ms
// 2. Resize heat maps + merge different scales
spResizeAndMergeCaffe->setScaleRatios(scaleRatios);
#ifndef CPU_ONLY
spResizeAndMergeCaffe->Forward_gpu({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()}); // ~5ms
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
......
......@@ -244,7 +244,7 @@ namespace op
const auto ySource = (y + 0.5f) / scaleToKeepRatio - 0.5f;
const auto heatMapOffset = part * widthHeatMap * heightHeatMap;
const auto* const heatMapPtrOffsetted = heatMapPtr + heatMapOffset;
const auto interpolatedValue = cubicResize(heatMapPtrOffsetted, xSource, ySource, widthHeatMap, heightHeatMap, widthHeatMap);
const auto interpolatedValue = bicubicInterpolate(heatMapPtrOffsetted, xSource, ySource, widthHeatMap, heightHeatMap, widthHeatMap);
float rgbColor[3];
getColorHeatMap(rgbColor, interpolatedValue, 0.f, 1.f);
......