diff --git a/README.md b/README.md index 556314dbb7cc0eec9bf97a4d8ca2fcfc17e5b26d..2b36976b43200a10c4c7729bb186b87f329b496e 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ OpenPose is freely available for free non-commercial use, and may be redistribut Library main functionality: -* Multi-person 15 or **18-keypoint body pose** estimation and rendering. **Running time invariant of number of people** on the image. +* Multi-person 15 or **18-keypoint body pose** estimation and rendering. **Running time invariant to number of people** on the image. * Multi-person **2x21-keypoint hand** estimation and rendering. Note: In this initial version, **running time** linearly **depends** on the **number of people** on the image. **Coming soon (in around 1-5 weeks)!** @@ -76,8 +76,6 @@ The pose estimation work is based on the C++ code from [the ECCV 2016 demo](http 2. [OpenPose Wrapper](#openpose-wrapper) 3. [OpenPose Library](#openpose-library) 4. [Output](#output) - 1. [Output Format](#output-format) - 2. [Reading Saved Results](#reading-saved-results) 5. [OpenPose Benchmark](#openpose-benchmark) 6. [Send Us Your Feedback!](#send-us-your-feedback) 7. [Citation](#citation) diff --git a/doc/release_notes.md b/doc/release_notes.md index a1eee70d7a451f3f0bd71d8b14d5f826fe30f5fc..e474c3b0ec73f7eca253536dbebcc76829cc730c 100644 --- a/doc/release_notes.md +++ b/doc/release_notes.md @@ -41,5 +41,16 @@ OpenPose Library - Release Notes 3. Main bugs fixed: 1. All visualization functions moved to same thread, so it works with most OpenCV custom compiled versions. 2. Fixed error on debug mode: `Too many resources requested for launch`. - 3. Bug in Array::getConstCvMat() if mVolume=0, now returning empty cv::Mat. - 4. Bug: `--process_real_time` threw error with webcam. + + + +## Current version (future OpenPose 1.0.0rc4) +1. Main improvements: + 1. Check() functions give more feedback. + 2. Improved documentation. +2. Functions or parameters renamed: + 1. 
`Datum::scaleRatios` to save the relative scale ratio when multi-scale. +3. Main bugs fixed: + 1. Fixed bug in Array::getConstCvMat() if mVolume=0, now returning empty cv::Mat. + 2. Fixed bug: `--process_real_time` threw error with webcam. + 3. Fixed bug: Face not working with output resolution different to input. diff --git a/examples/tutorial_pose/1_extract_from_image.cpp b/examples/tutorial_pose/1_extract_from_image.cpp index c23fb89ced7ddc764436655647d9b8a2abf1859e..1018bef790ec005d36a7737c70e9ea4495605a1a 100644 --- a/examples/tutorial_pose/1_extract_from_image.cpp +++ b/examples/tutorial_pose/1_extract_from_image.cpp @@ -28,7 +28,7 @@ DEFINE_int32(logging_level, 3, "The logging level. Inte DEFINE_string(image_path, "examples/media/COCO_val2014_000000000192.jpg", "Process the desired image."); // OpenPose DEFINE_string(model_pose, "COCO", "Model to be used (e.g. COCO, MPI, MPI_4_layers)."); -DEFINE_string(model_folder, "models/", "Folder where the pose models (COCO and MPI) are located."); +DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located."); DEFINE_string(net_resolution, "656x368", "Multiples of 16. If it is increased, the accuracy usually increases. If it is decreased, the speed increases."); DEFINE_string(resolution, "1280x720", "The image resolution (display). 
Use \"-1x-1\" to force the program to use the default images resolution."); DEFINE_int32(num_gpu_start, 0, "GPU device start number."); @@ -98,7 +98,7 @@ int openPoseTutorialPose1() // Step 3 - Initialize all required classes op::CvMatToOpInput cvMatToOpInput{netInputSize, FLAGS_num_scales, (float)FLAGS_scale_gap}; op::CvMatToOpOutput cvMatToOpOutput{outputSize}; - op::PoseExtractorCaffe poseExtractorCaffe{netInputSize, netOutputSize, outputSize, FLAGS_num_scales, (float)FLAGS_scale_gap, poseModel, + op::PoseExtractorCaffe poseExtractorCaffe{netInputSize, netOutputSize, outputSize, FLAGS_num_scales, poseModel, FLAGS_model_folder, FLAGS_num_gpu_start}; op::PoseRenderer poseRenderer{netOutputSize, outputSize, poseModel, nullptr, (float)FLAGS_alpha_pose}; op::OpOutputToCvMat opOutputToCvMat{outputSize}; @@ -114,12 +114,14 @@ int openPoseTutorialPose1() if(inputImage.empty()) op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__); // Step 2 - Format input image to OpenPose input and output formats - const auto netInputArray = cvMatToOpInput.format(inputImage); + op::Array netInputArray; + std::vector scaleRatios; + std::tie(netInputArray, scaleRatios) = cvMatToOpInput.format(inputImage); double scaleInputToOutput; op::Array outputArray; std::tie(scaleInputToOutput, outputArray) = cvMatToOpOutput.format(inputImage); // Step 3 - Estimate poseKeypoints - poseExtractorCaffe.forwardPass(netInputArray, {inputImage.cols, inputImage.rows}); + poseExtractorCaffe.forwardPass(netInputArray, {inputImage.cols, inputImage.rows}, scaleRatios); const auto poseKeypoints = poseExtractorCaffe.getPoseKeypoints(); // Step 4 - Render poseKeypoints poseRenderer.renderPose(outputArray, poseKeypoints); diff --git a/examples/tutorial_pose/2_extract_pose_or_heatmat_from_image.cpp b/examples/tutorial_pose/2_extract_pose_or_heatmat_from_image.cpp index 52fadcaf22faacbfc5b6fad76c468540a8df4bec..4f55a07289266275ddd62e985a090f4f02e6b8f1 100644 --- 
a/examples/tutorial_pose/2_extract_pose_or_heatmat_from_image.cpp +++ b/examples/tutorial_pose/2_extract_pose_or_heatmat_from_image.cpp @@ -28,7 +28,7 @@ DEFINE_int32(logging_level, 3, "The logging level. Inte DEFINE_string(image_path, "examples/media/COCO_val2014_000000000192.jpg", "Process the desired image."); // OpenPose DEFINE_string(model_pose, "COCO", "Model to be used (e.g. COCO, MPI, MPI_4_layers)."); -DEFINE_string(model_folder, "models/", "Folder where the pose models (COCO and MPI) are located."); +DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located."); DEFINE_string(net_resolution, "656x368", "Multiples of 16. If it is increased, the accuracy usually increases. If it is decreased, the speed increases."); DEFINE_string(resolution, "1280x720", "The image resolution (display). Use \"-1x-1\" to force the program to use the default images resolution."); DEFINE_int32(num_gpu_start, 0, "GPU device start number."); @@ -101,8 +101,7 @@ int openPoseTutorialPose2() op::CvMatToOpInput cvMatToOpInput{netInputSize, FLAGS_num_scales, (float)FLAGS_scale_gap}; op::CvMatToOpOutput cvMatToOpOutput{outputSize}; std::shared_ptr poseExtractorPtr = std::make_shared(netInputSize, netOutputSize, outputSize, FLAGS_num_scales, - (float)FLAGS_scale_gap, poseModel, - FLAGS_model_folder, FLAGS_num_gpu_start); + poseModel, FLAGS_model_folder, FLAGS_num_gpu_start); op::PoseRenderer poseRenderer{netOutputSize, outputSize, poseModel, poseExtractorPtr, (float)FLAGS_alpha_pose, (float)FLAGS_alpha_heatmap}; poseRenderer.setElementToRender(FLAGS_part_to_show); op::OpOutputToCvMat opOutputToCvMat{outputSize}; @@ -118,12 +117,14 @@ int openPoseTutorialPose2() if(inputImage.empty()) op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__); // Step 2 - Format input image to OpenPose input and output formats - const auto netInputArray = cvMatToOpInput.format(inputImage); + op::Array 
netInputArray; + std::vector scaleRatios; + std::tie(netInputArray, scaleRatios) = cvMatToOpInput.format(inputImage); double scaleInputToOutput; op::Array outputArray; std::tie(scaleInputToOutput, outputArray) = cvMatToOpOutput.format(inputImage); // Step 3 - Estimate poseKeypoints - poseExtractorPtr->forwardPass(netInputArray, {inputImage.cols, inputImage.rows}); + poseExtractorPtr->forwardPass(netInputArray, {inputImage.cols, inputImage.rows}, scaleRatios); const auto poseKeypoints = poseExtractorPtr->getPoseKeypoints(); const auto scaleNetToOutput = poseExtractorPtr->getScaleNetToOutput(); // Step 4 - Render pose diff --git a/include/openpose/core/cvMatToOpInput.hpp b/include/openpose/core/cvMatToOpInput.hpp index 50fc2e1d91ef841d3b07bf4fbd7c2cc24d00caf5..4f87bbf4ce57680c49e4923951592a1475b8b4bd 100644 --- a/include/openpose/core/cvMatToOpInput.hpp +++ b/include/openpose/core/cvMatToOpInput.hpp @@ -1,6 +1,7 @@ #ifndef OPENPOSE_CORE_CV_MAT_TO_OP_INPUT_HPP #define OPENPOSE_CORE_CV_MAT_TO_OP_INPUT_HPP +#include // std::pair #include #include // cv::Mat #include "array.hpp" @@ -13,7 +14,7 @@ namespace op public: CvMatToOpInput(const Point& netInputResolution, const int scaleNumber = 1, const float scaleGap = 0.25); - Array format(const cv::Mat& cvInputData) const; + std::pair, std::vector> format(const cv::Mat& cvInputData) const; private: const int mScaleNumber; diff --git a/include/openpose/core/datum.hpp b/include/openpose/core/datum.hpp index 01eaaba697f4118d8a99e65417d04fd716c5109b..a948e03768dce3983974b118a5197e29d25c4ddb 100644 --- a/include/openpose/core/datum.hpp +++ b/include/openpose/core/datum.hpp @@ -74,7 +74,7 @@ namespace op /** * Face detection locations (x,y,width,height) for each person in the image. - * It has been resized to the same resolution as `poseKeypoints`. + * It is resized to cvInputData.size(). 
* Size: #people */ std::vector> faceRectangles; @@ -86,6 +86,13 @@ namespace op */ Array faceKeypoints; + /** + * Hand detection locations (x,y,width,height) for each person in the image. + * It is resized to cvInputData.size(). + * Size: #people + */ + std::vector, 2>> handRectangles; + /** * Experimental (NOT IMPLEMENTED YET) * Hands code is in development phase. Not included in this version. @@ -98,6 +105,8 @@ namespace op float scaleNetToOutput; /**< Scale ratio between the net output and the final output Datum::cvOutputData. */ + std::vector scaleRatios; /**< Scale ratios between each scale (e.g. flag `num_scales`). Used to resize the different scales. */ + std::pair elementRendered; /**< Pair with the element key id POSE_BODY_PART_MAPPING on `pose/poseParameters.hpp` and its mapped value (e.g. 1 and "Neck"). */ @@ -167,7 +176,7 @@ namespace op * @param datum Datum to be compared. * @result Whether the instance satisfies the condition with respect to datum. */ - inline bool operator <(const Datum& datum) const + inline bool operator<(const Datum& datum) const { return id < datum.id; } @@ -176,7 +185,7 @@ namespace op * @param datum Datum to be compared. * @result Whether the instance satisfies the condition with respect to datum. */ - inline bool operator >(const Datum& datum) const + inline bool operator>(const Datum& datum) const { return id > datum.id; } @@ -185,7 +194,7 @@ namespace op * @param datum Datum to be compared. * @result Whether the instance satisfies the condition with respect to datum. */ - inline bool operator <=(const Datum& datum) const + inline bool operator<=(const Datum& datum) const { return id <= datum.id; } @@ -194,7 +203,7 @@ namespace op * @param datum Datum to be compared. * @result Whether the instance satisfies the condition with respect to datum. 
*/ - inline bool operator >=(const Datum& datum) const + inline bool operator>=(const Datum& datum) const { return id >= datum.id; } @@ -203,7 +212,7 @@ namespace op * @param datum Datum to be compared. * @result Whether the instance satisfies the condition with respect to datum. */ - inline bool operator ==(const Datum& datum) const + inline bool operator==(const Datum& datum) const { return id == datum.id; } @@ -212,7 +221,7 @@ namespace op * @param datum Datum to be compared. * @result Whether the instance satisfies the condition with respect to datum. */ - inline bool operator !=(const Datum& datum) const + inline bool operator!=(const Datum& datum) const { return id != datum.id; } diff --git a/include/openpose/core/resizeAndMergeBase.hpp b/include/openpose/core/resizeAndMergeBase.hpp index b36d51a53356edade57afc5666e4ec05a1cf83bd..f4cd03af6674150c11385c59a12135177895cef7 100644 --- a/include/openpose/core/resizeAndMergeBase.hpp +++ b/include/openpose/core/resizeAndMergeBase.hpp @@ -2,14 +2,17 @@ #define OPENPOSE_CORE_RESIZE_AND_MERGE_BASE_HPP #include +#include namespace op { template - void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, const T scaleGap = 0.f); + void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, + const std::vector& scaleRatios = {1}); template - void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, const T scaleGap = 0.f); + void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, + const std::vector& scaleRatios = {1}); } #endif // OPENPOSE_CORE_RESIZE_AND_MERGE_BASE_HPP diff --git a/include/openpose/core/resizeAndMergeCaffe.hpp b/include/openpose/core/resizeAndMergeCaffe.hpp index 2e1d1ef644d2e6299ff54b4bee6085639eb6fff7..a2f253f72eed0adbf42c8b57566addb3d3e8a9be 
100644 --- a/include/openpose/core/resizeAndMergeCaffe.hpp +++ b/include/openpose/core/resizeAndMergeCaffe.hpp @@ -3,12 +3,14 @@ #define OPENPOSE_CORE_RESIZE_AND_MERGE_CAFFE_HPP #include +#include #include #include namespace op { - // It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep the compatibility with any generic Caffe version, + // It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep the + // compatibility with any generic Caffe version, // we keep this 'layer' inside our library rather than in the Caffe code. template class ResizeAndMergeCaffe @@ -18,22 +20,25 @@ namespace op virtual void LayerSetUp(const std::vector*>& bottom, const std::vector*>& top); - virtual void Reshape(const std::vector*>& bottom, const std::vector*>& top, const float factor, const bool mergeFirstDimension = true); + virtual void Reshape(const std::vector*>& bottom, const std::vector*>& top, + const float factor, const bool mergeFirstDimension = true); virtual inline const char* type() const { return "ResizeAndMerge"; } - void setScaleGap(const T scaleGap); + void setScaleRatios(const std::vector& scaleRatios); virtual void Forward_cpu(const std::vector*>& bottom, const std::vector*>& top); virtual void Forward_gpu(const std::vector*>& bottom, const std::vector*>& top); - virtual void Backward_cpu(const std::vector*>& top, const std::vector& propagate_down, const std::vector*>& bottom); + virtual void Backward_cpu(const std::vector*>& top, const std::vector& propagate_down, + const std::vector*>& bottom); - virtual void Backward_gpu(const std::vector*>& top, const std::vector& propagate_down, const std::vector*>& bottom); + virtual void Backward_gpu(const std::vector*>& top, const std::vector& propagate_down, + const std::vector*>& bottom); private: - T mScaleGap; + std::vector mScaleRatios; std::array mBottomSize; std::array mTopSize; diff --git 
a/include/openpose/core/wCvMatToOpInput.hpp b/include/openpose/core/wCvMatToOpInput.hpp index 06bd7cecb5646486848f3d35d53f2c6092160ccd..5ad46c5db87bab9ae6d86a8f639a9c1d649a4e07 100644 --- a/include/openpose/core/wCvMatToOpInput.hpp +++ b/include/openpose/core/wCvMatToOpInput.hpp @@ -60,7 +60,7 @@ namespace op const auto profilerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__); // cv::Mat -> float* for (auto& tDatum : *tDatums) - tDatum.inputNetData = spCvMatToOpInput->format(tDatum.cvInputData); + std::tie(tDatum.inputNetData, tDatum.scaleRatios) = spCvMatToOpInput->format(tDatum.cvInputData); // Profiling speed Profiler::timerEnd(profilerKey); Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X); diff --git a/include/openpose/pose/poseExtractor.hpp b/include/openpose/pose/poseExtractor.hpp index 306699ca22dd905f48fc4143aa04c1c82f3e6c06..c76c48c6b94eab4f7f8b49643a52525b32272ba6 100644 --- a/include/openpose/pose/poseExtractor.hpp +++ b/include/openpose/pose/poseExtractor.hpp @@ -22,7 +22,7 @@ namespace op void initializationOnThread(); - virtual void forwardPass(const Array& inputNetData, const Point& inputDataSize) = 0; + virtual void forwardPass(const Array& inputNetData, const Point& inputDataSize, const std::vector& scaleRatios = {1.f}) = 0; virtual const float* getHeatMapCpuConstPtr() const = 0; diff --git a/include/openpose/pose/poseExtractorCaffe.hpp b/include/openpose/pose/poseExtractorCaffe.hpp index 9dc465db8d9e7eb8df6ecb9e87dd66c6e04b2a78..57b1e37c1d46679de3bbc36dbdbff571d6bd3d73 100644 --- a/include/openpose/pose/poseExtractorCaffe.hpp +++ b/include/openpose/pose/poseExtractorCaffe.hpp @@ -20,14 +20,14 @@ namespace op { public: PoseExtractorCaffe(const Point& netInputSize, const Point& netOutputSize, const Point& outputSize, const int scaleNumber, - const float scaleGap, const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector& heatMapTypes = {}, + 
const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector& heatMapTypes = {}, const ScaleMode heatMapScale = ScaleMode::ZeroToOne); virtual ~PoseExtractorCaffe(); void netInitializationOnThread(); - void forwardPass(const Array& inputNetData, const Point& inputDataSize); + void forwardPass(const Array& inputNetData, const Point& inputDataSize, const std::vector& scaleRatios = {1.f}); const float* getHeatMapCpuConstPtr() const; @@ -36,6 +36,7 @@ namespace op const float* getPoseGpuConstPtr() const; private: + const float mResizeScale; std::shared_ptr spNet; std::shared_ptr> spResizeAndMergeCaffe; std::shared_ptr> spNmsCaffe; diff --git a/include/openpose/pose/wPoseExtractor.hpp b/include/openpose/pose/wPoseExtractor.hpp index d2c54569c1edc10b31556ed1528859843a2d5acd..dfab2a9e5c6fa41e6d3219dbe8271739c7c1bb98 100644 --- a/include/openpose/pose/wPoseExtractor.hpp +++ b/include/openpose/pose/wPoseExtractor.hpp @@ -61,7 +61,7 @@ namespace op // Extract people pose for (auto& tDatum : *tDatums) { - spPoseExtractor->forwardPass(tDatum.inputNetData, Point{tDatum.cvInputData.cols, tDatum.cvInputData.rows}); + spPoseExtractor->forwardPass(tDatum.inputNetData, Point{tDatum.cvInputData.cols, tDatum.cvInputData.rows}, tDatum.scaleRatios); tDatum.poseHeatMaps = spPoseExtractor->getHeatMaps(); tDatum.poseKeypoints = spPoseExtractor->getPoseKeypoints(); tDatum.scaleNetToOutput = spPoseExtractor->getScaleNetToOutput(); diff --git a/include/openpose/utilities/cuda.hu b/include/openpose/utilities/cuda.hu index ae1630cc3419c7d9ee52a7b2499925865b01186f..5e060006823f89fa72d714ce2117f5f6bf7dd86e 100644 --- a/include/openpose/utilities/cuda.hu +++ b/include/openpose/utilities/cuda.hu @@ -81,7 +81,8 @@ namespace op // Cubic interpolation template - inline __device__ void cubicSequentialData(int* xIntArray, int* yIntArray, T& dx, T& dy, const T xSource, const T ySource, const int width, const int height) + inline __device__ void cubicSequentialData(int* 
xIntArray, int* yIntArray, T& dx, T& dy, const T xSource, const T ySource, + const int width, const int height) { xIntArray[1] = fastTruncate(int(xSource + 1e-5), 0, width - 1); xIntArray[0] = fastMax(0, xIntArray[1] - 1); @@ -97,7 +98,7 @@ namespace op } template - inline __device__ T cubicInterpolation(const T v0, const T v1, const T v2, const T v3, const T dx) + inline __device__ T cubicInterpolate(const T v0, const T v1, const T v2, const T v3, const T dx) { // http://www.paulinternet.nl/?page=bicubic // const auto a = (-0.5f * v0 + 1.5f * v1 - 1.5f * v2 + 0.5f * v3); @@ -108,10 +109,12 @@ namespace op + (v0 - 2.5f * v1 + 2.f * v2 - 0.5f * v3) * dx * dx - 0.5f * (v0 - v2) * dx // + (-0.5f * v0 + 0.5f * v2) * dx + v1; + // return v1 + 0.5f * dx * (v2 - v0 + dx * (2.f * v0 - 5.f * v1 + 4.f * v2 - v3 + dx * (3.f * (v1 - v2) + v3 - v0))); } template - inline __device__ T cubicResize(const T* const sourcePtr, const T xSource, const T ySource, const int widthSource, const int heightSource, const int widthSourcePtr) + inline __device__ T bicubicInterpolate(const T* const sourcePtr, const T xSource, const T ySource, const int widthSource, + const int heightSource, const int widthSourcePtr) { int xIntArray[4]; int yIntArray[4]; @@ -122,16 +125,17 @@ namespace op T temp[4]; for (unsigned char i = 0; i < 4; i++) { - const int offset = yIntArray[i]*widthSourcePtr; - temp[i] = cubicInterpolation(sourcePtr[offset + xIntArray[0]], sourcePtr[offset + xIntArray[1]], sourcePtr[offset + xIntArray[2]], sourcePtr[offset + xIntArray[3]], dx); + const auto offset = yIntArray[i]*widthSourcePtr; + temp[i] = cubicInterpolate(sourcePtr[offset + xIntArray[0]], sourcePtr[offset + xIntArray[1]], + sourcePtr[offset + xIntArray[2]], sourcePtr[offset + xIntArray[3]], dx); } - return cubicInterpolation(temp[0], temp[1], temp[2], temp[3], dy); + return cubicInterpolate(temp[0], temp[1], temp[2], temp[3], dy); } template inline __device__ T addWeighted(const T value1, const T value2, const T 
alphaValue2) { - return (1 - alphaValue2) * value1 + alphaValue2 * value2; + return (1.f - alphaValue2) * value1 + alphaValue2 * value2; } template diff --git a/include/openpose/wrapper/wrapper.hpp b/include/openpose/wrapper/wrapper.hpp index 18af1cff50400acc66b404db14a5607d89463b6d..9739c1d494d1607b885366ab1caa649d51f8f189 100644 --- a/include/openpose/wrapper/wrapper.hpp +++ b/include/openpose/wrapper/wrapper.hpp @@ -552,13 +552,13 @@ namespace op wDatumProducer = nullptr; // Pose estimators - const Point& netOutputSize = wrapperStructPose.netInputSize; + const Point& poseNetOutputSize = wrapperStructPose.netInputSize; std::vector> poseExtractors; for (auto gpuId = 0; gpuId < gpuNumber; gpuId++) poseExtractors.emplace_back(std::make_shared( - wrapperStructPose.netInputSize, netOutputSize, finalOutputSize, wrapperStructPose.scalesNumber, - wrapperStructPose.scaleGap, wrapperStructPose.poseModel, wrapperStructPose.modelFolder, - gpuId + gpuNumberStart, wrapperStructPose.heatMapTypes, wrapperStructPose.heatMapScale + wrapperStructPose.netInputSize, poseNetOutputSize, finalOutputSize, wrapperStructPose.scalesNumber, + wrapperStructPose.poseModel, wrapperStructPose.modelFolder, gpuId + gpuNumberStart, + wrapperStructPose.heatMapTypes, wrapperStructPose.heatMapScale )); // Pose renderers @@ -572,7 +572,7 @@ namespace op for (auto gpuId = 0; gpuId < poseExtractors.size(); gpuId++) { poseRenderers.emplace_back(std::make_shared( - netOutputSize, finalOutputSize, wrapperStructPose.poseModel, poseExtractors[gpuId], + poseNetOutputSize, finalOutputSize, wrapperStructPose.poseModel, poseExtractors[gpuId], wrapperStructPose.blendOriginalFrame, alphaKeypoint, alphaHeatMap, wrapperStructPose.defaultPartToRender )); @@ -678,7 +678,7 @@ namespace op // Re-scale pose if desired if (wrapperStructPose.keypointScale != ScaleMode::OutputResolution && (wrapperStructPose.keypointScale != ScaleMode::InputResolution || (finalOutputSize != producerSize)) - && 
(wrapperStructPose.keypointScale != ScaleMode::NetOutputResolution || (finalOutputSize != netOutputSize))) + && (wrapperStructPose.keypointScale != ScaleMode::NetOutputResolution || (finalOutputSize != poseNetOutputSize))) { auto keypointScaler = std::make_shared(wrapperStructPose.keypointScale); mPostProcessingWs.emplace_back(std::make_shared>(keypointScaler)); diff --git a/src/openpose/core/cvMatToOpInput.cpp b/src/openpose/core/cvMatToOpInput.cpp index 225864c713ccb6217a278416012f8da8bae4efb6..d0570378e955f89edec13e2331984e6b8cae86df 100644 --- a/src/openpose/core/cvMatToOpInput.cpp +++ b/src/openpose/core/cvMatToOpInput.cpp @@ -10,9 +10,19 @@ namespace op mScaleGap{scaleGap}, mInputNetSize4D{{mScaleNumber, 3, netInputResolution.y, netInputResolution.x}} { + try + { + // Security checks + if (netInputResolution.x % 16 != 0 || netInputResolution.y % 16 != 0) + error("Net input resolution must be multiples of 16.", __LINE__, __FUNCTION__, __FILE__); + } + catch (const std::exception& e) + { + error(e.what(), __LINE__, __FUNCTION__, __FILE__); + } } - Array CvMatToOpInput::format(const cv::Mat& cvInputData) const + std::pair, std::vector> CvMatToOpInput::format(const cv::Mat& cvInputData) const { try { @@ -22,29 +32,35 @@ namespace op // inputNetData - Reescale keeping aspect ratio and transform to float the input deep net image Array inputNetData{mInputNetSize4D}; + std::vector scaleRatios(mScaleNumber, 1.f); const auto inputNetDataOffset = inputNetData.getVolume(1, 3); for (auto i = 0; i < mScaleNumber; i++) { - const auto requestedScale = 1.f - i*mScaleGap; - if (requestedScale > 1.f) - error("All scales must be <= 1, i.e. 1-num_scales*scale_gap <= 1", __LINE__, __FUNCTION__, __FILE__); + const auto currentScale = 1.f - i*mScaleGap; + if (currentScale < 0.f || 1.f < currentScale) + error("All scales must be in the range [0, 1], i.e. 
0 <= 1-num_scales*scale_gap <= 1", __LINE__, __FUNCTION__, __FILE__); const auto netInputWidth = inputNetData.getSize(3); - const auto targetWidth = fastTruncate(16 * intRound(netInputWidth * requestedScale / 16.), 1, netInputWidth/16*16); + const auto targetWidth = fastTruncate(intRound(netInputWidth * currentScale) / 16 * 16, 1, netInputWidth); const auto netInputHeight = inputNetData.getSize(2); - const auto targetHeight = fastTruncate(16 * intRound(netInputHeight * requestedScale / 16.), 1, netInputHeight/16*16); + const auto targetHeight = fastTruncate(intRound(netInputHeight * currentScale) / 16 * 16, 1, netInputHeight); const Point targetSize{targetWidth, targetHeight}; const auto scale = resizeGetScaleFactor(Point{cvInputData.cols, cvInputData.rows}, targetSize); const cv::Mat frameWithNetSize = resizeFixedAspectRatio(cvInputData, scale, Point{netInputWidth, netInputHeight}); + // Fill inputNetData uCharCvMatToFloatPtr(inputNetData.getPtr() + i * inputNetDataOffset, frameWithNetSize, true); + // Fill scaleRatios + scaleRatios[i] = scale; + if (i > 0) + scaleRatios[i] /= scaleRatios[0]; } - - return inputNetData; + scaleRatios.at(0) /= scaleRatios[0]; + return std::make_pair(inputNetData, scaleRatios); } catch (const std::exception& e) { error(e.what(), __LINE__, __FUNCTION__, __FILE__); - return Array{}; + return std::make_pair(Array{}, std::vector{}); } } } diff --git a/src/openpose/core/datum.cpp b/src/openpose/core/datum.cpp index 950cdc6bb8fbf6f0a0d78248818cbdfa9f7bff61..ef2f1444038c5b54259cbbcd931a19356e7a8440 100644 --- a/src/openpose/core/datum.cpp +++ b/src/openpose/core/datum.cpp @@ -22,10 +22,12 @@ namespace op poseHeatMaps{datum.poseHeatMaps}, faceRectangles{datum.faceRectangles}, faceKeypoints{datum.faceKeypoints}, + handRectangles{datum.handRectangles}, handKeypoints{datum.handKeypoints}, // Other parameters scaleInputToOutput{datum.scaleInputToOutput}, scaleNetToOutput{datum.scaleNetToOutput}, + scaleRatios{datum.scaleRatios}, 
elementRendered{datum.elementRendered} { } @@ -48,10 +50,12 @@ namespace op poseHeatMaps = datum.poseHeatMaps, faceRectangles = datum.faceRectangles, faceKeypoints = datum.faceKeypoints, + handRectangles = datum.handRectangles, handKeypoints = datum.handKeypoints, // Other parameters scaleInputToOutput = datum.scaleInputToOutput; scaleNetToOutput = datum.scaleNetToOutput; + scaleRatios = datum.scaleRatios; elementRendered = datum.elementRendered; // Return return *this; @@ -85,8 +89,10 @@ namespace op std::swap(poseHeatMaps, datum.poseHeatMaps); std::swap(faceRectangles, datum.faceRectangles); std::swap(faceKeypoints, datum.faceKeypoints); + std::swap(handRectangles, datum.handRectangles); std::swap(handKeypoints, datum.handKeypoints); // Other parameters + std::swap(scaleRatios, datum.scaleRatios); std::swap(elementRendered, datum.elementRendered); } catch (const std::exception& e) @@ -113,10 +119,12 @@ namespace op std::swap(poseHeatMaps, datum.poseHeatMaps); std::swap(faceRectangles, datum.faceRectangles); std::swap(faceKeypoints, datum.faceKeypoints); + std::swap(handRectangles, datum.handRectangles); std::swap(handKeypoints, datum.handKeypoints); // Other parameters scaleInputToOutput = datum.scaleInputToOutput; scaleNetToOutput = datum.scaleNetToOutput; + std::swap(scaleRatios, datum.scaleRatios); std::swap(elementRendered, datum.elementRendered); // Return return *this; @@ -151,10 +159,12 @@ namespace op datum.poseHeatMaps = poseHeatMaps.clone(); datum.faceRectangles = faceRectangles; datum.faceKeypoints = faceKeypoints.clone(); + datum.handRectangles = handRectangles; /* plain std::vector copy from this instance, like faceRectangles */ datum.handKeypoints = handKeypoints.clone(); // Other parameters datum.scaleInputToOutput = scaleInputToOutput; datum.scaleNetToOutput = scaleNetToOutput; + datum.scaleRatios = scaleRatios; datum.elementRendered = elementRendered; // Return return std::move(datum); diff --git a/src/openpose/core/resizeAndMergeBase.cpp b/src/openpose/core/resizeAndMergeBase.cpp index 
bfcf2ea761ba50e6aec6eda18d7d08c6a128b716..463074c89deadbc3303e7bc0c3a9a8eb37fc7700 100644 --- a/src/openpose/core/resizeAndMergeBase.cpp +++ b/src/openpose/core/resizeAndMergeBase.cpp @@ -6,13 +6,14 @@ namespace op { template - void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, const T scaleGap) + void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array& targetSize, + const std::array& sourceSize, const std::vector& scaleRatios) { try { UNUSED(targetPtr); UNUSED(sourcePtr); - UNUSED(scaleGap); + UNUSED(scaleRatios); UNUSED(targetSize); UNUSED(sourceSize); error("CPU version not completely implemented.", __LINE__, __FUNCTION__, __FILE__); @@ -61,6 +62,8 @@ namespace op } } - template void resizeAndMergeCpu(float* targetPtr, const float* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, const float scaleGap); - template void resizeAndMergeCpu(double* targetPtr, const double* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, const double scaleGap); + template void resizeAndMergeCpu(float* targetPtr, const float* const sourcePtr, const std::array& targetSize, + const std::array& sourceSize, const std::vector& scaleRatios); + template void resizeAndMergeCpu(double* targetPtr, const double* const sourcePtr, const std::array& targetSize, + const std::array& sourceSize, const std::vector& scaleRatios); } diff --git a/src/openpose/core/resizeAndMergeBase.cu b/src/openpose/core/resizeAndMergeBase.cu index 98038462f89cfec13e3662054b834a2c2d057cc3..3e57c958749db31208e0c79c94350385452b0efc 100644 --- a/src/openpose/core/resizeAndMergeBase.cu +++ b/src/openpose/core/resizeAndMergeBase.cu @@ -8,7 +8,8 @@ namespace op const auto THREADS_PER_BLOCK_1D = 16u; template - __global__ void resizeKernel(T* targetPtr, const T* const sourcePtr, const int sourceWidth, const int sourceHeight, const int targetWidth, const int targetHeight) + 
__global__ void resizeKernel(T* targetPtr, const T* const sourcePtr, const int sourceWidth, const int sourceHeight, const int targetWidth, + const int targetHeight) { const auto x = (blockIdx.x * blockDim.x) + threadIdx.x; const auto y = (blockIdx.y * blockDim.y) + threadIdx.y; @@ -20,12 +21,12 @@ namespace op const T xSource = (x + 0.5f) / scaleWidth - 0.5f; const T ySource = (y + 0.5f) / scaleHeight - 0.5f; - targetPtr[y*targetWidth+x] = cubicResize(sourcePtr, xSource, ySource, sourceWidth, sourceHeight, sourceWidth); + targetPtr[y*targetWidth+x] = bicubicInterpolate(sourcePtr, xSource, ySource, sourceWidth, sourceHeight, sourceWidth); } } template - __global__ void resizeKernelAndMerge(T* targetPtr, const T* const sourcePtr, const int sourceNumOffset, const int num, const T scaleGap, + __global__ void resizeKernelAndMerge(T* targetPtr, const T* const sourcePtr, const int sourceNumOffset, const int num, const T* scaleRatios, const int sourceWidth, const int sourceHeight, const int targetWidth, const int targetHeight) { const auto x = (blockIdx.x * blockDim.x) + threadIdx.x; @@ -38,17 +39,17 @@ namespace op // targetPixel = -1000.f; // For fastMax for (auto n = 0; n < num; n++) { - const auto numberScale = 1 - n * scaleGap; - const auto widthPaddedSource = int(sourceWidth * numberScale); - const auto heightPaddedSource = int(sourceHeight * numberScale); + const auto currentWidth = sourceWidth * scaleRatios[n]; + const auto currentHeight = sourceHeight * scaleRatios[n]; - const auto scaleWidth = targetWidth / T(widthPaddedSource); - const auto scaleHeight = targetHeight / T(heightPaddedSource); + const auto scaleWidth = targetWidth / currentWidth; + const auto scaleHeight = targetHeight / currentHeight; const T xSource = (x + 0.5f) / scaleWidth - 0.5f; const T ySource = (y + 0.5f) / scaleHeight - 0.5f; const T* const sourcePtrN = sourcePtr + n * sourceNumOffset; - const auto interpolated = cubicResize(sourcePtrN, xSource, ySource, widthPaddedSource, 
heightPaddedSource, sourceWidth); + const auto interpolated = bicubicInterpolate(sourcePtrN, xSource, ySource, intRound(currentWidth), + intRound(currentHeight), sourceWidth); targetPixel += interpolated; // targetPixel = fastMax(targetPixel, interpolated); } @@ -57,7 +58,8 @@ namespace op } template - void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, const T scaleGap) + void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array& targetSize, + const std::array& sourceSize, const std::vector& scaleRatios) { try { @@ -73,21 +75,42 @@ namespace op const auto sourceChannelOffset = sourceHeight * sourceWidth; const auto targetChannelOffset = targetWidth * targetHeight; + // No multi-scale merging if (targetSize[0] > 1) { for (auto n = 0; n < num; n++) - for (auto c = 0; c < channels; c++) - resizeKernel<<>>(targetPtr + (n*channels + c) * targetChannelOffset, sourcePtr + (n*channels + c) * sourceChannelOffset, + { + const auto offsetBase = n*channels; + for (auto c = 0 ; c < channels ; c++) + { + const auto offset = offsetBase + c; + resizeKernel<<>>(targetPtr + offset * targetChannelOffset, + sourcePtr + offset * sourceChannelOffset, sourceWidth, sourceHeight, targetWidth, targetHeight); + } + } } + // Multi-scale merging else { - if (scaleGap <= 0.f && num != targetSize[0]) - error("The scale gap must be greater than 0.", __LINE__, __FUNCTION__, __FILE__); + // If num_scales > 1 --> scaleRatios must be set + if (scaleRatios.size() != num) + error("The scale ratios size must be equal than the number of scales.", __LINE__, __FUNCTION__, __FILE__); + const auto maxScales = 10; + if (scaleRatios.size() > maxScales) + error("The maximum number of scales is " + std::to_string(maxScales) + ".", __LINE__, __FUNCTION__, __FILE__); + // Copy scaleRatios + T* scaleRatiosGpuPtr; + cudaMalloc((void**)&scaleRatiosGpuPtr, maxScales * sizeof(T)); + cudaMemcpy(scaleRatiosGpuPtr, 
scaleRatios.data(), scaleRatios.size() * sizeof(T), cudaMemcpyHostToDevice); + // Perform resize + merging const auto sourceNumOffset = channels * sourceChannelOffset; - for (auto c = 0; c < channels; c++) - resizeKernelAndMerge<<>>(targetPtr + c * targetChannelOffset, sourcePtr + c * sourceChannelOffset, sourceNumOffset, - num, scaleGap, sourceWidth, sourceHeight, targetWidth, targetHeight); + for (auto c = 0 ; c < channels ; c++) + resizeKernelAndMerge<<>>(targetPtr + c * targetChannelOffset, + sourcePtr + c * sourceChannelOffset, sourceNumOffset, + num, scaleRatiosGpuPtr, sourceWidth, sourceHeight, targetWidth, targetHeight); + // Free memory + cudaFree(scaleRatiosGpuPtr); } cudaCheck(__LINE__, __FUNCTION__, __FILE__); @@ -98,6 +121,8 @@ namespace op } } - template void resizeAndMergeGpu(float* targetPtr, const float* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, const float scaleGap); - template void resizeAndMergeGpu(double* targetPtr, const double* const sourcePtr, const std::array& targetSize, const std::array& sourceSize, const double scaleGap); + template void resizeAndMergeGpu(float* targetPtr, const float* const sourcePtr, const std::array& targetSize, + const std::array& sourceSize, const std::vector& scaleRatios); + template void resizeAndMergeGpu(double* targetPtr, const double* const sourcePtr, const std::array& targetSize, + const std::array& sourceSize, const std::vector& scaleRatios); } diff --git a/src/openpose/core/resizeAndMergeCaffe.cpp b/src/openpose/core/resizeAndMergeCaffe.cpp index 2f30afc8421800bed5a5f7d53885027285e0aeb9..91a2658b95c4f0522a19ba6fe355858ac84cd125 100644 --- a/src/openpose/core/resizeAndMergeCaffe.cpp +++ b/src/openpose/core/resizeAndMergeCaffe.cpp @@ -8,7 +8,8 @@ namespace op { template - ResizeAndMergeCaffe::ResizeAndMergeCaffe() + ResizeAndMergeCaffe::ResizeAndMergeCaffe() : + mScaleRatios{1} { } @@ -29,7 +30,8 @@ namespace op } template - void ResizeAndMergeCaffe::Reshape(const 
std::vector*>& bottom, const std::vector*>& top, const float factor, const bool mergeFirstDimension) + void ResizeAndMergeCaffe::Reshape(const std::vector*>& bottom, const std::vector*>& top, + const float factor, const bool mergeFirstDimension) { try { @@ -54,11 +56,11 @@ namespace op } template - void ResizeAndMergeCaffe::setScaleGap(const T scaleGap) + void ResizeAndMergeCaffe::setScaleRatios(const std::vector& scaleRatios) { try { - mScaleGap = {scaleGap}; + mScaleRatios = {scaleRatios}; } catch (const std::exception& e) { @@ -71,7 +73,7 @@ namespace op { try { - resizeAndMergeCpu(top.at(0)->mutable_cpu_data(), bottom.at(0)->cpu_data(), mTopSize, mBottomSize, mScaleGap); + resizeAndMergeCpu(top.at(0)->mutable_cpu_data(), bottom.at(0)->cpu_data(), mTopSize, mBottomSize, mScaleRatios); } catch (const std::exception& e) { @@ -84,7 +86,7 @@ namespace op { try { - resizeAndMergeGpu(top.at(0)->mutable_gpu_data(), bottom.at(0)->gpu_data(), mTopSize, mBottomSize, mScaleGap); + resizeAndMergeGpu(top.at(0)->mutable_gpu_data(), bottom.at(0)->gpu_data(), mTopSize, mBottomSize, mScaleRatios); } catch (const std::exception& e) { @@ -93,7 +95,8 @@ namespace op } template - void ResizeAndMergeCaffe::Backward_cpu(const std::vector*>& top, const std::vector& propagate_down, const std::vector*>& bottom) + void ResizeAndMergeCaffe::Backward_cpu(const std::vector*>& top, const std::vector& propagate_down, + const std::vector*>& bottom) { try { @@ -109,7 +112,8 @@ namespace op } template - void ResizeAndMergeCaffe::Backward_gpu(const std::vector*>& top, const std::vector& propagate_down, const std::vector*>& bottom) + void ResizeAndMergeCaffe::Backward_gpu(const std::vector*>& top, const std::vector& propagate_down, + const std::vector*>& bottom) { try { diff --git a/src/openpose/pose/poseExtractorCaffe.cpp b/src/openpose/pose/poseExtractorCaffe.cpp index 2834ae618f00ada75f8342ee4d10beab4b7ecee5..11f9b91a77a0f05df80c8599fc74648b3856995a 100644 --- 
a/src/openpose/pose/poseExtractorCaffe.cpp +++ b/src/openpose/pose/poseExtractorCaffe.cpp @@ -11,9 +11,10 @@ namespace op { PoseExtractorCaffe::PoseExtractorCaffe(const Point& netInputSize, const Point& netOutputSize, const Point& outputSize, const int scaleNumber, - const float scaleGap, const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector& heatMapTypes, + const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector& heatMapTypes, const ScaleMode heatMapScale) : PoseExtractor{netOutputSize, outputSize, poseModel, heatMapTypes, heatMapScale}, + mResizeScale{mNetOutputSize.x / (float)netInputSize.x}, spNet{std::make_shared(std::array{scaleNumber, 3, (int)netInputSize.y, (int)netInputSize.x}, modelFolder + POSE_PROTOTXT[(int)poseModel], modelFolder + POSE_TRAINED_MODEL[(int)poseModel], gpuId)}, spResizeAndMergeCaffe{std::make_shared>()}, @@ -22,9 +23,10 @@ namespace op { try { - checkE(netOutputSize.x, netInputSize.x, "Net input and output size must be equal.", __LINE__, __FUNCTION__, __FILE__); - checkE(netOutputSize.y, netInputSize.y, "Net input and output size must be equal.", __LINE__, __FUNCTION__, __FILE__); - spResizeAndMergeCaffe->setScaleGap(scaleGap); + const auto resizeScale = mNetOutputSize.x / (float)netInputSize.x; + const auto resizeScaleCheck = resizeScale / (mNetOutputSize.y/(float)netInputSize.y); + if (1+1e-6 < resizeScaleCheck || resizeScaleCheck < 1-1e-6) + error("Net input and output size must be proportional. 
resizeScaleCheck = " + std::to_string(resizeScaleCheck), __LINE__, __FUNCTION__, __FILE__); } catch (const std::exception& e) { @@ -49,7 +51,7 @@ namespace op // HeatMaps extractor blob and layer spHeatMapsBlob = {std::make_shared>(1,1,1,1)}; - spResizeAndMergeCaffe->Reshape({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()}, POSE_CCN_DECREASE_FACTOR[(int)mPoseModel]); + spResizeAndMergeCaffe->Reshape({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()}, mResizeScale * POSE_CCN_DECREASE_FACTOR[(int)mPoseModel]); cudaCheck(__LINE__, __FUNCTION__, __FILE__); // Pose extractor blob and layer @@ -71,7 +73,7 @@ namespace op } } - void PoseExtractorCaffe::forwardPass(const Array& inputNetData, const Point& inputDataSize) + void PoseExtractorCaffe::forwardPass(const Array& inputNetData, const Point& inputDataSize, const std::vector& scaleRatios) { try { @@ -83,6 +85,7 @@ namespace op spNet->forwardPass(inputNetData.getConstPtr()); // ~79.3836ms // 2. Resize heat maps + merge different scales + spResizeAndMergeCaffe->setScaleRatios(scaleRatios); #ifndef CPU_ONLY spResizeAndMergeCaffe->Forward_gpu({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()}); // ~5ms cudaCheck(__LINE__, __FUNCTION__, __FILE__); diff --git a/src/openpose/pose/poseRenderGpu.cu b/src/openpose/pose/poseRenderGpu.cu index 7a5cea0cee7de65eede0d0bef02228bc6222ab6b..b01c7be914c90a5219d94748632c5e1a01392a33 100644 --- a/src/openpose/pose/poseRenderGpu.cu +++ b/src/openpose/pose/poseRenderGpu.cu @@ -244,7 +244,7 @@ namespace op const auto ySource = (y + 0.5f) / scaleToKeepRatio - 0.5f; const auto heatMapOffset = part * widthHeatMap * heightHeatMap; const auto* const heatMapPtrOffsetted = heatMapPtr + heatMapOffset; - const auto interpolatedValue = cubicResize(heatMapPtrOffsetted, xSource, ySource, widthHeatMap, heightHeatMap, widthHeatMap); + const auto interpolatedValue = bicubicInterpolate(heatMapPtrOffsetted, xSource, ySource, widthHeatMap, heightHeatMap, widthHeatMap); float rgbColor[3]; 
getColorHeatMap(rgbColor, interpolatedValue, 0.f, 1.f);