Improved 3d triangulation

fcf9ab8e · Gines Hidalgo · a9ede03f · fcf9ab8e · fcf9ab8e · fcf9ab8e
10 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -554,7 +554,7 @@ if (WIN32)
    download_zip("caffe_cpu_2018_05_27.zip" ${OP_WIN_URL} ${FIND_LIB_PREFIX} 87E8401B6DFBAC5B8E909DD20E3B3390)
  else (${GPU_MODE} MATCHES "OPENCL")
    # download_zip("caffe_2019_03_12.zip" ${OP_WIN_URL} ${FIND_LIB_PREFIX} 859a592310f0928fd4f40da1456a217f)
-    download_zip("caffe_15_2019_05_16.zip" ${OP_WIN_URL} ${FIND_LIB_PREFIX} f2d8beee9e4959fa419dcb54d507107c)
+    download_zip("caffe_15_2019_05_16.zip" ${OP_WIN_URL} ${FIND_LIB_PREFIX} 12F7A675A3276CD33C76B37D21C2F421)
  endif (${GPU_MODE} MATCHES "OPENCL")
  if (WITH_3D_RENDERER)
    download_zip("freeglut_2018_01_14.zip" ${OP_WIN_URL} ${FIND_LIB_PREFIX} BB182187285E06880F0EDE3A39530091)

--- a/doc/demo_overview.md
+++ b/doc/demo_overview.md
@@ -197,7 +197,7 @@ Each flag is divided into flag name, default value, and description.

 8. OpenPose 3-D Reconstruction
 - DEFINE_bool(3d,                         false,          "Running OpenPose 3-D reconstruction demo: 1) Reading from a stereo camera system. 2) Performing 3-D reconstruction from the multiple views. 3) Displaying 3-D reconstruction results. Note that it will only display 1 person. If multiple people is present, it will fail.");
- DEFINE_int32(3d_min_views,              -1,             "Minimum number of views required to reconstruct each keypoint. By default (-1), it will require all the cameras to see the keypoint in order to reconstruct it.");
+- DEFINE_int32(3d_min_views,              -1,             "Minimum number of views required to reconstruct each keypoint. By default (-1), it will require max(2, min(4, #cameras-1)) cameras to see the keypoint in order to reconstruct it.");
 - DEFINE_int32(3d_views,                  -1,             "Complementary option for `--image_dir` or `--video`. OpenPose will read as many images per iteration, allowing tasks such as stereo camera processing (`--3d`). Note that `--camera_parameter_path` must be set. OpenPose must find as many `xml` files in the parameter folder as this number indicates.");

 9. Extra algorithms

--- a/doc/quick_start.md
+++ b/doc/quick_start.md
@@ -165,11 +165,13 @@ build\x64\Release\OpenPoseDemo.exe --flir_camera --3d --number_people_max 1 --fa
 ./build/examples/openpose/openpose.bin --image_dir output_folder_path/ --3d_views 3 --3d --number_people_max 1 --output_resolution {desired_output_resolution}
 ```

-5. Reconstruction when at least n visible views
+5. Reconstruction when the keypoint is visible in at least `x` camera views out of the total `n` cameras
 ```
 # Ubuntu and Mac (same flags for Windows version)
-# Assuming >=2 cameras and reconstruction when at least 2 visible views
+# Reconstruction when a keypoint is visible in at least 2 camera views (assuming `n` >= 2)
 ./build/examples/openpose/openpose.bin --flir_camera --3d --number_people_max 1 --3d_min_views 2 --output_resolution {desired_output_resolution}
+# Reconstruction when a keypoint is visible in at least max(2, min(4, n-1)) camera views
+./build/examples/openpose/openpose.bin --flir_camera --3d --number_people_max 1 --output_resolution {desired_output_resolution}
 ```



--- a/doc/release_notes.md
+++ b/doc/release_notes.md
@@ -367,8 +367,12 @@ OpenPose Library - Release Notes

 ## Current version - future OpenPose 1.5.1
 1. Main improvements:
+    1. Highly improved 3D triangulation for >3 cameras by fixing some small bugs.
 2. Functions or parameters renamed:
+    1. `--3d_min_views` default value (-1) no longer means that all camera views are required. Instead, it will be equal to max(2, min(4, #cameras-1)). This should provide a good trade-off between recall and precission.
 3. Main bugs fixed:
+    1. Windows: Added back support for OpenGL and Spinnaker, as well as DLLs for debug compilation.
+4. Changes/additions that affect the compatibility with the OpenPose Unity Plugin:




--- a/include/openpose/3d/poseTriangulation.hpp
+++ b/include/openpose/3d/poseTriangulation.hpp
@@ -33,13 +33,13 @@ namespace op

        void initializationOnThread();

-        Array<float> reconstructArray(const std::vector<Array<float>>& keypointsVector,
-                                      const std::vector<cv::Mat>& cameraMatrices,
-                                      const std::vector<Point<int>>& imageSizes) const;
+        Array<float> reconstructArray(
+            const std::vector<Array<float>>& keypointsVector, const std::vector<cv::Mat>& cameraMatrices,
+            const std::vector<Point<int>>& imageSizes) const;

-        std::vector<Array<float>> reconstructArray(const std::vector<std::vector<Array<float>>>& keypointsVector,
-                                                   const std::vector<cv::Mat>& cameraMatrices,
-                                                   const std::vector<Point<int>>& imageSizes) const;
+        std::vector<Array<float>> reconstructArray(
+            const std::vector<std::vector<Array<float>>>& keypointsVector, const std::vector<cv::Mat>& cameraMatrices,
+            const std::vector<Point<int>>& imageSizes) const;

    private:
        const int mMinViews3d;

--- a/include/openpose/flags.hpp
+++ b/include/openpose/flags.hpp
@@ -160,7 +160,8 @@ DEFINE_bool(3d,                         false,          "Running OpenPose 3-D re
                                                        " results. Note that it will only display 1 person. If multiple people is present, it will"
                                                        " fail.");
 DEFINE_int32(3d_min_views,              -1,             "Minimum number of views required to reconstruct each keypoint. By default (-1), it will"
-                                                        " require all the cameras to see the keypoint in order to reconstruct it.");
+                                                        " require max(2, min(4, #cameras-1)) cameras to see the keypoint in order to reconstruct"
+                                                        " it.");
 DEFINE_int32(3d_views,                  -1,             "Complementary option for `--image_dir` or `--video`. OpenPose will read as many images per"
                                                        " iteration, allowing tasks such as stereo camera processing (`--3d`). Note that"
                                                        " `--camera_parameter_path` must be set. OpenPose must find as many `xml` files in the"

--- a/include/openpose/wrapper/wrapperStructExtra.hpp
+++ b/include/openpose/wrapper/wrapperStructExtra.hpp
@@ -22,7 +22,8 @@ namespace op

        /**
         * Minimum number of views required to reconstruct each keypoint.
-         * By default (-1), it will require all the cameras to see the keypoint in order to reconstruct it.
+         * By default (-1), it will require max(2, min(4, #cameras-1)) cameras to see the keypoint in order to
+         * reconstruct it.
         */
        int minViews3d;


--- a/include/openpose/wrapper/wrapperStructOutput.hpp
+++ b/include/openpose/wrapper/wrapperStructOutput.hpp
@@ -152,12 +152,12 @@ namespace op
            const DataFormat writeKeypointFormat = DataFormat::Xml, const std::string& writeJson = "",
            const std::string& writeCocoJson = "", const int writeCocoJsonVariants = 1,
            const int writeCocoJsonVariant = 1, const std::string& writeImages = "",
-            const std::string& writeImagesFormat = "", const std::string& writeVideo = "",
+            const std::string& writeImagesFormat = "png", const std::string& writeVideo = "",
            const double writeVideoFps = -1., const bool writeVideoWithAudio = false,
-            const std::string& writeHeatMaps = "", const std::string& writeHeatMapsFormat = "",
+            const std::string& writeHeatMaps = "", const std::string& writeHeatMapsFormat = "png",
            const std::string& writeVideo3D = "", const std::string& writeVideoAdam = "",
            const std::string& writeBvh = "", const std::string& udpHost = "",
-            const std::string& udpPort = "");
+            const std::string& udpPort = "8051");
    };
 }


--- a/include/openpose/wrapper/wrapperStructPose.hpp
+++ b/include/openpose/wrapper/wrapperStructPose.hpp
@@ -209,15 +209,15 @@ namespace op
         * Since all the elements of the struct are public, they can also be manually filled.
         */
        WrapperStructPose(
-            const PoseMode poseMode = PoseMode::Enabled, const Point<int>& netInputSize = Point<int>{656, 368},
+            const PoseMode poseMode = PoseMode::Enabled, const Point<int>& netInputSize = Point<int>{-1, 368},
            const Point<int>& outputSize = Point<int>{-1, -1},
            const ScaleMode keypointScaleMode = ScaleMode::InputResolution, const int gpuNumber = -1,
-            const int gpuNumberStart = 0, const int scalesNumber = 1, const float scaleGap = 0.15f,
+            const int gpuNumberStart = 0, const int scalesNumber = 1, const float scaleGap = 0.25f,
            const RenderMode renderMode = RenderMode::Auto, const PoseModel poseModel = PoseModel::BODY_25,
            const bool blendOriginalFrame = true, const float alphaKeypoint = POSE_DEFAULT_ALPHA_KEYPOINT,
            const float alphaHeatMap = POSE_DEFAULT_ALPHA_HEAT_MAP, const int defaultPartToRender = 0,
            const std::string& modelFolder = "models/", const std::vector<HeatMapType>& heatMapTypes = {},
-            const ScaleMode heatMapScaleMode = ScaleMode::ZeroToOne, const bool addPartCandidates = false,
+            const ScaleMode heatMapScaleMode = ScaleMode::UnsignedChar, const bool addPartCandidates = false,
            const float renderThreshold = 0.05f, const int numberPeopleMax = -1, const bool maximizePositives = false,
            const double fpsMax = -1., const std::string& protoTxtPath = "", const std::string& caffeModelPath = "",
            const float upsamplingRatio = 0.f, const bool enableGoogleLogging = true);

--- a/src/openpose/3d/poseTriangulation.cpp
+++ b/src/openpose/3d/poseTriangulation.cpp
@@ -4,6 +4,7 @@
    #include <ceres/rotation.h>
 #endif
 #include <opencv2/calib3d/calib3d.hpp>
+#include <openpose/utilities/fastMath.hpp>
 #include <openpose/3d/poseTriangulation.hpp>

 namespace op
@@ -159,6 +160,16 @@ namespace op
    {
        try
        {
+            // Warning
+            if (cameraMatrices.size() >= 8)
+            {
+                error("We did not have that many camera views to test the 3D triangulation code, so it might not"
+                      " give the desired results here. But we would love to help! Please, share your video/images"
+                      " so we can test our code with them and guarantee the desired results even for >= 8"
+                      " cameras! Feel free to email them to gines@alumni.cmu.edu.",
+                      __LINE__, __FUNCTION__, __FILE__);
+            }
+
            // Information for 3 cameras:
            //     - Speed: triangulate ~0.01 ms vs. optimization ~0.2 ms
            //     - Accuracy: initial reprojection error ~14-21, reduced ~5% with non-linear optimization
@@ -178,8 +189,10 @@ namespace op
                && projectionError > 0.5 * reprojectionMaxAcceptable
                /*&& projectionError < 1.5 * reprojectionMaxAcceptable*/)
            {
+                // Find best projection
                auto bestReprojection = projectionError;
                auto bestReprojectionIndex = -1; // -1 means with all camera views
+                cv::Mat bestReconstructedPoint;
                for (auto i = 0u; i < cameraMatrices.size(); ++i)
                {
                    // Set initial values
@@ -188,31 +201,29 @@ namespace op
                    // Remove camera i
                    cameraMatricesSubset.erase(cameraMatricesSubset.begin() + i);
                    pointsOnEachCameraSubset.erase(pointsOnEachCameraSubset.begin() + i);
-                    // Remove camera i
+                    // Get new triangulation results
+                    cv::Mat reconstructedPointSubset;
                    const auto projectionErrorSubset = triangulate(
-                        reconstructedPoint, cameraMatricesSubset, pointsOnEachCameraSubset);
-                    // If projection doesn't change much, it usually means all points are bad.
-                    if (projectionErrorSubset > 0.9 * projectionError
-                        && projectionErrorSubset < 1.1 * projectionError)
-                    {
-                        bestReprojectionIndex = -1;
-                        continue;
-                    }
-                    // Save maximum
-                    if (bestReprojection > projectionErrorSubset)
+                        reconstructedPointSubset, cameraMatricesSubset, pointsOnEachCameraSubset);
+                    // If projection doesn't change much, this point is inlier (or all points are bad)
+                    // Thus, save new best results only if considerably better
+                    if (bestReprojection > projectionErrorSubset && projectionErrorSubset < 0.9*projectionError)
                    {
                        bestReprojection = projectionErrorSubset;
                        bestReprojectionIndex = i;
+                        bestReconstructedPoint = reconstructedPointSubset;
                    }
                }
-
-                if (bestReprojectionIndex != -1 && bestReprojection < 0.5 * reprojectionMaxAcceptable)
+                // Remove noisy camera
+                if (bestReprojectionIndex != -1) // && bestReprojection < 0.5 * reprojectionMaxAcceptable)
                {
                    // Remove camera i
                    cameraMatricesFinal.erase(cameraMatricesFinal.begin() + bestReprojectionIndex);
                    pointsOnEachCameraFinal.erase(pointsOnEachCameraFinal.begin() + bestReprojectionIndex);
+                    // Update reconstructedPoint & projectionError
+                    reconstructedPoint = bestReconstructedPoint;
+                    projectionError = bestReprojection;
                }
-		projectionError = bestReprojection; // updates the projection error with the best found after camera removal
            }

            #ifdef USE_CERES
@@ -324,11 +335,9 @@ namespace op
        }
    }

-    void reconstructArrayThread(Array<float>* keypoints3DPtr,
-                                const std::vector<Array<float>>& keypointsVector,
-                                const std::vector<cv::Mat>& cameraMatrices,
-                                const std::vector<Point<int>>& imageSizes,
-                                const int minViews3d)
+    bool reconstructArrayThread(
+        Array<float>* keypoints3DPtr, const std::vector<Array<float>>& keypointsVector,
+        const std::vector<cv::Mat>& cameraMatrices, const std::vector<Point<int>>& imageSizes, const int minViews3d)
    {
        try
        {
@@ -339,21 +348,26 @@ namespace op
                error("Only 1 camera detected. The 3-D reconstruction module can only be used with > 1 cameras"
                      " simultaneously. E.g., using FLIR stereo cameras (`--flir_camera`).",
                      __LINE__, __FUNCTION__, __FILE__);
-            // Get number body parts
-            auto detectionMissed = false;
-            for (auto& keypoints : keypointsVector)
+            // Get number body parts and whether at least 2 cameras have keypoints
+            auto detectionMissed = 0;
+            auto numberBodyParts = 0;
+            auto channel0Length = 0;
+            for (const auto& keypoints : keypointsVector)
            {
-                if (keypoints.empty())
+                if (!keypoints.empty())
                {
-                    detectionMissed = true;
-                    break;
+                    detectionMissed++;
+                    if (detectionMissed > 1)
+                    {
+                        numberBodyParts = keypoints.getSize(1);
+                        channel0Length = keypoints.getSize(2);
+                        break;
+                    }
                }
            }
-            // If at least one keypoints element not empty
-            if (!detectionMissed)
+            // If at least 2 set of keypoints not empty
+            if (numberBodyParts > 0)
            {
-                const auto numberBodyParts = keypointsVector.at(0).getSize(1);
-                const auto channel0Length = keypointsVector.at(0).getSize(2);
                // Create x-y vector from high score results
                std::vector<int> indexesUsed;
                std::vector<std::vector<cv::Point2d>> xyPoints;
@@ -371,15 +385,19 @@ namespace op
                        const auto& keypoints = keypointsVector[i];
                        if (isValidKeypoint(&keypoints[baseIndex], imageSizes[i]))
                        {
-                            xyPointsElement.emplace_back(cv::Point2d{keypoints[baseIndex],
-                                                                     keypoints[baseIndex+1]});
+                            xyPointsElement.emplace_back(
+                                cv::Point2d{keypoints[baseIndex], keypoints[baseIndex+1]});
                            cameraMatricesElement.emplace_back(cameraMatrices[i]);
                        }
                    }
-                    // If visible from all views (minViews3d < 0)
-                    // or if visible for at least minViews3d views
-                    if ((minViews3d < 0 && cameraMatricesElement.size() == cameraMatrices.size())
-                        || (minViews3d > 1 && minViews3d <= (int)xyPointsElement.size()))
+                    const auto minViews3dValue = (minViews3d > 0 ? minViews3d
+                        : fastMax(2u, fastMin(4u, (unsigned int)cameraMatrices.size()-1u)));
+                    // If visible for at least minViews3dValue views
+                    if (minViews3dValue <= xyPointsElement.size())
+                    // Old code
+                    // // If visible from all views (minViews3d < 0) or if visible for at least minViews3d views
+                    // if ((minViews3d < 0 && cameraMatricesElement.size() == cameraMatrices.size())
+                    //     || (minViews3d > 1 && minViews3d <= (int)xyPointsElement.size()))
                    {
                        indexesUsed.emplace_back(part);
                        xyPoints.emplace_back(xyPointsElement);
@@ -432,7 +450,8 @@ namespace op
                            atLeastOnePointProjected = true;
                        }
                    }
-                    if (!atLeastOnePointProjected || reprojectionErrorTotal > 60)
+                    // Warning
+                    if (reprojectionErrorTotal > 60)
                        log("Unusual high re-projection error (averaged over #keypoints) of value "
                            + std::to_string(reprojectionErrorTotal) + " pixels, while the average for a good OpenPose"
                            " detection from 4 cameras is about 2-3 pixels. It might be simply a wrong OpenPose"
@@ -441,12 +460,18 @@ namespace op
                            " upper triangular matrix (as specified in the OpenPose doc/modules/calibration_module.md"
                            " and 3d_reconstruction_module.md)?", Priority::High);
                    // log("Reprojection error: " + std::to_string(reprojectionErrorTotal)); // To debug reprojection error
+                    return atLeastOnePointProjected;
                }
+                return false;
            }
+            // Keypoints in < 2 images
+            else
+                return true; // True because it is not a 3D problem but rather it is e.g., a scene without people on it
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+            return false;
        }
    }

@@ -474,9 +499,9 @@ namespace op
    {
    }

-    Array<float> PoseTriangulation::reconstructArray(const std::vector<Array<float>>& keypointsVector,
-                                                     const std::vector<cv::Mat>& cameraMatrices,
-                                                     const std::vector<Point<int>>& imageSizes) const
+    Array<float> PoseTriangulation::reconstructArray(
+        const std::vector<Array<float>>& keypointsVector, const std::vector<cv::Mat>& cameraMatrices,
+        const std::vector<Point<int>>& imageSizes) const
    {
        try
        {
@@ -497,6 +522,7 @@ namespace op
    {
        try
        {
+            bool keypointsReconstructed = false;
            std::vector<Array<float>> keypoints3Ds(keypointsVectors.size());
            // std::vector<std::thread> threads(keypointsVectors.size()-1);
            for (auto i = 0u; i < keypointsVectors.size()-1; i++)
@@ -507,15 +533,22 @@ namespace op
                //                             &keypoints3Ds[i], keypointsVectors[i], cameraMatrices,
                //                             imageSizes, mMinViews3d};
                // Single-thread option
-                reconstructArrayThread(&keypoints3Ds[i], keypointsVectors[i], cameraMatrices,
-                                       imageSizes, mMinViews3d);
+                keypointsReconstructed |= reconstructArrayThread(
+                    &keypoints3Ds[i], keypointsVectors[i], cameraMatrices, imageSizes, mMinViews3d);
            }
-            reconstructArrayThread(&keypoints3Ds.back(), keypointsVectors.back(), cameraMatrices,
-                                   imageSizes, mMinViews3d);
+            keypointsReconstructed |= reconstructArrayThread(
+                &keypoints3Ds.back(), keypointsVectors.back(), cameraMatrices, imageSizes, mMinViews3d);
            // // Close threads
            // for (auto& thread : threads)
            //     if (thread.joinable())
            //         thread.join();
+            // Warning
+            if (!keypointsReconstructed)
+                log("No keypoints were reconstructed on this frame. It might be simply a challenging frame."
+                    " However, if this message appears frequently, OpenPose is facing some unknown issue,"
+                    " mabe the calibration parameters are not accurate. Feel free to open a GitHub issue"
+                    " (remember to fill all the required information detailed in the GitHub issue template"
+                    " when it is created).", Priority::High);
            // Return results
            return keypoints3Ds;
        }