diff --git a/doc/tutorials/features2d/akaze_matching/akaze_matching.rst b/doc/tutorials/features2d/akaze_matching/akaze_matching.rst index 3fe5df4f6208d0cff9f3da3df28a68d8400cb8f1..d1b51858dd9cd171dbdf062d2d21ae4de67129d4 100644 --- a/doc/tutorials/features2d/akaze_matching/akaze_matching.rst +++ b/doc/tutorials/features2d/akaze_matching/akaze_matching.rst @@ -46,7 +46,7 @@ Source Code Explanation =========== -1. **Load images and homography** +#. **Load images and homography** .. code-block:: cpp @@ -59,7 +59,7 @@ Explanation We are loading grayscale images here. Homography is stored in the xml created with FileStorage. -2. **Detect keypoints and compute descriptors using AKAZE** +#. **Detect keypoints and compute descriptors using AKAZE** .. code-block:: cpp @@ -72,7 +72,7 @@ Explanation We create AKAZE object and use it's *operator()* functionality. Since we don't need the *mask* parameter, *noArray()* is used. -3. **Use brute-force matcher to find 2-nn matches** +#. **Use brute-force matcher to find 2-nn matches** .. code-block:: cpp @@ -82,7 +82,7 @@ Explanation We use Hamming distance, because AKAZE uses binary descriptor by default. -4. **Use 2-nn matches to find correct keypoint matches** +#. **Use 2-nn matches to find correct keypoint matches** .. code-block:: cpp @@ -99,7 +99,7 @@ Explanation If the closest match is *ratio* closer than the second closest one, then the match is correct. -5. **Check if our matches fit in the homography model** +#. **Check if our matches fit in the homography model** .. code-block:: cpp @@ -125,7 +125,7 @@ Explanation We create a new set of matches for the inliers, because it is required by the drawing function. -6. **Output results** +#. **Output results** .. 
code-block:: cpp @@ -150,12 +150,11 @@ Found matches A-KAZE Matching Results -------------------------- -Keypoints 1: 2943 -Keypoints 2: 3511 - -Matches: 447 - -Inliers: 308 - -Inliers Ratio: 0.689038 +.. code-block:: none + + Keypoints 1: 2943 + Keypoints 2: 3511 + Matches: 447 + Inliers: 308 + Inlier Ratio: 0.689038 diff --git a/doc/tutorials/features2d/akaze_tracking/akaze_tracking.rst b/doc/tutorials/features2d/akaze_tracking/akaze_tracking.rst new file mode 100644 index 0000000000000000000000000000000000000000..9d45429e8726b6a939fed98a0043bda71ce61c71 --- /dev/null +++ b/doc/tutorials/features2d/akaze_tracking/akaze_tracking.rst @@ -0,0 +1,155 @@ +.. _akazeTracking: + + +AKAZE and ORB planar tracking +****************************** + +Introduction +------------------ + +In this tutorial we will compare *AKAZE* and *ORB* local features +using them to find matches between video frames and track object movements. + +The algorithm is as follows: + +* Detect and describe keypoints on the first frame, manually set object boundaries +* For every next frame: + + #. Detect and describe keypoints + #. Match them using bruteforce matcher + #. Estimate homography transformation using RANSAC + #. Filter inliers from all the matches + #. Apply homography transformation to the bounding box to find the object + #. Draw bounding box and inliers, compute inlier ratio as evaluation metric + +.. image:: images/frame.png + :height: 480pt + :width: 640pt + :alt: Result frame example + :align: center + +Data +=========== +To do the tracking we need a video and object position on the first frame. + +You can download our example video and data from `here `_. + +To run the code you have to specify input and output video path and object bounding box. + +.. code-block:: none + + ./planar_tracking blais.mp4 result.avi blais_bb.xml.gz + +Source Code +=========== +.. 
literalinclude:: ../../../../samples/cpp/tutorial_code/features2D/AKAZE_tracking/planar_tracking.cpp + :language: cpp + :linenos: + :tab-width: 4 + +Explanation +=========== + +Tracker class +-------------- + + This class implements the algorithm described above + using the given feature detector and descriptor matcher. + +* **Setting up the first frame** + + .. code-block:: cpp + + void Tracker::setFirstFrame(const Mat frame, vector bb, string title, Stats& stats) + { + first_frame = frame.clone(); + (*detector)(first_frame, noArray(), first_kp, first_desc); + stats.keypoints = (int)first_kp.size(); + drawBoundingBox(first_frame, bb); + putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4); + object_bb = bb; + } + + We compute and store keypoints and descriptors from the first frame and prepare it for the output. + + We need to save the number of detected keypoints to make sure both detectors locate roughly the same number of those. + +* **Processing frames** + + #. Locate keypoints and compute descriptors + + .. code-block:: cpp + + (*detector)(frame, noArray(), kp, desc); + + To find matches between frames we have to locate the keypoints first. + + In this tutorial detectors are set up to find about 1000 keypoints on each frame. + + #. Use 2-nn matcher to find correspondences + + .. code-block:: cpp + + matcher->knnMatch(first_desc, desc, matches, 2); + for(unsigned i = 0; i < matches.size(); i++) { + if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) { + matched1.push_back(first_kp[matches[i][0].queryIdx]); + matched2.push_back( kp[matches[i][0].trainIdx]); + } + } + + If the closest match is *nn_match_ratio* closer than the second closest one, then it's a match. + + #. Use *RANSAC* to estimate homography transformation + + .. 
code-block:: cpp + + homography = findHomography(Points(matched1), Points(matched2), + RANSAC, ransac_thresh, inlier_mask); + + If there are at least 4 matches we can use random sample consensus to estimate the image transformation. + + #. Save the inliers + + .. code-block:: cpp + + for(unsigned i = 0; i < matched1.size(); i++) { + if(inlier_mask.at(i)) { + int new_i = static_cast(inliers1.size()); + inliers1.push_back(matched1[i]); + inliers2.push_back(matched2[i]); + inlier_matches.push_back(DMatch(new_i, new_i, 0)); + } + } + + Since *findHomography* computes the inliers we only have to save the chosen points and matches. + + #. Project object bounding box + + .. code-block:: cpp + + perspectiveTransform(object_bb, new_bb, homography); + + If there is a reasonable number of inliers we can use the estimated transformation to locate the object. + +Results +======= +You can watch the resulting `video on youtube `_. + +*AKAZE* statistics: + + .. code-block:: none + + Matches 626 + Inliers 410 + Inlier ratio 0.58 + Keypoints 1117 + +*ORB* statistics: + + .. 
code-block:: none + + Matches 504 + Inliers 319 + Inlier ratio 0.56 + Keypoints 1112 diff --git a/doc/tutorials/features2d/akaze_tracking/images/frame.png b/doc/tutorials/features2d/akaze_tracking/images/frame.png new file mode 100644 index 0000000000000000000000000000000000000000..1775a96ede9b3887f6acd7dd78110a3961e95b2b Binary files /dev/null and b/doc/tutorials/features2d/akaze_tracking/images/frame.png differ diff --git a/doc/tutorials/features2d/table_of_content_features2d/images/AKAZE_Tracking_Tutorial_Cover.png b/doc/tutorials/features2d/table_of_content_features2d/images/AKAZE_Tracking_Tutorial_Cover.png new file mode 100644 index 0000000000000000000000000000000000000000..bb3272c96bb7dc4e6e4e7290127274a410df49c8 Binary files /dev/null and b/doc/tutorials/features2d/table_of_content_features2d/images/AKAZE_Tracking_Tutorial_Cover.png differ diff --git a/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.rst b/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.rst index bb79ca32f9ef31d4aa7c20ff4ddd3e0e429a7c27..50b684cf561b804713382d74f428876dc71a6349 100644 --- a/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.rst +++ b/doc/tutorials/features2d/table_of_content_features2d/table_of_content_features2d.rst @@ -194,7 +194,7 @@ Learn about how to use the feature points detectors, descriptors and matching f *Author:* Fedor Morozov - Use *AKAZE* local features to find correspondence between two images. + Using *AKAZE* local features to find correspondence between two images. 
===================== ============================================== @@ -202,6 +202,21 @@ Learn about how to use the feature points detectors, descriptors and matching f :height: 90pt :width: 90pt + ===================== ============================================== + |AkazeTracking| **Title:** :ref:`akazeTracking` + + *Compatibility:* > OpenCV 3.0 + + *Author:* Fedor Morozov + + Using *AKAZE* and *ORB* for planar object tracking. + + ===================== ============================================== + + .. |AkazeTracking| image:: images/AKAZE_Tracking_Tutorial_Cover.png + :height: 90pt + :width: 90pt + .. raw:: latex \pagebreak @@ -221,3 +236,4 @@ Learn about how to use the feature points detectors, descriptors and matching f ../feature_homography/feature_homography ../detection_of_planar_objects/detection_of_planar_objects ../akaze_matching/akaze_matching + ../akaze_tracking/akaze_tracking diff --git a/modules/features2d/doc/feature_detection_and_description.rst b/modules/features2d/doc/feature_detection_and_description.rst index ab7cdecf96b75414748a6f58aeda716b542b6a99..de024a41c74852da7a581530e1621dc07d918677 100644 --- a/modules/features2d/doc/feature_detection_and_description.rst +++ b/modules/features2d/doc/feature_detection_and_description.rst @@ -226,7 +226,7 @@ Class implementing the AKAZE keypoint detector and descriptor extractor, describ float threshold = 0.001f, int octaves = 4, int sublevels = 4, int diffusivity = DIFF_PM_G2); }; -.. note:: AKAZE descriptor can only be used with KAZE or AKAZE keypoints +.. note:: AKAZE descriptors can only be used with KAZE or AKAZE keypoints. Try to avoid using *extract* and *detect* instead of *operator()* due to performance reasons. .. [ANB13] Fast Explicit Diffusion for Accelerated Features in Nonlinear Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien Bartoli. In British Machine Vision Conference (BMVC), Bristol, UK, September 2013. @@ -249,4 +249,4 @@ SIFT .. 
ocv:class:: SIFT : public Feature2D -The SIFT algorithm has been moved to opencv_contrib/xfeatures2d module. \ No newline at end of file +The SIFT algorithm has been moved to opencv_contrib/xfeatures2d module. diff --git a/modules/features2d/src/akaze.cpp b/modules/features2d/src/akaze.cpp index 1d09d061587e08c89e44b222eb840b8b6f1cd04a..d875b464411efb6764f6587e4b01695c51ed5172 100644 --- a/modules/features2d/src/akaze.cpp +++ b/modules/features2d/src/akaze.cpp @@ -209,6 +209,10 @@ namespace cv options.descriptor_size = descriptor_size; options.img_width = img.cols; options.img_height = img.rows; + options.dthreshold = threshold; + options.omax = octaves; + options.nsublevels = sublevels; + options.diffusivity = diffusivity; AKAZEFeatures impl(options); impl.Create_Nonlinear_Scale_Space(img1_32); @@ -237,6 +241,10 @@ namespace cv options.descriptor_size = descriptor_size; options.img_width = img.cols; options.img_height = img.rows; + options.dthreshold = threshold; + options.omax = octaves; + options.nsublevels = sublevels; + options.diffusivity = diffusivity; AKAZEFeatures impl(options); impl.Create_Nonlinear_Scale_Space(img1_32); diff --git a/modules/features2d/src/features2d_init.cpp b/modules/features2d/src/features2d_init.cpp index 470cb3c54e72ef4a8e94b7bbae0b577bffa16f52..32cdbfee81bff56bcbb577d89f55513bca87594a 100644 --- a/modules/features2d/src/features2d_init.cpp +++ b/modules/features2d/src/features2d_init.cpp @@ -106,14 +106,22 @@ CV_INIT_ALGORITHM(GFTTDetector, "Feature2D.GFTT", CV_INIT_ALGORITHM(KAZE, "Feature2D.KAZE", obj.info()->addParam(obj, "upright", obj.upright); - obj.info()->addParam(obj, "extended", obj.extended)) + obj.info()->addParam(obj, "extended", obj.extended); + obj.info()->addParam(obj, "threshold", obj.threshold); + obj.info()->addParam(obj, "octaves", obj.octaves); + obj.info()->addParam(obj, "sublevels", obj.sublevels); + obj.info()->addParam(obj, "diffusivity", obj.diffusivity)) 
/////////////////////////////////////////////////////////////////////////////////////////////////////////// CV_INIT_ALGORITHM(AKAZE, "Feature2D.AKAZE", - obj.info()->addParam(obj, "descriptor_channels", obj.descriptor_channels); obj.info()->addParam(obj, "descriptor", obj.descriptor); - obj.info()->addParam(obj, "descriptor_size", obj.descriptor_size)) + obj.info()->addParam(obj, "descriptor_channels", obj.descriptor_channels); + obj.info()->addParam(obj, "descriptor_size", obj.descriptor_size); + obj.info()->addParam(obj, "threshold", obj.threshold); + obj.info()->addParam(obj, "octaves", obj.octaves); + obj.info()->addParam(obj, "sublevels", obj.sublevels); + obj.info()->addParam(obj, "diffusivity", obj.diffusivity)) /////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -183,4 +191,4 @@ bool cv::initModule_features2d(void) all &= !FlannBasedMatcher_info_auto.name().empty(); return all; -} \ No newline at end of file +} diff --git a/modules/features2d/src/kaze.cpp b/modules/features2d/src/kaze.cpp index 910ec271f6b4528de7e9cabad8167bc8ed86f4fd..e4ddbe44513374e21bde7c54db3d803d3fc4cfcd 100644 --- a/modules/features2d/src/kaze.cpp +++ b/modules/features2d/src/kaze.cpp @@ -158,6 +158,10 @@ namespace cv options.img_height = img.rows; options.extended = extended; options.upright = upright; + options.dthreshold = threshold; + options.omax = octaves; + options.nsublevels = sublevels; + options.diffusivity = diffusivity; KAZEFeatures impl(options); impl.Create_Nonlinear_Scale_Space(img1_32); @@ -185,6 +189,10 @@ namespace cv options.img_height = img.rows; options.extended = extended; options.upright = upright; + options.dthreshold = threshold; + options.omax = octaves; + options.nsublevels = sublevels; + options.diffusivity = diffusivity; KAZEFeatures impl(options); impl.Create_Nonlinear_Scale_Space(img1_32); diff --git a/samples/cpp/tutorial_code/features2D/AKAZE_tracking/planar_tracking.cpp 
b/samples/cpp/tutorial_code/features2D/AKAZE_tracking/planar_tracking.cpp new file mode 100755 index 0000000000000000000000000000000000000000..ddeae5c647a0d6bdea66f5b2d8065f010621b6e8 --- /dev/null +++ b/samples/cpp/tutorial_code/features2D/AKAZE_tracking/planar_tracking.cpp @@ -0,0 +1,183 @@ +#include +#include +#include +#include +#include +#include + +#include "stats.h" // Stats structure definition +#include "utils.h" // Drawing and printing functions + +using namespace std; +using namespace cv; + +const double akaze_thresh = 3e-4; // AKAZE detection threshold set to locate about 1000 keypoints +const double ransac_thresh = 2.5f; // RANSAC inlier threshold +const double nn_match_ratio = 0.8f; // Nearest-neighbour matching ratio +const int bb_min_inliers = 100; // Minimal number of inliers to draw bounding box +const int stats_update_period = 10; // On-screen statistics are updated every 10 frames + +class Tracker +{ +public: + Tracker(Ptr _detector, Ptr _matcher) : + detector(_detector), + matcher(_matcher) + {} + + void setFirstFrame(const Mat frame, vector bb, string title, Stats& stats); + Mat process(const Mat frame, Stats& stats); + Ptr getDetector() { + return detector; + } +protected: + Ptr detector; + Ptr matcher; + Mat first_frame, first_desc; + vector first_kp; + vector object_bb; +}; + +void Tracker::setFirstFrame(const Mat frame, vector bb, string title, Stats& stats) +{ + first_frame = frame.clone(); + (*detector)(first_frame, noArray(), first_kp, first_desc); + stats.keypoints = (int)first_kp.size(); + drawBoundingBox(first_frame, bb); + putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4); + object_bb = bb; +} + +Mat Tracker::process(const Mat frame, Stats& stats) +{ + vector kp; + Mat desc; + (*detector)(frame, noArray(), kp, desc); + stats.keypoints = (int)kp.size(); + + vector< vector > matches; + vector matched1, matched2; + matcher->knnMatch(first_desc, desc, matches, 2); + for(unsigned i = 0; i < 
matches.size(); i++) { + if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) { + matched1.push_back(first_kp[matches[i][0].queryIdx]); + matched2.push_back( kp[matches[i][0].trainIdx]); + } + } + stats.matches = (int)matched1.size(); + + Mat inlier_mask, homography; + vector inliers1, inliers2; + vector inlier_matches; + if(matched1.size() >= 4) { + homography = findHomography(Points(matched1), Points(matched2), + RANSAC, ransac_thresh, inlier_mask); + } + + if(matched1.size() < 4 || homography.empty()) { + Mat res; + hconcat(first_frame, frame, res); + stats.inliers = 0; + stats.ratio = 0; + return res; + } + for(unsigned i = 0; i < matched1.size(); i++) { + if(inlier_mask.at(i)) { + int new_i = static_cast(inliers1.size()); + inliers1.push_back(matched1[i]); + inliers2.push_back(matched2[i]); + inlier_matches.push_back(DMatch(new_i, new_i, 0)); + } + } + stats.inliers = (int)inliers1.size(); + stats.ratio = stats.inliers * 1.0 / stats.matches; + + vector new_bb; + perspectiveTransform(object_bb, new_bb, homography); + Mat frame_with_bb = frame.clone(); + if(stats.inliers >= bb_min_inliers) { + drawBoundingBox(frame_with_bb, new_bb); + } + Mat res; + drawMatches(first_frame, inliers1, frame_with_bb, inliers2, + inlier_matches, res, + Scalar(255, 0, 0), Scalar(255, 0, 0)); + return res; +} + +int main(int argc, char **argv) +{ + if(argc < 4) { + cerr << "Usage: " << endl << + "akaze_track input_path output_path bounding_box" << endl; + return 1; + } + VideoCapture video_in(argv[1]); + VideoWriter video_out(argv[2], + (int)video_in.get(CAP_PROP_FOURCC), + (int)video_in.get(CAP_PROP_FPS), + Size(2 * (int)video_in.get(CAP_PROP_FRAME_WIDTH), + 2 * (int)video_in.get(CAP_PROP_FRAME_HEIGHT))); + + if(!video_in.isOpened()) { + cerr << "Couldn't open " << argv[1] << endl; + return 1; + } + if(!video_out.isOpened()) { + cerr << "Couldn't open " << argv[2] << endl; + return 1; + } + + vector bb; + FileStorage fs(argv[3], FileStorage::READ); + 
if(fs["bounding_box"].empty()) { + cerr << "Couldn't read bounding_box from " << argv[3] << endl; + return 1; + } + fs["bounding_box"] >> bb; + Ptr akaze = Feature2D::create("AKAZE"); + akaze->set("threshold", akaze_thresh); + Ptr orb = Feature2D::create("ORB"); + Ptr matcher = DescriptorMatcher::create("BruteForce-Hamming"); + Tracker akaze_tracker(akaze, matcher); + Tracker orb_tracker(orb, matcher); + + Stats stats, akaze_stats, orb_stats; + Mat frame; + video_in >> frame; + akaze_tracker.setFirstFrame(frame, bb, "AKAZE", stats); + orb_tracker.getDetector()->set("nFeatures", stats.keypoints); + orb_tracker.setFirstFrame(frame, bb, "ORB", stats); + + Stats akaze_draw_stats, orb_draw_stats; + int frame_count = (int)video_in.get(CAP_PROP_FRAME_COUNT); + Mat akaze_res, orb_res, res_frame; + for(int i = 1; i < frame_count; i++) { + bool update_stats = (i % stats_update_period == 0); + video_in >> frame; + + akaze_res = akaze_tracker.process(frame, stats); + akaze_stats += stats; + if(update_stats) { + akaze_draw_stats = stats; + } + + orb_tracker.getDetector()->set("nFeatures", stats.keypoints); + orb_res = orb_tracker.process(frame, stats); + orb_stats += stats; + if(update_stats) { + orb_draw_stats = stats; + } + + drawStatistics(akaze_res, akaze_draw_stats); + drawStatistics(orb_res, orb_draw_stats); + vconcat(akaze_res, orb_res, res_frame); + video_out << res_frame; + cout << i << "/" << frame_count - 1 << endl; + } + akaze_stats /= frame_count - 1; + orb_stats /= frame_count - 1; + printStatistics("AKAZE", akaze_stats); + printStatistics("ORB", orb_stats); + return 0; +} diff --git a/samples/cpp/tutorial_code/features2D/AKAZE_tracking/stats.h b/samples/cpp/tutorial_code/features2D/AKAZE_tracking/stats.h new file mode 100644 index 0000000000000000000000000000000000000000..343b789ea3452c7bf4bec10bf2f872fbe65e3369 --- /dev/null +++ b/samples/cpp/tutorial_code/features2D/AKAZE_tracking/stats.h @@ -0,0 +1,34 @@ +#ifndef STATS_H +#define STATS_H + +struct Stats +{ + 
int matches; + int inliers; + double ratio; + int keypoints; + + Stats() : matches(0), + inliers(0), + ratio(0), + keypoints(0) + {} + + Stats& operator+=(const Stats& op) { + matches += op.matches; + inliers += op.inliers; + ratio += op.ratio; + keypoints += op.keypoints; + return *this; + } + Stats& operator/=(int num) + { + matches /= num; + inliers /= num; + ratio /= num; + keypoints /= num; + return *this; + } +}; + +#endif // STATS_H diff --git a/samples/cpp/tutorial_code/features2D/AKAZE_tracking/utils.h b/samples/cpp/tutorial_code/features2D/AKAZE_tracking/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..fbd897ec524867c341524d71b6929f1d5f177366 --- /dev/null +++ b/samples/cpp/tutorial_code/features2D/AKAZE_tracking/utils.h @@ -0,0 +1,59 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include "stats.h" + +using namespace std; +using namespace cv; + +void drawBoundingBox(Mat image, vector bb); +void drawStatistics(Mat image, const Stats& stats); +void printStatistics(string name, Stats stats); +vector Points(vector keypoints); + +void drawBoundingBox(Mat image, vector bb) +{ + for(unsigned i = 0; i < bb.size() - 1; i++) { + line(image, bb[i], bb[i + 1], Scalar(0, 0, 255), 2); + } + line(image, bb[bb.size() - 1], bb[0], Scalar(0, 0, 255), 2); +} + +void drawStatistics(Mat image, const Stats& stats) +{ + static const int font = FONT_HERSHEY_PLAIN; + stringstream str1, str2, str3; + + str1 << "Matches: " << stats.matches; + str2 << "Inliers: " << stats.inliers; + str3 << "Inlier ratio: " << setprecision(2) << stats.ratio; + + putText(image, str1.str(), Point(0, image.rows - 90), font, 2, Scalar::all(255), 3); + putText(image, str2.str(), Point(0, image.rows - 60), font, 2, Scalar::all(255), 3); + putText(image, str3.str(), Point(0, image.rows - 30), font, 2, Scalar::all(255), 3); +} + +void printStatistics(string name, Stats stats) +{ + cout << name << endl; + cout << "----------" << endl; + + cout << "Matches " << 
stats.matches << endl; + cout << "Inliers " << stats.inliers << endl; + cout << "Inlier ratio " << setprecision(2) << stats.ratio << endl; + cout << "Keypoints " << stats.keypoints << endl; + cout << endl; +} + +vector Points(vector keypoints) +{ + vector res; + for(unsigned i = 0; i < keypoints.size(); i++) { + res.push_back(keypoints[i].pt); + } + return res; +} + +#endif // UTILS_H