/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_OBJDETECT_HPP__
#define __OPENCV_OBJDETECT_HPP__

#ifdef __cplusplus
#  include "opencv2/core.hpp"
#endif
#include "opencv2/core/core_c.h"

#ifdef __cplusplus
#include <map>
#include <deque>

extern "C" {
#endif

/****************************************************************************************\
*                         Haar-like Object Detection functions                           *
\****************************************************************************************/

#define CV_HAAR_MAGIC_VAL    0x42500000
#define CV_TYPE_NAME_HAAR    "opencv-haar-classifier"

#define CV_IS_HAAR_CLASSIFIER( haar )                                                    \
    ((haar) != NULL &&                                                                   \
    (((const CvHaarClassifierCascade*)(haar))->flags & CV_MAGIC_MASK)==CV_HAAR_MAGIC_VAL)

#define CV_HAAR_FEATURE_MAX  3

typedef struct CvHaarFeature
{
    int tilted;
    struct
    {
        CvRect r;
        float weight;
    } rect[CV_HAAR_FEATURE_MAX];
} CvHaarFeature;

typedef struct CvHaarClassifier
{
    int count;
    CvHaarFeature* haar_feature;
    float* threshold;
    int* left;
    int* right;
    float* alpha;
} CvHaarClassifier;

typedef struct CvHaarStageClassifier
{
    int  count;
    float threshold;
    CvHaarClassifier* classifier;

    int next;
    int child;
    int parent;
} CvHaarStageClassifier;

typedef struct CvHidHaarClassifierCascade CvHidHaarClassifierCascade;

typedef struct CvHaarClassifierCascade
{
    int  flags;
    int  count;
    CvSize orig_window_size;
    CvSize real_window_size;
    double scale;
    CvHaarStageClassifier* stage_classifier;
    CvHidHaarClassifierCascade* hid_cascade;
} CvHaarClassifierCascade;

typedef struct CvAvgComp
{
    CvRect rect;
    int neighbors;
} CvAvgComp;

/* Loads haar classifier cascade from a directory.
   It is obsolete: convert your cascade to xml and use cvLoad instead */
CVAPI(CvHaarClassifierCascade*) cvLoadHaarClassifierCascade(
                    const char* directory, CvSize orig_window_size);

CVAPI(void) cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** cascade );

#define CV_HAAR_DO_CANNY_PRUNING    1
#define CV_HAAR_SCALE_IMAGE         2
#define CV_HAAR_FIND_BIGGEST_OBJECT 4
#define CV_HAAR_DO_ROUGH_SEARCH     8

//CVAPI(CvSeq*) cvHaarDetectObjectsForROC( const CvArr* image,
//                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
//                     CvSeq** rejectLevels, CvSeq** levelWeightds,
//                     double scale_factor CV_DEFAULT(1.1),
//                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
//                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
//                     bool outputRejectLevels = false );

CVAPI(CvSeq*) cvHaarDetectObjects( const CvArr* image,
                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
                     double scale_factor CV_DEFAULT(1.1),
                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)));

/* sets images for haar classifier cascade */
CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascade,
                                                const CvArr* sum, const CvArr* sqsum,
                                                const CvArr* tilted_sum, double scale );

/* runs the cascade on the specified window */
CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
                                       CvPoint pt, int start_stage CV_DEFAULT(0));
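
/* A minimal usage sketch (not part of the API): load a cascade with cvLoad and run
   cvHaarDetectObjects on a grayscale image. The file names are placeholders, and
   cvLoadImage/printf assume highgui and <stdio.h> are available.

    CvHaarClassifierCascade* cascade = (CvHaarClassifierCascade*)
        cvLoad( "haarcascade_frontalface_alt.xml", 0, 0, 0 );
    IplImage* gray = cvLoadImage( "photo.jpg", CV_LOAD_IMAGE_GRAYSCALE );
    CvMemStorage* storage = cvCreateMemStorage(0);

    CvSeq* faces = cvHaarDetectObjects( gray, cascade, storage, 1.1, 3,
                                        CV_HAAR_SCALE_IMAGE,
                                        cvSize(30, 30), cvSize(0, 0) );
    for( int i = 0; i < (faces ? faces->total : 0); i++ )
    {
        CvAvgComp comp = *(CvAvgComp*)cvGetSeqElem( faces, i );
        printf( "face %d: %d,%d %dx%d\n", i, comp.rect.x, comp.rect.y,
                comp.rect.width, comp.rect.height );
    }

    cvReleaseMemStorage( &storage );
    cvReleaseImage( &gray );
    cvReleaseHaarClassifierCascade( &cascade );
*/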


/****************************************************************************************\
*                         Latent SVM Object Detection functions                          *
\****************************************************************************************/

// DataType: STRUCT position
// Structure describes the position of the filter in the feature pyramid
// l - level in the feature pyramid
// (x, y) - coordinate in level l
typedef struct CvLSVMFilterPosition
{
    int x;
    int y;
    int l;
} CvLSVMFilterPosition;

// DataType: STRUCT filterObject
// Description of the filter, which corresponds to the part of the object
// V               - ideal (penalty = 0) position of the partial filter
//                   from the root filter position (V_i in the paper)
// penaltyFunction - vector describes penalty function (d_i in the paper)
//                   pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
// FILTER DESCRIPTION
//   Rectangular map (sizeX x sizeY),
//   every cell stores feature vector (dimension = p)
// H               - matrix of feature vectors
//                   to set and get feature vectors (i,j)
//                   used formula H[(j * sizeX + i) * p + k], where
//                   k - component of feature vector in cell (i, j)
// END OF FILTER DESCRIPTION
typedef struct CvLSVMFilterObject{
    CvLSVMFilterPosition V;
    float fineFunction[4];
    int sizeX;
    int sizeY;
    int numFeatures;
    float *H;
} CvLSVMFilterObject;

// data type: STRUCT CvLatentSvmDetector
// structure contains internal representation of trained Latent SVM detector
// num_filters       - total number of filters (root plus part) in model
// num_components    - number of components in model
// num_part_filters  - array containing number of part filters for each component
// filters           - root and part filters for all model components
// b                 - biases for all model components
// score_threshold   - confidence level threshold
typedef struct CvLatentSvmDetector
{
    int num_filters;
    int num_components;
    int* num_part_filters;
    CvLSVMFilterObject** filters;
    float* b;
    float score_threshold;
}
CvLatentSvmDetector;

// data type: STRUCT CvObjectDetection
// structure contains the bounding box and confidence level for detected object
// rect              - bounding box for a detected object
// score             - confidence level
typedef struct CvObjectDetection
{
    CvRect rect;
    float score;
} CvObjectDetection;

//////////////// Object Detection using Latent SVM //////////////


/*
// load trained detector from a file
//
// API
// CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
// INPUT
// filename				- path to the file containing the parameters of
                        - trained Latent SVM detector
// OUTPUT
// trained Latent SVM detector in internal representation
*/
CVAPI(CvLatentSvmDetector*) cvLoadLatentSvmDetector(const char* filename);

/*
// release memory allocated for CvLatentSvmDetector structure
//
// API
// void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
// INPUT
// detector				- CvLatentSvmDetector structure to be released
// OUTPUT
*/
CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);

/*
// find rectangular regions in the given image that are likely
// to contain objects and corresponding confidence levels
//
// API
// CvSeq* cvLatentSvmDetectObjects(const IplImage* image,
//                                 CvLatentSvmDetector* detector,
//                                 CvMemStorage* storage,
//                                 float overlap_threshold = 0.5f,
//                                 int numThreads = -1);
// INPUT
// image             - image to detect objects in
// detector          - Latent SVM detector in internal representation
// storage           - memory storage to store the resultant sequence
//                     of the object candidate rectangles
// overlap_threshold - threshold for the non-maximum suppression algorithm
                       = 0.5f [here will be the reference to original paper]
// OUTPUT
// sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
*/
CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
                                CvLatentSvmDetector* detector,
                                CvMemStorage* storage,
                                float overlap_threshold CV_DEFAULT(0.5f),
                                int numThreads CV_DEFAULT(-1));
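
/* A minimal usage sketch (not part of the API): the typical call sequence for the
   Latent SVM C interface above. The model and image paths are placeholders, and
   cvLoadImage assumes highgui is available.

    CvLatentSvmDetector* detector = cvLoadLatentSvmDetector( "models/cat.xml" );
    IplImage* image = cvLoadImage( "cat.jpg", CV_LOAD_IMAGE_COLOR );
    CvMemStorage* storage = cvCreateMemStorage(0);

    CvSeq* detections = cvLatentSvmDetectObjects( image, detector, storage, 0.5f, -1 );
    for( int i = 0; i < (detections ? detections->total : 0); i++ )
    {
        CvObjectDetection det = *(CvObjectDetection*)cvGetSeqElem( detections, i );
        // det.rect is the bounding box, det.score the confidence level
    }

    cvReleaseMemStorage( &storage );
    cvReleaseImage( &image );
    cvReleaseLatentSvmDetector( &detector );
*/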

#ifdef __cplusplus
}

CV_EXPORTS CvSeq* cvHaarDetectObjectsForROC( const CvArr* image,
                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
                     std::vector<int>& rejectLevels, std::vector<double>& levelWeightds,
                     double scale_factor CV_DEFAULT(1.1),
                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
                     bool outputRejectLevels = false );

namespace cv
{

///////////////////////////// Object Detection ////////////////////////////

/*
 * This is a class wrapping up the structure CvLatentSvmDetector and functions working with it.
 * The class goals are:
 * 1) provide c++ interface;
 * 2) make it possible to load and detect more than one class (model) unlike CvLatentSvmDetector.
 */
class CV_EXPORTS LatentSvmDetector
{
public:
    struct CV_EXPORTS ObjectDetection
    {
        ObjectDetection();
        ObjectDetection( const Rect& rect, float score, int classID=-1 );
        Rect rect;
        float score;
        int classID;
    };

    LatentSvmDetector();
    LatentSvmDetector( const std::vector<String>& filenames, const std::vector<String>& classNames=std::vector<String>() );
    virtual ~LatentSvmDetector();

    virtual void clear();
    virtual bool empty() const;
    bool load( const std::vector<String>& filenames, const std::vector<String>& classNames=std::vector<String>() );

    virtual void detect( const Mat& image,
                         std::vector<ObjectDetection>& objectDetections,
                         float overlapThreshold=0.5f,
                         int numThreads=-1 );

    const std::vector<String>& getClassNames() const;
    size_t getClassCount() const;

private:
    std::vector<CvLatentSvmDetector*> detectors;
    std::vector<String> classNames;
};
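
// A minimal usage sketch (not part of the API): load two hypothetical models and
// run multi-class detection; 'image' is a cv::Mat provided by the caller.
//
//     LatentSvmDetector detector;
//     std::vector<String> files, names;
//     files.push_back("models/cat.xml");  names.push_back("cat");
//     files.push_back("models/car.xml");  names.push_back("car");
//     if( detector.load(files, names) )
//     {
//         std::vector<LatentSvmDetector::ObjectDetection> detections;
//         detector.detect(image, detections, 0.5f, -1);
//         for( size_t i = 0; i < detections.size(); i++ )
//         {
//             const LatentSvmDetector::ObjectDetection& od = detections[i];
//             // use od.rect, od.score and detector.getClassNames()[od.classID]
//         }
//     }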

CV_EXPORTS void groupRectangles(CV_OUT CV_IN_OUT std::vector<Rect>& rectList, int groupThreshold, double eps=0.2);
CV_EXPORTS_W void groupRectangles(CV_OUT CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights, int groupThreshold, double eps=0.2);
CV_EXPORTS void groupRectangles( std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels,
                                std::vector<double>& levelWeights, int groupThreshold, double eps=0.2);
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights, std::vector<double>& foundScales,
                                          double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
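
// A minimal usage sketch (not part of the API): merge raw candidate boxes in place.
// With groupThreshold = 2, clusters formed by fewer than 3 similar rectangles are
// rejected; 'boxes' is a hypothetical list of raw per-window detections.
//
//     std::vector<Rect> boxes; // filled by some per-window detector
//     groupRectangles(boxes, 2, 0.2);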

class CV_EXPORTS FeatureEvaluator
{
public:
    enum { HAAR = 0, LBP = 1, HOG = 2 };
    virtual ~FeatureEvaluator();

    virtual bool read(const FileNode& node);
    virtual Ptr<FeatureEvaluator> clone() const;
    virtual int getFeatureType() const;

    virtual bool setImage(const Mat& img, Size origWinSize);
    virtual bool setWindow(Point p);

    virtual double calcOrd(int featureIdx) const;
    virtual int calcCat(int featureIdx) const;

    static Ptr<FeatureEvaluator> create(int type);
};

template<> CV_EXPORTS void Ptr<CvHaarClassifierCascade>::delete_obj();

enum
{
    CASCADE_DO_CANNY_PRUNING=1,
    CASCADE_SCALE_IMAGE=2,
    CASCADE_FIND_BIGGEST_OBJECT=4,
    CASCADE_DO_ROUGH_SEARCH=8
};

class CV_EXPORTS_W CascadeClassifier
{
public:
    CV_WRAP CascadeClassifier();
    CV_WRAP CascadeClassifier( const String& filename );
    virtual ~CascadeClassifier();

    CV_WRAP virtual bool empty() const;
    CV_WRAP bool load( const String& filename );
    virtual bool read( const FileNode& node );
    CV_WRAP virtual void detectMultiScale( const Mat& image,
                                   CV_OUT std::vector<Rect>& objects,
                                   double scaleFactor=1.1,
                                   int minNeighbors=3, int flags=0,
                                   Size minSize=Size(),
                                   Size maxSize=Size() );

    CV_WRAP virtual void detectMultiScale( const Mat& image,
                                   CV_OUT std::vector<Rect>& objects,
                                   CV_OUT std::vector<int>& rejectLevels,
                                   CV_OUT std::vector<double>& levelWeights,
                                   double scaleFactor=1.1,
                                   int minNeighbors=3, int flags=0,
                                   Size minSize=Size(),
                                   Size maxSize=Size(),
                                   bool outputRejectLevels=false );

    bool isOldFormatCascade() const;
    virtual Size getOriginalWindowSize() const;
    int getFeatureType() const;
    bool setImage( const Mat& );

protected:
    //virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
    //                                int stripSize, int yStep, double factor, std::vector<Rect>& candidates );

    virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
                                    int stripSize, int yStep, double factor, std::vector<Rect>& candidates,
                                    std::vector<int>& rejectLevels, std::vector<double>& levelWeights, bool outputRejectLevels=false);

protected:
    enum { BOOST = 0 };
    enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,
           FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };

    friend class CascadeClassifierInvoker;

    template<class FEval>
    friend int predictOrdered( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);

    template<class FEval>
    friend int predictCategorical( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);

    template<class FEval>
    friend int predictOrderedStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);

    template<class FEval>
    friend int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);

    bool setImage( Ptr<FeatureEvaluator>& feval, const Mat& image);
    virtual int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight );

    class Data
    {
    public:
        struct CV_EXPORTS DTreeNode
        {
            int featureIdx;
            float threshold; // for ordered features only
            int left;
            int right;
        };

        struct CV_EXPORTS DTree
        {
            int nodeCount;
        };

        struct CV_EXPORTS Stage
        {
            int first;
            int ntrees;
            float threshold;
        };

        bool read(const FileNode &node);

        bool isStumpBased;

        int stageType;
        int featureType;
        int ncategories;
        Size origWinSize;

        std::vector<Stage> stages;
        std::vector<DTree> classifiers;
        std::vector<DTreeNode> nodes;
        std::vector<float> leaves;
        std::vector<int> subsets;
    };

    Data data;
    Ptr<FeatureEvaluator> featureEvaluator;
    Ptr<CvHaarClassifierCascade> oldCascade;

public:
    class CV_EXPORTS MaskGenerator
    {
    public:
        virtual ~MaskGenerator() {}
        virtual cv::Mat generateMask(const cv::Mat& src)=0;
        virtual void initializeMask(const cv::Mat& /*src*/) {};
    };
    void setMaskGenerator(Ptr<MaskGenerator> maskGenerator);
    Ptr<MaskGenerator> getMaskGenerator();

    void setFaceDetectionMaskGenerator();

protected:
    Ptr<MaskGenerator> maskGenerator;
};
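
// A minimal usage sketch (not part of the API): the typical detectMultiScale call.
// The cascade file name is a placeholder and 'gray' is an 8-bit image prepared by
// the caller (e.g. converted with cvtColor and equalizeHist).
//
//     CascadeClassifier face_cascade;
//     if( face_cascade.load("haarcascade_frontalface_alt.xml") )
//     {
//         std::vector<Rect> faces;
//         face_cascade.detectMultiScale(gray, faces, 1.1, 3,
//                                       CASCADE_SCALE_IMAGE, Size(30, 30));
//     }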

//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////

// struct for detection region of interest (ROI)
struct DetectionROI
{
   // scale(size) of the bounding box
   double scale;
   // set of requested locations to be evaluated
   std::vector<cv::Point> locations;
   // vector that will contain confidence values for each location
   std::vector<double> confidences;
};

struct CV_EXPORTS_W HOGDescriptor
{
public:
    enum { L2Hys=0 };
    enum { DEFAULT_NLEVELS=64 };

    CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
        cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
        histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
        nlevels(HOGDescriptor::DEFAULT_NLEVELS)
    {}

    CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
                  Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
                  int _histogramNormType=HOGDescriptor::L2Hys,
                  double _L2HysThreshold=0.2, bool _gammaCorrection=false,
                  int _nlevels=HOGDescriptor::DEFAULT_NLEVELS)
    : winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
    nbins(_nbins), derivAperture(_derivAperture), winSigma(_winSigma),
    histogramNormType(_histogramNormType), L2HysThreshold(_L2HysThreshold),
    gammaCorrection(_gammaCorrection), nlevels(_nlevels)
    {}

    CV_WRAP HOGDescriptor(const String& filename)
    {
        load(filename);
    }

    HOGDescriptor(const HOGDescriptor& d)
    {
        d.copyTo(*this);
    }

    virtual ~HOGDescriptor() {}

    CV_WRAP size_t getDescriptorSize() const;
    CV_WRAP bool checkDetectorSize() const;
    CV_WRAP double getWinSigma() const;

    CV_WRAP virtual void setSVMDetector(InputArray _svmdetector);

    virtual bool read(FileNode& fn);
    virtual void write(FileStorage& fs, const String& objname) const;

    CV_WRAP virtual bool load(const String& filename, const String& objname=String());
    CV_WRAP virtual void save(const String& filename, const String& objname=String()) const;
    virtual void copyTo(HOGDescriptor& c) const;

    CV_WRAP virtual void compute(const Mat& img,
                         CV_OUT std::vector<float>& descriptors,
                         Size winStride=Size(), Size padding=Size(),
                         const std::vector<Point>& locations=std::vector<Point>()) const;
    //with found weights output
    CV_WRAP virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
                        CV_OUT std::vector<double>& weights,
                        double hitThreshold=0, Size winStride=Size(),
                        Size padding=Size(),
                        const std::vector<Point>& searchLocations=std::vector<Point>()) const;
    //without found weights output
    virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
                        double hitThreshold=0, Size winStride=Size(),
                        Size padding=Size(),
                        const std::vector<Point>& searchLocations=std::vector<Point>()) const;
    //with result weights output
    CV_WRAP virtual void detectMultiScale(const Mat& img, CV_OUT std::vector<Rect>& foundLocations,
                                  CV_OUT std::vector<double>& foundWeights, double hitThreshold=0,
                                  Size winStride=Size(), Size padding=Size(), double scale=1.05,
                                  double finalThreshold=2.0, bool useMeanshiftGrouping = false) const;
    //without found weights output
    virtual void detectMultiScale(const Mat& img, CV_OUT std::vector<Rect>& foundLocations,
                                  double hitThreshold=0, Size winStride=Size(),
                                  Size padding=Size(), double scale=1.05,
                                  double finalThreshold=2.0, bool useMeanshiftGrouping = false) const;

    CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs,
                                 Size paddingTL=Size(), Size paddingBR=Size()) const;

    CV_WRAP static std::vector<float> getDefaultPeopleDetector();
    CV_WRAP static std::vector<float> getDaimlerPeopleDetector();

    CV_PROP Size winSize;
    CV_PROP Size blockSize;
    CV_PROP Size blockStride;
    CV_PROP Size cellSize;
    CV_PROP int nbins;
    CV_PROP int derivAperture;
    CV_PROP double winSigma;
    CV_PROP int histogramNormType;
    CV_PROP double L2HysThreshold;
    CV_PROP bool gammaCorrection;
    CV_PROP std::vector<float> svmDetector;
    CV_PROP int nlevels;


   // evaluate specified ROI and return confidence value for each location
   virtual void detectROI(const cv::Mat& img, const std::vector<cv::Point> &locations,
                                   CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
                                   double hitThreshold = 0, cv::Size winStride = Size(),
                                   cv::Size padding = Size()) const;

   // evaluate specified ROI and return confidence value for each location in multiple scales
   virtual void detectMultiScaleROI(const cv::Mat& img,
                                                       CV_OUT std::vector<cv::Rect>& foundLocations,
                                                       std::vector<DetectionROI>& locations,
                                                       double hitThreshold = 0,
                                                       int groupThreshold = 0) const;

   // read/parse Dalal's alt model file
   void readALTModel(String modelfile);
};
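
// A minimal usage sketch (not part of the API): multi-scale people detection with
// the built-in detector; 'img' is an 8-bit image supplied by the caller.
//
//     HOGDescriptor hog;
//     hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
//     std::vector<Rect> people;
//     std::vector<double> weights;
//     hog.detectMultiScale(img, people, weights, 0, Size(8,8), Size(32,32), 1.05, 2);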

CV_EXPORTS_W void findDataMatrix(InputArray image,
                                 CV_OUT std::vector<String>& codes,
                                 OutputArray corners=noArray(),
                                 OutputArrayOfArrays dmtx=noArray());
CV_EXPORTS_W void drawDataMatrixCodes(InputOutputArray image,
                                      const std::vector<String>& codes,
                                      InputArray corners);
}

/****************************************************************************************\
*                                Datamatrix                                              *
\****************************************************************************************/

struct CV_EXPORTS CvDataMatrixCode {
  char msg[4];
  CvMat *original;
  CvMat *corners;
};

CV_EXPORTS std::deque<CvDataMatrixCode> cvFindDataMatrix(CvMat *im);

/****************************************************************************************\
*                                 LINE-MOD                                               *
\****************************************************************************************/

namespace cv {
namespace linemod {

/// @todo Convert doxy comments to rst

/**
 * \brief Discriminant feature described by its location and label.
 */
struct CV_EXPORTS Feature
{
  int x; ///< x offset
  int y; ///< y offset
  int label; ///< Quantization

  Feature() : x(0), y(0), label(0) {}
  Feature(int x, int y, int label);

  void read(const FileNode& fn);
  void write(FileStorage& fs) const;
};

inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}

struct CV_EXPORTS Template
{
  int width;
  int height;
  int pyramid_level;
  std::vector<Feature> features;

  void read(const FileNode& fn);
  void write(FileStorage& fs) const;
};

/**
 * \brief Represents a modality operating over an image pyramid.
 */
class QuantizedPyramid
{
public:
  // Virtual destructor
  virtual ~QuantizedPyramid() {}

  /**
   * \brief Compute quantized image at current pyramid level for online detection.
   *
   * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
   *                 representing its classification.
   */
  virtual void quantize(Mat& dst) const =0;

  /**
   * \brief Extract most discriminant features at current pyramid level to form a new template.
   *
   * \param[out] templ The new template.
   */
  virtual bool extractTemplate(Template& templ) const =0;

  /**
   * \brief Go to the next pyramid level.
   *
   * \todo Allow pyramid scale factor other than 2
   */
  virtual void pyrDown() =0;

protected:
  /// Candidate feature with a score
  struct Candidate
  {
    Candidate(int x, int y, int label, float score);

    /// Sort candidates with high score to the front
    bool operator<(const Candidate& rhs) const
    {
      return score > rhs.score;
    }

    Feature f;
    float score;
  };

  /**
   * \brief Choose candidate features so that they are not bunched together.
   *
   * \param[in]  candidates   Candidate features sorted by score.
   * \param[out] features     Destination vector of selected features.
   * \param[in]  num_features Number of candidates to select.
   * \param[in]  distance     Hint for desired distance between features.
   */
  static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
                                      std::vector<Feature>& features,
                                      size_t num_features, float distance);
};

inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}

/**
 * \brief Interface for modalities that plug into the LINE template matching representation.
 *
 * \todo Max response, to allow optimization of summing (255/MAX) features as uint8
 */
class CV_EXPORTS Modality
{
public:
  // Virtual destructor
  virtual ~Modality() {}

  /**
   * \brief Form a quantized image pyramid from a source image.
   *
   * \param[in] src  The source image. Type depends on the modality.
   * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
   *                 in quantized image and cannot be extracted as features.
   */
  Ptr<QuantizedPyramid> process(const Mat& src,
                    const Mat& mask = Mat()) const
  {
    return processImpl(src, mask);
  }

  virtual String name() const =0;

  virtual void read(const FileNode& fn) =0;
  virtual void write(FileStorage& fs) const =0;

  /**
   * \brief Create modality by name.
   *
   * The following modality types are supported:
   * - "ColorGradient"
   * - "DepthNormal"
   */
  static Ptr<Modality> create(const String& modality_type);
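  // A minimal usage sketch (not part of the API):
  //     Ptr<Modality> modality = Modality::create("ColorGradient");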

  /**
   * \brief Load a modality from file.
   */
  static Ptr<Modality> create(const FileNode& fn);

protected:
  // Indirection is because process() has a default parameter.
  virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
                        const Mat& mask) const =0;
};

/**
 * \brief Modality that computes quantized gradient orientations from a color image.
 */
class CV_EXPORTS ColorGradient : public Modality
{
public:
  /**
   * \brief Default constructor. Uses reasonable default parameter values.
   */
  ColorGradient();

  /**
   * \brief Constructor.
   *
   * \param weak_threshold   When quantizing, discard gradients with magnitude less than this.
   * \param num_features     How many features a template must contain.
   * \param strong_threshold Consider as candidate features only gradients whose norms are
   *                         larger than this.
   */
  ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);

  virtual String name() const;

  virtual void read(const FileNode& fn);
  virtual void write(FileStorage& fs) const;

  float weak_threshold;
  size_t num_features;
  float strong_threshold;

protected:
  virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
                        const Mat& mask) const;
};

/**
 * \brief Modality that computes quantized surface normals from a dense depth map.
 */
class CV_EXPORTS DepthNormal : public Modality
{
public:
  /**
   * \brief Default constructor. Uses reasonable default parameter values.
   */
  DepthNormal();

  /**
   * \brief Constructor.
   *
   * \param distance_threshold   Ignore pixels beyond this distance.
   * \param difference_threshold When computing normals, ignore contributions of pixels whose
   *                             depth difference with the central pixel is above this threshold.
   * \param num_features         How many features a template must contain.
   * \param extract_threshold    Consider as candidate feature only if there are no differing
   *                             orientations within a distance of extract_threshold.
   */
  DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
              int extract_threshold);

  virtual String name() const;

  virtual void read(const FileNode& fn);
  virtual void write(FileStorage& fs) const;

  int distance_threshold;
  int difference_threshold;
  size_t num_features;
  int extract_threshold;

protected:
  virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
860
                        const Mat& mask) const;
861 862 863 864 865 866 867 868 869 870
};

/**
 * \brief Debug function to colormap a quantized image for viewing.
 */
void colormap(const Mat& quantized, Mat& dst);

/**
 * \brief Represents a successful template match.
 */
struct CV_EXPORTS Match
{
  Match()
  {
  }

  Match(int x, int y, float similarity, const String& class_id, int template_id);

  /// Sort matches with high similarity to the front
  bool operator<(const Match& rhs) const
  {
    // Secondarily sort on template_id for the sake of duplicate removal
    if (similarity != rhs.similarity)
      return similarity > rhs.similarity;
    else
      return template_id < rhs.template_id;
  }

  bool operator==(const Match& rhs) const
  {
    return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
  }

  int x;
  int y;
  float similarity;
  String class_id;
  int template_id;
};

inline  Match::Match(int _x, int _y, float _similarity, const String& _class_id, int _template_id)
    : x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
  {
  }

/**
 * \brief Object detector using the LINE template matching algorithm with any set of
 * modalities.
 */
class CV_EXPORTS Detector
{
public:
  /**
   * \brief Empty constructor, initialize with read().
   */
  Detector();

  /**
   * \brief Constructor.
   *
   * \param modalities       Modalities to use (color gradients, depth normals, ...).
   * \param T_pyramid        Value of the sampling step T at each pyramid level. The
   *                         number of pyramid levels is T_pyramid.size().
   */
  Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);

  /**
   * \brief Detect objects by template matching.
   *
   * Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid.
   *
   * \param      sources   Source images, one for each modality.
   * \param      threshold Similarity threshold, a percentage between 0 and 100.
   * \param[out] matches   Template matches, sorted by similarity score.
   * \param      class_ids If non-empty, only search for the desired object classes.
   * \param[out] quantized_images Optionally return vector<Mat> of quantized images.
   * \param      masks     The masks for consideration during matching. The masks should be CV_8UC1
   *                       where 255 represents a valid pixel.  If non-empty, the vector must be
   *                       the same size as sources.  Each element must be
   *                       empty or the same size as its corresponding source.
   */
  void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
             const std::vector<String>& class_ids = std::vector<String>(),
             OutputArrayOfArrays quantized_images = noArray(),
             const std::vector<Mat>& masks = std::vector<Mat>()) const;

  /**
   * \brief Add new object template.
   *
   * \param      sources      Source images, one for each modality.
   * \param      class_id     Object class ID.
   * \param      object_mask  Mask separating object from background.
   * \param[out] bounding_box Optionally return bounding box of the extracted features.
   *
   * \return Template ID, or -1 if failed to extract a valid template.
   */
  int addTemplate(const std::vector<Mat>& sources, const String& class_id,
          const Mat& object_mask, Rect* bounding_box = NULL);

  /**
   * \brief Add a new object template computed by external means.
   */
  int addSyntheticTemplate(const std::vector<Template>& templates, const String& class_id);

  /**
   * \brief Get the modalities used by this detector.
   *
   * You are not permitted to add/remove modalities, but you may dynamic_cast them to
   * tweak parameters.
   */
  const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }

  /**
   * \brief Get sampling step T at pyramid_level.
   */
  int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }

  /**
   * \brief Get number of pyramid levels used by this detector.
   */
  int pyramidLevels() const { return pyramid_levels; }

  /**
   * \brief Get the template pyramid identified by template_id.
   *
   * For example, with 2 modalities (Gradient, Normal) and two pyramid levels
   * (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
   */
  const std::vector<Template>& getTemplates(const String& class_id, int template_id) const;

  int numTemplates() const;
  int numTemplates(const String& class_id) const;
  int numClasses() const { return static_cast<int>(class_templates.size()); }

  std::vector<String> classIds() const;

  void read(const FileNode& fn);
  void write(FileStorage& fs) const;

  String readClass(const FileNode& fn, const String &class_id_override = "");
  void writeClass(const String& class_id, FileStorage& fs) const;

  void readClasses(const std::vector<String>& class_ids,
                   const String& format = "templates_%s.yml.gz");
  void writeClasses(const String& format = "templates_%s.yml.gz") const;

protected:
  std::vector< Ptr<Modality> > modalities;
  int pyramid_levels;
  std::vector<int> T_at_level;

  typedef std::vector<Template> TemplatePyramid;
1013
  typedef std::map<String, std::vector<TemplatePyramid> > TemplatesMap;
1014 1015 1016 1017 1018 1019 1020 1021 1022
  TemplatesMap class_templates;

  typedef std::vector<Mat> LinearMemories;
  // Indexed as [pyramid level][modality][quantized label]
  typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;

  void matchClass(const LinearMemoryPyramid& lm_pyramid,
                  const std::vector<Size>& sizes,
                  float threshold, std::vector<Match>& matches,
1023
                  const String& class_id,
1024 1025 1026 1027 1028 1029 1030 1031
                  const std::vector<TemplatePyramid>& template_pyramids) const;
};

/**
 * \brief Factory function for detector using LINE algorithm with color gradients.
 *
 * Default parameter settings suitable for VGA images.
 */
CV_EXPORTS Ptr<Detector> getDefaultLINE();

/**
 * \brief Factory function for detector using LINE-MOD algorithm with color gradients
 * and depth normals.
 *
 * Default parameter settings suitable for VGA images.
 */
CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
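
// A minimal usage sketch (not part of the API): build a detector, register one
// template and match it. 'color', 'depth' and 'object_mask' are images supplied
// by the caller (e.g. CV_8UC3, CV_16UC1 and CV_8UC1 respectively).
//
//     Ptr<Detector> detector = getDefaultLINEMOD();
//
//     std::vector<Mat> sources;
//     sources.push_back(color);
//     sources.push_back(depth);
//     int template_id = detector->addTemplate(sources, "my_object", object_mask);
//
//     std::vector<Match> matches;
//     detector->match(sources, 80.0f, matches);  // similarity threshold in percent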

} // namespace linemod
} // namespace cv

#endif

#endif