/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_OBJDETECT_HPP__
#define __OPENCV_OBJDETECT_HPP__

#include "opencv2/core.hpp"

#ifdef __cplusplus
#include <map>
#include <deque>

extern "C" {
#endif

/****************************************************************************************\
*                         Haar-like Object Detection functions                           *
\****************************************************************************************/

/* Signature value stored in CvHaarClassifierCascade::flags to tag valid cascades. */
#define CV_HAAR_MAGIC_VAL    0x42500000
/* Type name used when (de)serializing Haar cascades. */
#define CV_TYPE_NAME_HAAR    "opencv-haar-classifier"

/* True iff `haar` is non-NULL and its flags field carries the Haar magic value. */
#define CV_IS_HAAR_CLASSIFIER( haar )                                                    \
    ((haar) != NULL &&                                                                   \
    (((const CvHaarClassifierCascade*)(haar))->flags & CV_MAGIC_MASK)==CV_HAAR_MAGIC_VAL)

/* Maximum number of weighted rectangles a single Haar feature may consist of. */
#define CV_HAAR_FEATURE_MAX  3

typedef struct CvHaarFeature
{
70
    int tilted;
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
    struct
    {
        CvRect r;
        float weight;
    } rect[CV_HAAR_FEATURE_MAX];
} CvHaarFeature;

/* A weak classifier: a small decision tree whose nodes test Haar features.
   The per-node arrays below each hold `count` entries. */
typedef struct CvHaarClassifier
{
    int count;                    /* number of nodes in the tree */
    CvHaarFeature* haar_feature;  /* feature tested at each node */
    float* threshold;             /* decision threshold per node */
    int* left;                    /* left child index per node */
    int* right;                   /* right child index per node */
    float* alpha;                 /* output values at the leaves */
} CvHaarClassifier;

/* One boosted stage of the cascade: a set of weak classifiers whose combined
   response is compared against `threshold` to accept or reject a window. */
typedef struct CvHaarStageClassifier
{
    int  count;                    /* number of weak classifiers in this stage */
    float threshold;               /* stage acceptance threshold */
    CvHaarClassifier* classifier;  /* array of `count` weak classifiers */

    /* indices linking stages into a tree-structured cascade */
    int next;
    int child;
    int parent;
} CvHaarStageClassifier;

/* Opaque optimized ("hidden") representation built from the cascade at run time. */
typedef struct CvHidHaarClassifierCascade CvHidHaarClassifierCascade;

/* Complete Haar cascade: a sequence of stage classifiers applied in order. */
typedef struct CvHaarClassifierCascade
{
    int  flags;                 /* signature (see CV_IS_HAAR_CLASSIFIER) and flags */
    int  count;                 /* number of stages */
    CvSize orig_window_size;    /* detection window size the cascade was trained on */
    CvSize real_window_size;    /* current (scaled) window size */
    double scale;               /* current scale of the cascade */
    CvHaarStageClassifier* stage_classifier;  /* array of `count` stages */
    CvHidHaarClassifierCascade* hid_cascade;  /* internal optimized representation */
} CvHaarClassifierCascade;

/* Detection result: a rectangle plus the number of neighboring raw
   detections that were grouped into it. */
typedef struct CvAvgComp
{
    CvRect rect;    /* bounding box of the detected object */
    int neighbors;  /* number of grouped neighbor rectangles */
} CvAvgComp;

/* Loads haar classifier cascade from a directory.
   It is obsolete: convert your cascade to xml and use cvLoad instead */
CVAPI(CvHaarClassifierCascade*) cvLoadHaarClassifierCascade(
                    const char* directory, CvSize orig_window_size);

/* Releases the cascade (and clears the pointer). */
CVAPI(void) cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** cascade );

/* Flags accepted by cvHaarDetectObjects (see also the CASCADE_* C++ aliases). */
#define CV_HAAR_DO_CANNY_PRUNING    1
#define CV_HAAR_SCALE_IMAGE         2
#define CV_HAAR_FIND_BIGGEST_OBJECT 4
#define CV_HAAR_DO_ROUGH_SEARCH     8

A
Alexey Kazakov 已提交
130 131 132 133 134 135 136 137
//CVAPI(CvSeq*) cvHaarDetectObjectsForROC( const CvArr* image,
//                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
//                     CvSeq** rejectLevels, CvSeq** levelWeightds,
//                     double scale_factor CV_DEFAULT(1.1),
//                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
//                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
//                     bool outputRejectLevels = false );

138

139
CVAPI(CvSeq*) cvHaarDetectObjects( const CvArr* image,
A
Andrey Kamaev 已提交
140
                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
141
                     double scale_factor CV_DEFAULT(1.1),
142
                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
143
                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)));
144 145 146 147 148 149 150 151 152 153

/* sets images for haar classifier cascade */
/* `sum`, `sqsum` and `tilted_sum` are the precomputed integral images the
   cascade is evaluated against at the given `scale`. */
CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascade,
                                                const CvArr* sum, const CvArr* sqsum,
                                                const CvArr* tilted_sum, double scale );

/* runs the cascade on the specified window */
/* Returns the classification result; evaluation starts at stage `start_stage`. */
CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
                                       CvPoint pt, int start_stage CV_DEFAULT(0));

/****************************************************************************************\
*                         Latent SVM Object Detection functions                          *
\****************************************************************************************/

// DataType: STRUCT position
// Structure describes the position of the filter in the feature pyramid
// l - level in the feature pyramid
// (x, y) - coordinate in level l
typedef struct CvLSVMFilterPosition
{
    int x;
    int y;
    int l;
} CvLSVMFilterPosition;

// DataType: STRUCT filterObject
// Description of the filter, which corresponds to the part of the object
// V               - ideal (penalty = 0) position of the partial filter
//                   from the root filter position (V_i in the paper)
// penaltyFunction - vector describes penalty function (d_i in the paper)
//                   pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
// FILTER DESCRIPTION
A
Andrey Kamaev 已提交
177
//   Rectangular map (sizeX x sizeY),
178 179
//   every cell stores feature vector (dimension = p)
// H               - matrix of feature vectors
A
Andrey Kamaev 已提交
180
//                   to set and get feature vectors (i,j)
181 182 183
//                   used formula H[(j * sizeX + i) * p + k], where
//                   k - component of feature vector in cell (i, j)
// END OF FILTER DESCRIPTION
A
Andrey Kamaev 已提交
184
typedef struct CvLSVMFilterObject{
185
    CvLSVMFilterPosition V;
186
    float fineFunction[4];
E
Evgeniy Kozinov 已提交
187 188 189
    int sizeX;
    int sizeY;
    int numFeatures;
190
    float *H;
191
} CvLSVMFilterObject;
192 193 194

// data type: STRUCT CvLatentSvmDetector
// structure contains internal representation of trained Latent SVM detector
A
Andrey Kamaev 已提交
195
// num_filters			- total number of filters (root plus part) in model
196 197 198 199 200 201 202
// num_components		- number of components in model
// num_part_filters		- array containing number of part filters for each component
// filters				- root and part filters for all model components
// b					- biases for all model components
// score_threshold		- confidence level threshold
typedef struct CvLatentSvmDetector
{
203 204 205 206 207 208
    int num_filters;
    int num_components;
    int* num_part_filters;
    CvLSVMFilterObject** filters;
    float* b;
    float score_threshold;
209 210 211 212
}
CvLatentSvmDetector;

// data type: STRUCT CvObjectDetection
A
Andrey Kamaev 已提交
213
// structure contains the bounding box and confidence level for detected object
214
// rect					- bounding box for a detected object
A
Andrey Kamaev 已提交
215
// score				- confidence level
216 217
typedef struct CvObjectDetection
{
218 219
    CvRect rect;
    float score;
220 221 222 223 224 225 226 227 228 229 230 231
} CvObjectDetection;

//////////////// Object Detection using Latent SVM //////////////


/*
// load trained detector from a file
//
// API
// CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
// INPUT
// filename				- path to the file containing the parameters of
                        - trained Latent SVM detector
// OUTPUT
// trained Latent SVM detector in internal representation
*/
CVAPI(CvLatentSvmDetector*) cvLoadLatentSvmDetector(const char* filename);

/*
// release memory allocated for CvLatentSvmDetector structure
//
// API
// void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
// INPUT
// detector				- CvLatentSvmDetector structure to be released
// OUTPUT
*/
CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);

/*
A
Andrey Kamaev 已提交
250
// find rectangular regions in the given image that are likely
251 252 253
// to contain objects and corresponding confidence levels
//
// API
A
Andrey Kamaev 已提交
254 255 256
// CvSeq* cvLatentSvmDetectObjects(const IplImage* image,
//									CvLatentSvmDetector* detector,
//									CvMemStorage* storage,
257 258
//									float overlap_threshold = 0.5f,
//                                  int numThreads = -1);
259 260 261
// INPUT
// image				- image to detect objects in
// detector				- Latent SVM detector in internal representation
A
Andrey Kamaev 已提交
262
// storage				- memory storage to store the resultant sequence
263
//							of the object candidate rectangles
A
Andrey Kamaev 已提交
264
// overlap_threshold	- threshold for the non-maximum suppression algorithm
265 266 267 268
                           = 0.5f [here will be the reference to original paper]
// OUTPUT
// sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
*/
A
Andrey Kamaev 已提交
269
CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
270 271 272
                                CvLatentSvmDetector* detector,
                                CvMemStorage* storage,
                                float overlap_threshold CV_DEFAULT(0.5f),
273
                                int numThreads CV_DEFAULT(-1));
274

275 276 277
#ifdef __cplusplus
}

A
Alexey Kazakov 已提交
278 279 280 281 282 283 284 285
CV_EXPORTS CvSeq* cvHaarDetectObjectsForROC( const CvArr* image,
                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
                     std::vector<int>& rejectLevels, std::vector<double>& levelWeightds,
                     double scale_factor CV_DEFAULT(1.1),
                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
                     bool outputRejectLevels = false );

286 287
namespace cv
{
A
Andrey Kamaev 已提交
288

289 290
///////////////////////////// Object Detection ////////////////////////////

M
Maria Dimashova 已提交
291 292 293 294 295 296
/*
 * This is a class wrapping up the structure CvLatentSvmDetector and functions working with it.
 * The class goals are:
 * 1) provide c++ interface;
 * 2) make it possible to load and detect more than one class (model) unlike CvLatentSvmDetector.
 */
297
class CV_EXPORTS LatentSvmDetector
M
Maria Dimashova 已提交
298 299
{
public:
300
    struct CV_EXPORTS ObjectDetection
M
Maria Dimashova 已提交
301 302 303 304 305 306 307 308
    {
        ObjectDetection();
        ObjectDetection( const Rect& rect, float score, int classID=-1 );
        Rect rect;
        float score;
        int classID;
    };

309
    LatentSvmDetector();
310
    LatentSvmDetector( const std::vector<cv::String>& filenames, const std::vector<cv::String>& classNames=std::vector<cv::String>() );
M
Maria Dimashova 已提交
311 312
    virtual ~LatentSvmDetector();

313 314
    virtual void clear();
    virtual bool empty() const;
315
    bool load( const std::vector<cv::String>& filenames, const std::vector<cv::String>& classNames=std::vector<cv::String>() );
M
Maria Dimashova 已提交
316

317
    virtual void detect( const Mat& image,
318
                         std::vector<ObjectDetection>& objectDetections,
319 320
                         float overlapThreshold=0.5f,
                         int numThreads=-1 );
M
Maria Dimashova 已提交
321

322
    const std::vector<cv::String>& getClassNames() const;
M
Maria Dimashova 已提交
323 324 325
    size_t getClassCount() const;

private:
326
    std::vector<CvLatentSvmDetector*> detectors;
327
    std::vector<cv::String> classNames;
M
Maria Dimashova 已提交
328 329
};

/* Groups overlapping rectangles produced by multi-scale detection. */
CV_EXPORTS void groupRectangles(CV_OUT CV_IN_OUT std::vector<Rect>& rectList, int groupThreshold, double eps=0.2);
CV_EXPORTS_W void groupRectangles(CV_OUT CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights, int groupThreshold, double eps=0.2);
CV_EXPORTS void groupRectangles( std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels,
                                std::vector<double>& levelWeights, int groupThreshold, double eps=0.2);
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights, std::vector<double>& foundScales,
                                          double detectThreshold = 0.0, Size winDetSize = Size(64, 128));

A
Andrey Kamaev 已提交
338

339 340
class CV_EXPORTS FeatureEvaluator
{
A
Andrey Kamaev 已提交
341
public:
342
    enum { HAAR = 0, LBP = 1, HOG = 2 };
343
    virtual ~FeatureEvaluator();
344

345 346 347
    virtual bool read(const FileNode& node);
    virtual Ptr<FeatureEvaluator> clone() const;
    virtual int getFeatureType() const;
A
Andrey Kamaev 已提交
348

349
    virtual bool setImage(const Mat& img, Size origWinSize);
350 351 352 353 354 355 356 357 358
    virtual bool setWindow(Point p);

    virtual double calcOrd(int featureIdx) const;
    virtual int calcCat(int featureIdx) const;

    static Ptr<FeatureEvaluator> create(int type);
};

// Specialized Ptr<> cleanup for legacy C cascades (implementation provided by the library).
template<> CV_EXPORTS void Ptr<CvHaarClassifierCascade>::delete_obj();
/* C++ aliases of the CV_HAAR_* detection flags. */
enum
{
    CASCADE_DO_CANNY_PRUNING=1,
    CASCADE_SCALE_IMAGE=2,
    CASCADE_FIND_BIGGEST_OBJECT=4,
    CASCADE_DO_ROUGH_SEARCH=8
};

368
class CV_EXPORTS_W CascadeClassifier
369 370
{
public:
371
    CV_WRAP CascadeClassifier();
372
    CV_WRAP CascadeClassifier( const cv::String& filename );
373
    virtual ~CascadeClassifier();
A
Andrey Kamaev 已提交
374

375
    CV_WRAP virtual bool empty() const;
376
    CV_WRAP bool load( const cv::String& filename );
377
    virtual bool read( const FileNode& node );
378
    CV_WRAP virtual void detectMultiScale( const Mat& image,
379
                                   CV_OUT std::vector<Rect>& objects,
380 381 382
                                   double scaleFactor=1.1,
                                   int minNeighbors=3, int flags=0,
                                   Size minSize=Size(),
A
Alexey Kazakov 已提交
383 384 385
                                   Size maxSize=Size() );

    CV_WRAP virtual void detectMultiScale( const Mat& image,
386 387 388
                                   CV_OUT std::vector<Rect>& objects,
                                   std::vector<int>& rejectLevels,
                                   std::vector<double>& levelWeights,
A
Alexey Kazakov 已提交
389 390 391
                                   double scaleFactor=1.1,
                                   int minNeighbors=3, int flags=0,
                                   Size minSize=Size(),
392
                                   Size maxSize=Size(),
393
                                   bool outputRejectLevels=false );
394

395 396 397 398

    bool isOldFormatCascade() const;
    virtual Size getOriginalWindowSize() const;
    int getFeatureType() const;
K
Kirill Kornyakov 已提交
399
    bool setImage( const Mat& );
400 401

protected:
A
Alexey Kazakov 已提交
402
    //virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
403
    //                                int stripSize, int yStep, double factor, std::vector<Rect>& candidates );
A
Alexey Kazakov 已提交
404

405
    virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
406 407
                                    int stripSize, int yStep, double factor, std::vector<Rect>& candidates,
                                    std::vector<int>& rejectLevels, std::vector<double>& levelWeights, bool outputRejectLevels=false);
408

409
protected:
410 411 412 413
    enum { BOOST = 0 };
    enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,
           FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };

M
marina.kolpakova 已提交
414
    friend class CascadeClassifierInvoker;
415 416

    template<class FEval>
417
    friend int predictOrdered( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
418 419

    template<class FEval>
420
    friend int predictCategorical( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
421 422

    template<class FEval>
423
    friend int predictOrderedStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
424 425

    template<class FEval>
426
    friend int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
427

A
Andrey Kamaev 已提交
428 429
    bool setImage( Ptr<FeatureEvaluator>& feval, const Mat& image);
    virtual int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight );
430

431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
    class Data
    {
    public:
        struct CV_EXPORTS DTreeNode
        {
            int featureIdx;
            float threshold; // for ordered features only
            int left;
            int right;
        };

        struct CV_EXPORTS DTree
        {
            int nodeCount;
        };

        struct CV_EXPORTS Stage
        {
            int first;
            int ntrees;
            float threshold;
        };

        bool read(const FileNode &node);

        bool isStumpBased;

        int stageType;
        int featureType;
        int ncategories;
        Size origWinSize;

463 464 465 466 467
        std::vector<Stage> stages;
        std::vector<DTree> classifiers;
        std::vector<DTreeNode> nodes;
        std::vector<float> leaves;
        std::vector<int> subsets;
468
    };
469

470 471
    Data data;
    Ptr<FeatureEvaluator> featureEvaluator;
472
    Ptr<CvHaarClassifierCascade> oldCascade;
473 474

public:
475
    class CV_EXPORTS MaskGenerator
476
    {
477
    public:
A
Andrey Kamaev 已提交
478
        virtual ~MaskGenerator() {}
479 480
        virtual cv::Mat generateMask(const cv::Mat& src)=0;
        virtual void initializeMask(const cv::Mat& /*src*/) {};
481 482 483
    };
    void setMaskGenerator(Ptr<MaskGenerator> maskGenerator);
    Ptr<MaskGenerator> getMaskGenerator();
484 485 486

    void setFaceDetectionMaskGenerator();

487 488
protected:
    Ptr<MaskGenerator> maskGenerator;
489 490 491 492
};

//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////

493 494 495 496 497 498
// struct for detection region of interest (ROI)
struct DetectionROI
{
   // scale(size) of the bounding box
   double scale;
   // set of requrested locations to be evaluated
499
   std::vector<cv::Point> locations;
500
   // vector that will contain confidence values for each location
501
   std::vector<double> confidences;
502 503
};

504
struct CV_EXPORTS_W HOGDescriptor
505 506 507
{
public:
    enum { L2Hys=0 };
508
    enum { DEFAULT_NLEVELS=64 };
A
Andrey Kamaev 已提交
509

510
    CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
511
        cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
A
Andrey Kamaev 已提交
512
        histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
V
Vadim Pisarevsky 已提交
513
        nlevels(HOGDescriptor::DEFAULT_NLEVELS)
514
    {}
A
Andrey Kamaev 已提交
515

516
    CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
517
                  Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
V
Vadim Pisarevsky 已提交
518
                  int _histogramNormType=HOGDescriptor::L2Hys,
V
Vadim Pisarevsky 已提交
519 520
                  double _L2HysThreshold=0.2, bool _gammaCorrection=false,
                  int _nlevels=HOGDescriptor::DEFAULT_NLEVELS)
521 522 523
    : winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
    nbins(_nbins), derivAperture(_derivAperture), winSigma(_winSigma),
    histogramNormType(_histogramNormType), L2HysThreshold(_L2HysThreshold),
524
    gammaCorrection(_gammaCorrection), nlevels(_nlevels)
525
    {}
A
Andrey Kamaev 已提交
526

527
    CV_WRAP HOGDescriptor(const cv::String& filename)
528 529 530
    {
        load(filename);
    }
A
Andrey Kamaev 已提交
531

532 533 534 535
    HOGDescriptor(const HOGDescriptor& d)
    {
        d.copyTo(*this);
    }
A
Andrey Kamaev 已提交
536

537
    virtual ~HOGDescriptor() {}
A
Andrey Kamaev 已提交
538

539 540 541
    CV_WRAP size_t getDescriptorSize() const;
    CV_WRAP bool checkDetectorSize() const;
    CV_WRAP double getWinSigma() const;
A
Andrey Kamaev 已提交
542

543
    CV_WRAP virtual void setSVMDetector(InputArray _svmdetector);
A
Andrey Kamaev 已提交
544

545
    virtual bool read(FileNode& fn);
546
    virtual void write(FileStorage& fs, const cv::String& objname) const;
A
Andrey Kamaev 已提交
547

548 549
    CV_WRAP virtual bool load(const cv::String& filename, const cv::String& objname=cv::String());
    CV_WRAP virtual void save(const cv::String& filename, const cv::String& objname=cv::String()) const;
550
    virtual void copyTo(HOGDescriptor& c) const;
551

552
    CV_WRAP virtual void compute(const Mat& img,
553
                         CV_OUT std::vector<float>& descriptors,
554
                         Size winStride=Size(), Size padding=Size(),
555
                         const std::vector<Point>& locations=std::vector<Point>()) const;
556
    //with found weights output
557 558
    CV_WRAP virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
                        CV_OUT std::vector<double>& weights,
A
Andrey Kamaev 已提交
559
                        double hitThreshold=0, Size winStride=Size(),
560
                        Size padding=Size(),
561
                        const std::vector<Point>& searchLocations=std::vector<Point>()) const;
562
    //without found weights output
563
    virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
564 565
                        double hitThreshold=0, Size winStride=Size(),
                        Size padding=Size(),
566
                        const std::vector<Point>& searchLocations=std::vector<Point>()) const;
567
    //with result weights output
568 569
    CV_WRAP virtual void detectMultiScale(const Mat& img, CV_OUT std::vector<Rect>& foundLocations,
                                  CV_OUT std::vector<double>& foundWeights, double hitThreshold=0,
570 571 572
                                  Size winStride=Size(), Size padding=Size(), double scale=1.05,
                                  double finalThreshold=2.0,bool useMeanshiftGrouping = false) const;
    //without found weights output
573
    virtual void detectMultiScale(const Mat& img, CV_OUT std::vector<Rect>& foundLocations,
574
                                  double hitThreshold=0, Size winStride=Size(),
A
Andrey Kamaev 已提交
575
                                  Size padding=Size(), double scale=1.05,
576
                                  double finalThreshold=2.0, bool useMeanshiftGrouping = false) const;
577

578
    CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs,
579
                                 Size paddingTL=Size(), Size paddingBR=Size()) const;
A
Andrey Kamaev 已提交
580

581 582
    CV_WRAP static std::vector<float> getDefaultPeopleDetector();
    CV_WRAP static std::vector<float> getDaimlerPeopleDetector();
A
Andrey Kamaev 已提交
583

584 585 586 587 588 589 590 591 592 593
    CV_PROP Size winSize;
    CV_PROP Size blockSize;
    CV_PROP Size blockStride;
    CV_PROP Size cellSize;
    CV_PROP int nbins;
    CV_PROP int derivAperture;
    CV_PROP double winSigma;
    CV_PROP int histogramNormType;
    CV_PROP double L2HysThreshold;
    CV_PROP bool gammaCorrection;
594
    CV_PROP std::vector<float> svmDetector;
595
    CV_PROP int nlevels;
596 597


598
   // evaluate specified ROI and return confidence value for each location
599
   virtual void detectROI(const cv::Mat& img, const std::vector<cv::Point> &locations,
600 601 602 603 604 605 606 607 608 609 610 611
                                   CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
                                   double hitThreshold = 0, cv::Size winStride = Size(),
                                   cv::Size padding = Size()) const;

   // evaluate specified ROI and return confidence value for each location in multiple scales
   virtual void detectMultiScaleROI(const cv::Mat& img,
                                                       CV_OUT std::vector<cv::Rect>& foundLocations,
                                                       std::vector<DetectionROI>& locations,
                                                       double hitThreshold = 0,
                                                       int groupThreshold = 0) const;

   // read/parse Dalal's alt model file
612
   void readALTModel(cv::String modelfile);
613 614
};

/* C++ DataMatrix helpers exposed in the cv namespace. */
CV_EXPORTS_W void findDataMatrix(InputArray image,
                                 CV_OUT std::vector<cv::String>& codes,
                                 OutputArray corners=noArray(),
                                 OutputArrayOfArrays dmtx=noArray());
CV_EXPORTS_W void drawDataMatrixCodes(InputOutputArray image,
                                      const std::vector<cv::String>& codes,
                                      InputArray corners);
}

J
James Bowman 已提交
625 626 627 628
/****************************************************************************************\
*                                Datamatrix                                              *
\****************************************************************************************/

629
struct CV_EXPORTS CvDataMatrixCode {
J
James Bowman 已提交
630 631 632 633 634
  char msg[4];
  CvMat *original;
  CvMat *corners;
};

635
CV_EXPORTS std::deque<CvDataMatrixCode> cvFindDataMatrix(CvMat *im);
636 637 638 639 640 641 642 643 644 645 646 647 648

/****************************************************************************************\
*                                 LINE-MOD                                               *
\****************************************************************************************/

namespace cv {
namespace linemod {

/// @todo Convert doxy comments to rst

/**
 * \brief Discriminant feature described by its location and label.
 */
649
struct CV_EXPORTS Feature
650 651 652 653 654
{
  int x; ///< x offset
  int y; ///< y offset
  int label; ///< Quantization

A
Andrey Kamaev 已提交
655
  Feature() : x(0), y(0), label(0) {}
A
Andrey Kamaev 已提交
656
  Feature(int x, int y, int label);
657 658 659 660 661

  void read(const FileNode& fn);
  void write(FileStorage& fs) const;
};

A
Andrey Kamaev 已提交
662 663
inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}

664
struct CV_EXPORTS Template
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709
{
  int width;
  int height;
  int pyramid_level;
  std::vector<Feature> features;

  void read(const FileNode& fn);
  void write(FileStorage& fs) const;
};

/**
 * \brief Represents a modality operating over an image pyramid.
 */
class QuantizedPyramid
{
public:
  // Virtual destructor
  virtual ~QuantizedPyramid() {}

  /**
   * \brief Compute quantized image at current pyramid level for online detection.
   *
   * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
   *                 representing its classification.
   */
  virtual void quantize(Mat& dst) const =0;

  /**
   * \brief Extract most discriminant features at current pyramid level to form a new template.
   *
   * \param[out] templ The new template.
   */
  virtual bool extractTemplate(Template& templ) const =0;

  /**
   * \brief Go to the next pyramid level.
   *
   * \todo Allow pyramid scale factor other than 2
   */
  virtual void pyrDown() =0;

protected:
  /// Candidate feature with a score
  struct Candidate
  {
    Candidate(int x, int y, int label, float score);

    /// Sort candidates with high score to the front
    bool operator<(const Candidate& rhs) const
    {
      return score > rhs.score;
    }

    Feature f;
    float score;
  };

  /**
   * \brief Choose candidate features so that they are not bunched together.
   *
   * \param[in]  candidates   Candidate features sorted by score.
   * \param[out] features     Destination vector of selected features.
   * \param[in]  num_features Number of candidates to select.
   * \param[in]  distance     Hint for desired distance between features.
   */
  static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
                                      std::vector<Feature>& features,
                                      size_t num_features, float distance);
};

A
Andrey Kamaev 已提交
735 736
inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}

737 738 739 740 741
/**
 * \brief Interface for modalities that plug into the LINE template matching representation.
 *
 * \todo Max response, to allow optimization of summing (255/MAX) features as uint8
 */
742
class CV_EXPORTS Modality
743 744 745 746 747 748 749 750 751 752 753 754 755
{
public:
  // Virtual destructor
  virtual ~Modality() {}

  /**
   * \brief Form a quantized image pyramid from a source image.
   *
   * \param[in] src  The source image. Type depends on the modality.
   * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
   *                 in quantized image and cannot be extracted as features.
   */
  Ptr<QuantizedPyramid> process(const Mat& src,
756
                    const Mat& mask = Mat()) const
757 758 759 760
  {
    return processImpl(src, mask);
  }

761
  virtual cv::String name() const =0;
762 763 764 765 766 767 768 769 770 771 772

  virtual void read(const FileNode& fn) =0;
  virtual void write(FileStorage& fs) const =0;

  /**
   * \brief Create modality by name.
   *
   * The following modality types are supported:
   * - "ColorGradient"
   * - "DepthNormal"
   */
773
  static Ptr<Modality> create(const cv::String& modality_type);
774 775 776 777 778 779 780 781 782

  /**
   * \brief Load a modality from file.
   */
  static Ptr<Modality> create(const FileNode& fn);

protected:
  // Indirection is because process() has a default parameter.
  virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
783
                        const Mat& mask) const =0;
784 785 786 787 788
};

/**
 * \brief Modality that computes quantized gradient orientations from a color image.
 */
789
class CV_EXPORTS ColorGradient : public Modality
790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806
{
public:
  /**
   * \brief Default constructor. Uses reasonable default parameter values.
   */
  ColorGradient();

  /**
   * \brief Constructor.
   *
   * \param weak_threshold   When quantizing, discard gradients with magnitude less than this.
   * \param num_features     How many features a template must contain.
   * \param strong_threshold Consider as candidate features only gradients whose norms are
   *                         larger than this.
   */
  ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);

807
  virtual cv::String name() const;
808 809 810 811 812 813 814 815 816 817

  virtual void read(const FileNode& fn);
  virtual void write(FileStorage& fs) const;

  float weak_threshold;
  size_t num_features;
  float strong_threshold;

protected:
  virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
818
                        const Mat& mask) const;
819 820 821 822 823
};

/**
 * \brief Modality that computes quantized surface normals from a dense depth map.
 */
824
class CV_EXPORTS DepthNormal : public Modality
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
{
public:
  /**
   * \brief Default constructor. Uses reasonable default parameter values.
   */
  DepthNormal();

  /**
   * \brief Constructor.
   *
   * \param distance_threshold   Ignore pixels beyond this distance.
   * \param difference_threshold When computing normals, ignore contributions of pixels whose
   *                             depth difference with the central pixel is above this threshold.
   * \param num_features         How many features a template must contain.
   * \param extract_threshold    Consider as candidate feature only if there are no differing
   *                             orientations within a distance of extract_threshold.
   */
  DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
              int extract_threshold);

845
  virtual cv::String name() const;
846 847 848 849 850 851 852 853 854 855 856

  virtual void read(const FileNode& fn);
  virtual void write(FileStorage& fs) const;

  int distance_threshold;
  int difference_threshold;
  size_t num_features;
  int extract_threshold;

protected:
  virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
857
                        const Mat& mask) const;
858 859 860 861 862 863 864 865 866 867
};

/**
 * \brief Debug function to colormap a quantized image for viewing.
 *
 * \param[in]  quantized Quantized image (as produced by QuantizedPyramid::quantize).
 * \param[out] dst       Color visualization -- NOTE(review): exact color mapping is
 *                       implementation-defined; confirm against the .cpp.
 */
void colormap(const Mat& quantized, Mat& dst);

/**
 * \brief Represents a successful template match.
 */
868
struct CV_EXPORTS Match
869 870 871 872 873
{
  Match()
  {
  }

874
  Match(int x, int y, float similarity, const cv::String& class_id, int template_id);
875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893

  /// Sort matches with high similarity to the front
  bool operator<(const Match& rhs) const
  {
    // Secondarily sort on template_id for the sake of duplicate removal
    if (similarity != rhs.similarity)
      return similarity > rhs.similarity;
    else
      return template_id < rhs.template_id;
  }

  bool operator==(const Match& rhs) const
  {
    return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
  }

  int x;
  int y;
  float similarity;
894
  cv::String class_id;
895 896 897
  int template_id;
};

898
inline  Match::Match(int _x, int _y, float _similarity, const cv::String& _class_id, int _template_id)
A
Andrey Kamaev 已提交
899 900 901 902
    : x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
  {
  }

903 904 905 906
/**
 * \brief Object detector using the LINE template matching algorithm with any set of
 * modalities.
 */
907
class CV_EXPORTS Detector
908 909 910 911 912 913 914 915 916 917 918 919 920 921
{
public:
  /**
   * \brief Empty constructor, initialize with read().
   */
  Detector();

  /**
   * \brief Constructor.
   *
   * \param modalities       Modalities to use (color gradients, depth normals, ...).
   * \param T_pyramid        Value of the sampling step T at each pyramid level. The
   *                         number of pyramid levels is T_pyramid.size().
   */
922
  Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939

  /**
   * \brief Detect objects by template matching.
   *
   * Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid.
   *
   * \param      sources   Source images, one for each modality.
   * \param      threshold Similarity threshold, a percentage between 0 and 100.
   * \param[out] matches   Template matches, sorted by similarity score.
   * \param      class_ids If non-empty, only search for the desired object classes.
   * \param[out] quantized_images Optionally return vector<Mat> of quantized images.
   * \param      masks     The masks for consideration during matching. The masks should be CV_8UC1
   *                       where 255 represents a valid pixel.  If non-empty, the vector must be
   *                       the same size as sources.  Each element must be
   *                       empty or the same size as its corresponding source.
   */
  void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
940
             const std::vector<cv::String>& class_ids = std::vector<cv::String>(),
941 942 943 944 945 946 947 948 949 950 951 952 953
             OutputArrayOfArrays quantized_images = noArray(),
             const std::vector<Mat>& masks = std::vector<Mat>()) const;

  /**
   * \brief Add new object template.
   *
   * \param      sources      Source images, one for each modality.
   * \param      class_id     Object class ID.
   * \param      object_mask  Mask separating object from background.
   * \param[out] bounding_box Optionally return bounding box of the extracted features.
   *
   * \return Template ID, or -1 if failed to extract a valid template.
   */
954
  int addTemplate(const std::vector<Mat>& sources, const cv::String& class_id,
955
          const Mat& object_mask, Rect* bounding_box = NULL);
956 957 958 959

  /**
   * \brief Add a new object template computed by external means.
   */
960
  int addSyntheticTemplate(const std::vector<Template>& templates, const cv::String& class_id);
961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985

  /**
   * \brief Get the modalities used by this detector.
   *
   * You are not permitted to add/remove modalities, but you may dynamic_cast them to
   * tweak parameters.
   */
  const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }

  /**
   * \brief Get sampling step T at pyramid_level.
   */
  int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }

  /**
   * \brief Get number of pyramid levels used by this detector.
   */
  int pyramidLevels() const { return pyramid_levels; }

  /**
   * \brief Get the template pyramid identified by template_id.
   *
   * For example, with 2 modalities (Gradient, Normal) and two pyramid levels
   * (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
   */
986
  const std::vector<Template>& getTemplates(const cv::String& class_id, int template_id) const;
987 988

  int numTemplates() const;
989
  int numTemplates(const cv::String& class_id) const;
990
  int numClasses() const { return static_cast<int>(class_templates.size()); }
991

992
  std::vector<cv::String> classIds() const;
993 994 995 996

  void read(const FileNode& fn);
  void write(FileStorage& fs) const;

997 998
  cv::String readClass(const FileNode& fn, const cv::String &class_id_override = "");
  void writeClass(const cv::String& class_id, FileStorage& fs) const;
999

1000 1001 1002
  void readClasses(const std::vector<cv::String>& class_ids,
                   const cv::String& format = "templates_%s.yml.gz");
  void writeClasses(const cv::String& format = "templates_%s.yml.gz") const;
1003 1004 1005 1006 1007 1008 1009

protected:
  std::vector< Ptr<Modality> > modalities;
  int pyramid_levels;
  std::vector<int> T_at_level;

  typedef std::vector<Template> TemplatePyramid;
1010
  typedef std::map<cv::String, std::vector<TemplatePyramid> > TemplatesMap;
1011 1012 1013 1014 1015 1016 1017 1018 1019
  TemplatesMap class_templates;

  typedef std::vector<Mat> LinearMemories;
  // Indexed as [pyramid level][modality][quantized label]
  typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;

  void matchClass(const LinearMemoryPyramid& lm_pyramid,
                  const std::vector<Size>& sizes,
                  float threshold, std::vector<Match>& matches,
1020
                  const cv::String& class_id,
1021 1022 1023 1024 1025 1026 1027 1028
                  const std::vector<TemplatePyramid>& template_pyramids) const;
};

/**
 * \brief Factory function for detector using LINE algorithm with color gradients.
 *
 * Default parameter settings suitable for VGA images.
 */
1029
CV_EXPORTS Ptr<Detector> getDefaultLINE();
1030 1031 1032 1033 1034 1035 1036

/**
 * \brief Factory function for detector using LINE-MOD algorithm with color gradients
 * and depth normals.
 *
 * Default parameter settings suitable for VGA images.
 */
1037
CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
1038 1039 1040 1041

} // namespace linemod
} // namespace cv

#endif

#endif