lkpyramid.cpp 46.3 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
10
//                           License Agreement
11 12 13
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
14
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
15 16 17 18 19 20 21 22 23 24 25 26
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
27
//   * The name of the copyright holders may not be used to endorse or promote products
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <float.h>
#include <stdio.h>
45
#include "lkpyramid.hpp"
46
#include "opencl_kernels.hpp"
47

48 49
#define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))

50
namespace
51
{
52
static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
53
{
54 55
    using namespace cv;
    using cv::detail::deriv_type;
56 57 58
    int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
    CV_Assert(depth == CV_8U);
    dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
59

60
#ifdef HAVE_TEGRA_OPTIMIZATION
61 62
    if (tegra::calcSharrDeriv(src, dst))
        return;
63 64
#endif

65 66 67
    int x, y, delta = (int)alignSize((cols + 2)*cn, 16);
    AutoBuffer<deriv_type> _tempBuf(delta*2 + 64);
    deriv_type *trow0 = alignPtr(_tempBuf + cn, 16), *trow1 = alignPtr(trow0 + delta, 16);
68

69 70 71
#if CV_SSE2
    __m128i z = _mm_setzero_si128(), c3 = _mm_set1_epi16(3), c10 = _mm_set1_epi16(10);
#endif
72

73
    for( y = 0; y < rows; y++ )
74
    {
75 76 77 78
        const uchar* srow0 = src.ptr<uchar>(y > 0 ? y-1 : rows > 1 ? 1 : 0);
        const uchar* srow1 = src.ptr<uchar>(y);
        const uchar* srow2 = src.ptr<uchar>(y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
        deriv_type* drow = dst.ptr<deriv_type>(y);
79

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
        // do vertical convolution
        x = 0;
#if CV_SSE2
        for( ; x <= colsn - 8; x += 8 )
        {
            __m128i s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow0 + x)), z);
            __m128i s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow1 + x)), z);
            __m128i s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow2 + x)), z);
            __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(_mm_add_epi16(s0, s2), c3), _mm_mullo_epi16(s1, c10));
            __m128i t1 = _mm_sub_epi16(s2, s0);
            _mm_store_si128((__m128i*)(trow0 + x), t0);
            _mm_store_si128((__m128i*)(trow1 + x), t1);
        }
#endif
        for( ; x < colsn; x++ )
        {
            int t0 = (srow0[x] + srow2[x])*3 + srow1[x]*10;
            int t1 = srow2[x] - srow0[x];
            trow0[x] = (deriv_type)t0;
            trow1[x] = (deriv_type)t1;
        }
101

102 103 104 105 106 107 108
        // make border
        int x0 = (cols > 1 ? 1 : 0)*cn, x1 = (cols > 1 ? cols-2 : 0)*cn;
        for( int k = 0; k < cn; k++ )
        {
            trow0[-cn + k] = trow0[x0 + k]; trow0[colsn + k] = trow0[x1 + k];
            trow1[-cn + k] = trow1[x0 + k]; trow1[colsn + k] = trow1[x1 + k];
        }
109

110 111 112 113 114 115 116 117 118 119
        // do horizontal convolution, interleave the results and store them to dst
        x = 0;
#if CV_SSE2
        for( ; x <= colsn - 8; x += 8 )
        {
            __m128i s0 = _mm_loadu_si128((const __m128i*)(trow0 + x - cn));
            __m128i s1 = _mm_loadu_si128((const __m128i*)(trow0 + x + cn));
            __m128i s2 = _mm_loadu_si128((const __m128i*)(trow1 + x - cn));
            __m128i s3 = _mm_load_si128((const __m128i*)(trow1 + x));
            __m128i s4 = _mm_loadu_si128((const __m128i*)(trow1 + x + cn));
120

121 122 123 124 125 126 127 128
            __m128i t0 = _mm_sub_epi16(s1, s0);
            __m128i t1 = _mm_add_epi16(_mm_mullo_epi16(_mm_add_epi16(s2, s4), c3), _mm_mullo_epi16(s3, c10));
            __m128i t2 = _mm_unpacklo_epi16(t0, t1);
            t0 = _mm_unpackhi_epi16(t0, t1);
            // this can probably be replaced with aligned stores if we aligned dst properly.
            _mm_storeu_si128((__m128i*)(drow + x*2), t2);
            _mm_storeu_si128((__m128i*)(drow + x*2 + 8), t0);
        }
129
#endif
130 131 132 133 134 135
        for( ; x < colsn; x++ )
        {
            deriv_type t0 = (deriv_type)(trow0[x+cn] - trow0[x-cn]);
            deriv_type t1 = (deriv_type)((trow1[x+cn] + trow1[x-cn])*3 + trow1[x]*10);
            drow[x*2] = t0; drow[x*2+1] = t1;
        }
136
    }
137
}
138

139
}//namespace
140

141 142
cv::detail::LKTrackerInvoker::LKTrackerInvoker(
                      const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
143 144 145
                      const Point2f* _prevPts, Point2f* _nextPts,
                      uchar* _status, float* _err,
                      Size _winSize, TermCriteria _criteria,
146
                      int _level, int _maxLevel, int _flags, float _minEigThreshold )
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
{
    prevImg = &_prevImg;
    prevDeriv = &_prevDeriv;
    nextImg = &_nextImg;
    prevPts = _prevPts;
    nextPts = _nextPts;
    status = _status;
    err = _err;
    winSize = _winSize;
    criteria = _criteria;
    level = _level;
    maxLevel = _maxLevel;
    flags = _flags;
    minEigThreshold = _minEigThreshold;
}

163 164 165 166 167 168 169 170
#if defined __arm__ && !CV_NEON
typedef int64 acctype;
typedef int itemtype;
#else
typedef float acctype;
typedef float itemtype;
#endif

171
void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
172 173 174 175 176
{
    Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
    const Mat& I = *prevImg;
    const Mat& J = *nextImg;
    const Mat& derivI = *prevDeriv;
177

178 179 180
    int j, cn = I.channels(), cn2 = cn*2;
    cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
    int derivDepth = DataType<deriv_type>::depth;
181

182 183
    Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
    Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
184

185
    for( int ptidx = range.start; ptidx < range.end; ptidx++ )
186
    {
187 188 189 190 191 192 193 194 195 196 197 198
        Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
        Point2f nextPt;
        if( level == maxLevel )
        {
            if( flags & OPTFLOW_USE_INITIAL_FLOW )
                nextPt = nextPts[ptidx]*(float)(1./(1 << level));
            else
                nextPt = prevPt;
        }
        else
            nextPt = nextPts[ptidx]*2.f;
        nextPts[ptidx] = nextPt;
199

200 201 202 203
        Point2i iprevPt, inextPt;
        prevPt -= halfWin;
        iprevPt.x = cvFloor(prevPt.x);
        iprevPt.y = cvFloor(prevPt.y);
204

205 206
        if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols ||
            iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows )
207
        {
208
            if( level == 0 )
209
            {
210 211 212 213
                if( status )
                    status[ptidx] = false;
                if( err )
                    err[ptidx] = 0;
214
            }
215 216
            continue;
        }
217

218 219 220 221 222 223 224 225
        float a = prevPt.x - iprevPt.x;
        float b = prevPt.y - iprevPt.y;
        const int W_BITS = 14, W_BITS1 = 14;
        const float FLT_SCALE = 1.f/(1 << 20);
        int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
        int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
        int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
        int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
226

227
        int dstep = (int)(derivI.step/derivI.elemSize1());
228 229
        int stepI = (int)(I.step/I.elemSize1());
        int stepJ = (int)(J.step/J.elemSize1());
230 231
        acctype iA11 = 0, iA12 = 0, iA22 = 0;
        float A11, A12, A22;
232

233 234 235 236 237 238 239 240
#if CV_SSE2
        __m128i qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
        __m128i qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
        __m128i z = _mm_setzero_si128();
        __m128i qdelta_d = _mm_set1_epi32(1 << (W_BITS1-1));
        __m128i qdelta = _mm_set1_epi32(1 << (W_BITS1-5-1));
        __m128 qA11 = _mm_setzero_ps(), qA12 = _mm_setzero_ps(), qA22 = _mm_setzero_ps();
#endif
241

242 243 244 245
        // extract the patch from the first image, compute covariation matrix of derivatives
        int x, y;
        for( y = 0; y < winSize.height; y++ )
        {
246
            const uchar* src = (const uchar*)I.data + (y + iprevPt.y)*stepI + iprevPt.x*cn;
247
            const deriv_type* dsrc = (const deriv_type*)derivI.data + (y + iprevPt.y)*dstep + iprevPt.x*cn2;
248

249 250
            deriv_type* Iptr = (deriv_type*)(IWinBuf.data + y*IWinBuf.step);
            deriv_type* dIptr = (deriv_type*)(derivIWinBuf.data + y*derivIWinBuf.step);
251

252
            x = 0;
253

254 255
#if CV_SSE2
            for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
256
            {
257
                __m128i v00, v01, v10, v11, t0, t1;
258

259 260
                v00 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x)), z);
                v01 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + cn)), z);
261 262
                v10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI)), z);
                v11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI + cn)), z);
263

264 265 266 267
                t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
                _mm_storel_epi64((__m128i*)(Iptr + x), _mm_packs_epi32(t0,t0));
268

269 270 271 272
                v00 = _mm_loadu_si128((const __m128i*)(dsrc));
                v01 = _mm_loadu_si128((const __m128i*)(dsrc + cn2));
                v10 = _mm_loadu_si128((const __m128i*)(dsrc + dstep));
                v11 = _mm_loadu_si128((const __m128i*)(dsrc + dstep + cn2));
273

274 275 276 277 278 279 280
                t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
                t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta_d), W_BITS1);
                t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta_d), W_BITS1);
                v00 = _mm_packs_epi32(t0, t1); // Ix0 Iy0 Ix1 Iy1 ...
281

282 283 284
                _mm_storeu_si128((__m128i*)dIptr, v00);
                t0 = _mm_srai_epi32(v00, 16); // Iy0 Iy1 Iy2 Iy3
                t1 = _mm_srai_epi32(_mm_slli_epi32(v00, 16), 16); // Ix0 Ix1 Ix2 Ix3
285

286 287
                __m128 fy = _mm_cvtepi32_ps(t0);
                __m128 fx = _mm_cvtepi32_ps(t1);
288

289 290 291
                qA22 = _mm_add_ps(qA22, _mm_mul_ps(fy, fy));
                qA12 = _mm_add_ps(qA12, _mm_mul_ps(fx, fy));
                qA11 = _mm_add_ps(qA11, _mm_mul_ps(fx, fx));
292
            }
293
#endif
294

295 296 297
            for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 )
            {
                int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
298
                                      src[x+stepI]*iw10 + src[x+stepI+cn]*iw11, W_BITS1-5);
299 300 301 302
                int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
                                       dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
                int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
                                       dsrc[dstep+cn2+1]*iw11, W_BITS1);
303

304 305 306
                Iptr[x] = (short)ival;
                dIptr[0] = (short)ixval;
                dIptr[1] = (short)iyval;
307

308 309 310
                iA11 += (itemtype)(ixval*ixval);
                iA12 += (itemtype)(ixval*iyval);
                iA22 += (itemtype)(iyval*iyval);
311 312
            }
        }
313

314 315 316 317 318
#if CV_SSE2
        float CV_DECL_ALIGNED(16) A11buf[4], A12buf[4], A22buf[4];
        _mm_store_ps(A11buf, qA11);
        _mm_store_ps(A12buf, qA12);
        _mm_store_ps(A22buf, qA22);
319 320 321
        iA11 += A11buf[0] + A11buf[1] + A11buf[2] + A11buf[3];
        iA12 += A12buf[0] + A12buf[1] + A12buf[2] + A12buf[3];
        iA22 += A22buf[0] + A22buf[1] + A22buf[2] + A22buf[3];
322
#endif
323

324 325 326
        A11 = iA11*FLT_SCALE;
        A12 = iA12*FLT_SCALE;
        A22 = iA22*FLT_SCALE;
327

328 329 330
        float D = A11*A22 - A12*A12;
        float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
                        4.f*A12*A12))/(2*winSize.width*winSize.height);
331

332
        if( err && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0 )
333
            err[ptidx] = (float)minEig;
334

335 336 337 338 339 340
        if( minEig < minEigThreshold || D < FLT_EPSILON )
        {
            if( level == 0 && status )
                status[ptidx] = false;
            continue;
        }
341

342
        D = 1.f/D;
343

344 345
        nextPt -= halfWin;
        Point2f prevDelta;
346

347 348 349 350
        for( j = 0; j < criteria.maxCount; j++ )
        {
            inextPt.x = cvFloor(nextPt.x);
            inextPt.y = cvFloor(nextPt.y);
351

352 353 354 355 356 357 358
            if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
               inextPt.y < -winSize.height || inextPt.y >= J.rows )
            {
                if( level == 0 && status )
                    status[ptidx] = false;
                break;
            }
359

360 361 362 363 364 365
            a = nextPt.x - inextPt.x;
            b = nextPt.y - inextPt.y;
            iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
            iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
            iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
366 367
            acctype ib1 = 0, ib2 = 0;
            float b1, b2;
368
#if CV_SSE2
369 370 371
            qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
            qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
            __m128 qb0 = _mm_setzero_ps(), qb1 = _mm_setzero_ps();
372
#endif
373

374 375
            for( y = 0; y < winSize.height; y++ )
            {
376
                const uchar* Jptr = (const uchar*)J.data + (y + inextPt.y)*stepJ + inextPt.x*cn;
377 378
                const deriv_type* Iptr = (const deriv_type*)(IWinBuf.data + y*IWinBuf.step);
                const deriv_type* dIptr = (const deriv_type*)(derivIWinBuf.data + y*derivIWinBuf.step);
379

380
                x = 0;
381

382
#if CV_SSE2
383
                for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
384
                {
385 386 387
                    __m128i diff0 = _mm_loadu_si128((const __m128i*)(Iptr + x)), diff1;
                    __m128i v00 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x)), z);
                    __m128i v01 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + cn)), z);
388 389
                    __m128i v10 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ)), z);
                    __m128i v11 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ + cn)), z);
390

391 392 393 394
                    __m128i t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                               _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                    __m128i t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
                                               _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
395
                    t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
396 397 398 399
                    t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta), W_BITS1-5);
                    diff0 = _mm_subs_epi16(_mm_packs_epi32(t0, t1), diff0);
                    diff1 = _mm_unpackhi_epi16(diff0, diff0);
                    diff0 = _mm_unpacklo_epi16(diff0, diff0); // It0 It0 It1 It1 ...
400
                    v00 = _mm_loadu_si128((const __m128i*)(dIptr)); // Ix0 Iy0 Ix1 Iy1 ...
401 402 403 404 405 406 407 408 409 410 411 412 413
                    v01 = _mm_loadu_si128((const __m128i*)(dIptr + 8));
                    v10 = _mm_mullo_epi16(v00, diff0);
                    v11 = _mm_mulhi_epi16(v00, diff0);
                    v00 = _mm_unpacklo_epi16(v10, v11);
                    v10 = _mm_unpackhi_epi16(v10, v11);
                    qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
                    qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
                    v10 = _mm_mullo_epi16(v01, diff1);
                    v11 = _mm_mulhi_epi16(v01, diff1);
                    v00 = _mm_unpacklo_epi16(v10, v11);
                    v10 = _mm_unpackhi_epi16(v10, v11);
                    qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
                    qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
414 415
                }
#endif
416

417
                for( ; x < winSize.width*cn; x++, dIptr += 2 )
418
                {
419
                    int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
420
                                          Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
421
                                          W_BITS1-5) - Iptr[x];
422 423
                    ib1 += (itemtype)(diff*dIptr[0]);
                    ib2 += (itemtype)(diff*dIptr[1]);
424 425
                }
            }
426

427
#if CV_SSE2
428 429
            float CV_DECL_ALIGNED(16) bbuf[4];
            _mm_store_ps(bbuf, _mm_add_ps(qb0, qb1));
430 431
            ib1 += bbuf[0] + bbuf[2];
            ib2 += bbuf[1] + bbuf[3];
432
#endif
433

434 435
            b1 = ib1*FLT_SCALE;
            b2 = ib2*FLT_SCALE;
436

437 438 439
            Point2f delta( (float)((A12*b2 - A22*b1) * D),
                          (float)((A12*b1 - A11*b2) * D));
            //delta = -delta;
440

441 442
            nextPt += delta;
            nextPts[ptidx] = nextPt + halfWin;
443

444 445
            if( delta.ddot(delta) <= criteria.epsilon )
                break;
446

447 448
            if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 &&
               std::abs(delta.y + prevDelta.y) < 0.01 )
449
            {
450 451
                nextPts[ptidx] -= delta*0.5f;
                break;
452
            }
453 454
            prevDelta = delta;
        }
455

456
        if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 )
457
        {
458 459 460 461 462 463 464 465
            Point2f nextPoint = nextPts[ptidx] - halfWin;
            Point inextPoint;

            inextPoint.x = cvFloor(nextPoint.x);
            inextPoint.y = cvFloor(nextPoint.y);

            if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols ||
                inextPoint.y < -winSize.height || inextPoint.y >= J.rows )
466
            {
467 468 469
                if( status )
                    status[ptidx] = false;
                continue;
470
            }
471 472 473 474 475 476

            float aa = nextPoint.x - inextPoint.x;
            float bb = nextPoint.y - inextPoint.y;
            iw00 = cvRound((1.f - aa)*(1.f - bb)*(1 << W_BITS));
            iw01 = cvRound(aa*(1.f - bb)*(1 << W_BITS));
            iw10 = cvRound((1.f - aa)*bb*(1 << W_BITS));
477 478
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
            float errval = 0.f;
479

480
            for( y = 0; y < winSize.height; y++ )
481
            {
482
                const uchar* Jptr = (const uchar*)J.data + (y + inextPoint.y)*stepJ + inextPoint.x*cn;
483
                const deriv_type* Iptr = (const deriv_type*)(IWinBuf.data + y*IWinBuf.step);
484

485
                for( x = 0; x < winSize.width*cn; x++ )
486
                {
487
                    int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
488
                                          Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
489 490
                                          W_BITS1-5) - Iptr[x];
                    errval += std::abs((float)diff);
491 492
                }
            }
493
            err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height);
494 495 496
        }
    }
}
497

A
Andrey Kamaev 已提交
498 499 500 501 502 503 504 505 506
int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives,
                                int pyrBorder, int derivBorder, bool tryReuseInputImage)
{
    Mat img = _img.getMat();
    CV_Assert(img.depth() == CV_8U && winSize.width > 2 && winSize.height > 2 );
    int pyrstep = withDerivatives ? 2 : 1;

    pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0);

507
    int derivType = CV_MAKETYPE(DataType<cv::detail::deriv_type>::depth, img.channels() * 2);
A
Andrey Kamaev 已提交
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527

    //level 0
    bool lvl0IsSet = false;
    if(tryReuseInputImage && img.isSubmatrix() && (pyrBorder & BORDER_ISOLATED) == 0)
    {
        Size wholeSize;
        Point ofs;
        img.locateROI(wholeSize, ofs);
        if (ofs.x >= winSize.width && ofs.y >= winSize.height
              && ofs.x + img.cols + winSize.width <= wholeSize.width
              && ofs.y + img.rows + winSize.height <= wholeSize.height)
        {
            pyramid.getMatRef(0) = img;
            lvl0IsSet = true;
        }
    }

    if(!lvl0IsSet)
    {
        Mat& temp = pyramid.getMatRef(0);
528

A
Andrey Kamaev 已提交
529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
        if(!temp.empty())
            temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
        if(temp.type() != img.type() || temp.cols != winSize.width*2 + img.cols || temp.rows != winSize.height * 2 + img.rows)
            temp.create(img.rows + winSize.height*2, img.cols + winSize.width*2, img.type());

        if(pyrBorder == BORDER_TRANSPARENT)
            img.copyTo(temp(Rect(winSize.width, winSize.height, img.cols, img.rows)));
        else
            copyMakeBorder(img, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder);
        temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
    }

    Size sz = img.size();
    Mat prevLevel = pyramid.getMatRef(0);
    Mat thisLevel = prevLevel;

    for(int level = 0; level <= maxLevel; ++level)
    {
        if (level != 0)
        {
            Mat& temp = pyramid.getMatRef(level * pyrstep);
550

A
Andrey Kamaev 已提交
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593
            if(!temp.empty())
                temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
            if(temp.type() != img.type() || temp.cols != winSize.width*2 + sz.width || temp.rows != winSize.height * 2 + sz.height)
                temp.create(sz.height + winSize.height*2, sz.width + winSize.width*2, img.type());

            thisLevel = temp(Rect(winSize.width, winSize.height, sz.width, sz.height));
            pyrDown(prevLevel, thisLevel, sz);

            if(pyrBorder != BORDER_TRANSPARENT)
                copyMakeBorder(thisLevel, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder|BORDER_ISOLATED);
            temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
        }

        if(withDerivatives)
        {
            Mat& deriv = pyramid.getMatRef(level * pyrstep + 1);

            if(!deriv.empty())
                deriv.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
            if(deriv.type() != derivType || deriv.cols != winSize.width*2 + sz.width || deriv.rows != winSize.height * 2 + sz.height)
                deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType);

            Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height));
            calcSharrDeriv(thisLevel, derivI);

            if(derivBorder != BORDER_TRANSPARENT)
                copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED);
            deriv.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
        }

        sz = Size((sz.width+1)/2, (sz.height+1)/2);
        if( sz.width <= winSize.width || sz.height <= winSize.height )
        {
            pyramid.create(1, (level + 1) * pyrstep, 0 /*type*/, -1, true, 0);//check this
            return level;
        }

        prevLevel = thisLevel;
    }

    return maxLevel;
}

594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613
namespace cv
{
    class PyrLKOpticalFlow
    {
        struct dim3
        {
            unsigned int x, y, z;
        };
    public:
        PyrLKOpticalFlow()
        {
            winSize = Size(21, 21);
            maxLevel = 3;
            iters = 30;
            derivLambda = 0.5;
            useInitialFlow = false;
            //minEigThreshold = 1e-4f;
            //getMinEigenVals = false;
        }

614
        bool checkParam()
615
        {
616
            iters = std::min(std::max(iters, 0), 100);
617 618 619 620 621 622

            derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
            if (derivLambda < 0)
                return false;
            if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
                return false;
V
vbystricky 已提交
623
            calcPatchSize();
624 625 626 627
            if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
                return false;
            if (!initWaveSize())
                return false;
628 629 630 631 632 633 634
            return true;
        }

        bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err)
        {
            if (!checkParam())
                return false;
635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657

            UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
            UMat temp2 = nextPts.reshape(1);
            multiply(1.0f / (1 << maxLevel) /2.0f, temp1, temp2);

            status.setTo(Scalar::all(1));

            // build the image pyramids.
            std::vector<UMat> prevPyr; prevPyr.resize(maxLevel + 1);
            std::vector<UMat> nextPyr; nextPyr.resize(maxLevel + 1);

            prevImg.convertTo(prevPyr[0], CV_32F);
            nextImg.convertTo(nextPyr[0], CV_32F);

            for (int level = 1; level <= maxLevel; ++level)
            {
                pyrDown(prevPyr[level - 1], prevPyr[level]);
                pyrDown(nextPyr[level - 1], nextPyr[level]);
            }

            // dI/dx ~ Ix, dI/dy ~ Iy
            for (int level = maxLevel; level >= 0; level--)
            {
658 659
                if (!lkSparse_run(prevPyr[level], nextPyr[level], prevPts,
                                  nextPts, status, err,
V
vbystricky 已提交
660
                                  prevPts.cols, level))
661
                    return false;
662 663 664 665 666 667 668 669 670 671 672 673 674
            }
            return true;
        }

        Size winSize;
        int maxLevel;
        int iters;
        double derivLambda;
        bool useInitialFlow;
        //float minEigThreshold;
        //bool getMinEigenVals;

    private:
V
vbystricky 已提交
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
        int waveSize;
        bool initWaveSize()
        {
            waveSize = 1;
            if (isDeviceCPU())
                return true;

            ocl::Kernel kernel;
            if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
                return false;
            waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
            return true;
        }
        dim3 patch;
        void calcPatchSize()
690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711
        {
            dim3 block;
            //winSize.width *= cn;

            if (winSize.width > 32 && winSize.width > 2 * winSize.height)
            {
                block.x = 32;
                block.y = 8;
            }
            else
            {
                block.x = 16;
                block.y = 16;
            }

            patch.x = (winSize.width  + block.x - 1) / block.x;
            patch.y = (winSize.height + block.y - 1) / block.y;

            block.z = patch.z = 1;
        }

        bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err,
V
vbystricky 已提交
712
            int ptcount, int level)
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758
        {
            size_t localThreads[3]  = { 8, 8};
            size_t globalThreads[3] = { 8 * ptcount, 8};
            char calcErr = (0 == level) ? 1 : 0;

            cv::String build_options;
            if (isDeviceCPU())
                build_options = " -D CPU";
            else
                build_options = cv::format("-D WAVE_SIZE=%d", waveSize);

            ocl::Kernel kernel;
            if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
                return false;

            ocl::Image2D imageI(I);
            ocl::Image2D imageJ(J);
            int idxArg = 0;
            idxArg = kernel.set(idxArg, imageI); //image2d_t I
            idxArg = kernel.set(idxArg, imageJ); //image2d_t J
            idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
            idxArg = kernel.set(idxArg, (int)prevPts.step); // int prevPtsStep
            idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
            idxArg = kernel.set(idxArg, (int)nextPts.step); //  int nextPtsStep
            idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
            idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
            idxArg = kernel.set(idxArg, (int)level); // const int level
            idxArg = kernel.set(idxArg, (int)I.rows); // const int rows
            idxArg = kernel.set(idxArg, (int)I.cols); // const int cols
            idxArg = kernel.set(idxArg, (int)patch.x); // int PATCH_X
            idxArg = kernel.set(idxArg, (int)patch.y); // int PATCH_Y
            idxArg = kernel.set(idxArg, (int)winSize.width); // int c_winSize_x
            idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
            idxArg = kernel.set(idxArg, (int)iters); // int c_iters
            idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr

            return kernel.run(2, globalThreads, localThreads, true);
        }
    private:
        inline static bool isDeviceCPU()
        {
            return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type());
        }
    };


759
    static bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg,
760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778
                                  InputArray _prevPts, InputOutputArray _nextPts,
                                  OutputArray _status, OutputArray _err,
                                  Size winSize, int maxLevel,
                                  TermCriteria criteria,
                                  int flags/*, double minEigThreshold*/ )
    {
        if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags))
            return false;
        if (!cv::ocl::Device::getDefault().imageSupport())
            return false;
        if (_nextImg.size() != _prevImg.size())
            return false;
        int typePrev = _prevImg.type();
        int typeNext = _nextImg.type();
        if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
            return false;
        if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext)))
            return false;

779 780 781 782 783 784 785 786 787
        if (_prevPts.empty() || _prevPts.size().height != 1 || _prevPts.type() != CV_32FC2)
            return false;
        bool useInitialFlow  = (0 != (flags & OPTFLOW_USE_INITIAL_FLOW));
        if (useInitialFlow)
        {
            if (_nextPts.size() != _prevPts.size() || _nextPts.type() != CV_32FC2)
                return false;
        }

788 789 790 791 792
        PyrLKOpticalFlow opticalFlow;
        opticalFlow.winSize     = winSize;
        opticalFlow.maxLevel    = maxLevel;
        opticalFlow.iters       = criteria.maxCount;
        opticalFlow.derivLambda = criteria.epsilon;
793 794 795 796
        opticalFlow.useInitialFlow  = useInitialFlow;

        if (!opticalFlow.checkParam())
            return false;
797 798 799 800

        UMat umatErr;
        if (_err.needed())
        {
801
            _err.create(_prevPts.size(), CV_32FC1);
802 803
            umatErr = _err.getUMat();
        }
804 805
        else
            umatErr.create(_prevPts.size(), CV_32FC1);
806 807 808 809 810 811 812 813 814

        _nextPts.create(_prevPts.size(), _prevPts.type());
        _status.create(_prevPts.size(), CV_8UC1);
        UMat umatNextPts = _nextPts.getUMat();
        UMat umatStatus = _status.getUMat();
        return opticalFlow.sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
    }
};

815 816 817 818 819
void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
                           InputArray _prevPts, InputOutputArray _nextPts,
                           OutputArray _status, OutputArray _err,
                           Size winSize, int maxLevel,
                           TermCriteria criteria,
820
                           int flags, double minEigThreshold )
821
{
822 823 824 825
    bool use_opencl = ocl::useOpenCL() && (_prevImg.isUMat() || _nextImg.isUMat());
    if ( use_opencl && ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err, winSize, maxLevel, criteria, flags/*, minEigThreshold*/))
        return;

A
Andrey Kamaev 已提交
826
    Mat prevPtsMat = _prevPts.getMat();
827
    const int derivDepth = DataType<cv::detail::deriv_type>::depth;
828

829
    CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );
830

A
Andrey Kamaev 已提交
831
    int level=0, i, npoints;
832
    CV_Assert( (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 );
833

834 835 836 837 838 839 840
    if( npoints == 0 )
    {
        _nextPts.release();
        _status.release();
        _err.release();
        return;
    }
841

842 843
    if( !(flags & OPTFLOW_USE_INITIAL_FLOW) )
        _nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true);
844

845 846
    Mat nextPtsMat = _nextPts.getMat();
    CV_Assert( nextPtsMat.checkVector(2, CV_32F, true) == npoints );
847

848 849
    const Point2f* prevPts = (const Point2f*)prevPtsMat.data;
    Point2f* nextPts = (Point2f*)nextPtsMat.data;
850

851 852 853 854 855
    _status.create((int)npoints, 1, CV_8U, -1, true);
    Mat statusMat = _status.getMat(), errMat;
    CV_Assert( statusMat.isContinuous() );
    uchar* status = statusMat.data;
    float* err = 0;
856

857 858
    for( i = 0; i < npoints; i++ )
        status[i] = true;
859

860 861 862 863 864 865 866
    if( _err.needed() )
    {
        _err.create((int)npoints, 1, CV_32F, -1, true);
        errMat = _err.getMat();
        CV_Assert( errMat.isContinuous() );
        err = (float*)errMat.data;
    }
867

868
    std::vector<Mat> prevPyr, nextPyr;
A
Andrey Kamaev 已提交
869 870 871 872 873 874
    int levels1 = -1;
    int lvlStep1 = 1;
    int levels2 = -1;
    int lvlStep2 = 1;

    if(_prevImg.kind() == _InputArray::STD_VECTOR_MAT)
875
    {
A
Andrey Kamaev 已提交
876 877 878 879 880 881
        _prevImg.getMatVector(prevPyr);

        levels1 = int(prevPyr.size()) - 1;
        CV_Assert(levels1 >= 0);

        if (levels1 % 2 == 1 && prevPyr[0].channels() * 2 == prevPyr[1].channels() && prevPyr[1].depth() == derivDepth)
882
        {
A
Andrey Kamaev 已提交
883 884 885 886 887 888 889 890 891 892 893 894 895
            lvlStep1 = 2;
            levels1 /= 2;
        }

        // ensure that pyramid has reqired padding
        if(levels1 > 0)
        {
            Size fullSize;
            Point ofs;
            prevPyr[lvlStep1].locateROI(fullSize, ofs);
            CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
                && ofs.x + prevPyr[lvlStep1].cols + winSize.width <= fullSize.width
                && ofs.y + prevPyr[lvlStep1].rows + winSize.height <= fullSize.height);
896
        }
897 898 899

        if(levels1 < maxLevel)
            maxLevel = levels1;
900
    }
A
Andrey Kamaev 已提交
901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925

    if(_nextImg.kind() == _InputArray::STD_VECTOR_MAT)
    {
        _nextImg.getMatVector(nextPyr);

        levels2 = int(nextPyr.size()) - 1;
        CV_Assert(levels2 >= 0);

        if (levels2 % 2 == 1 && nextPyr[0].channels() * 2 == nextPyr[1].channels() && nextPyr[1].depth() == derivDepth)
        {
            lvlStep2 = 2;
            levels2 /= 2;
        }

        // ensure that pyramid has reqired padding
        if(levels2 > 0)
        {
            Size fullSize;
            Point ofs;
            nextPyr[lvlStep2].locateROI(fullSize, ofs);
            CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
                && ofs.x + nextPyr[lvlStep2].cols + winSize.width <= fullSize.width
                && ofs.y + nextPyr[lvlStep2].rows + winSize.height <= fullSize.height);
        }

926 927 928
        if(levels2 < maxLevel)
            maxLevel = levels2;
    }
A
Andrey Kamaev 已提交
929 930

    if (levels1 < 0)
931
        maxLevel = buildOpticalFlowPyramid(_prevImg, prevPyr, winSize, maxLevel, false);
A
Andrey Kamaev 已提交
932 933

    if (levels2 < 0)
934
        maxLevel = buildOpticalFlowPyramid(_nextImg, nextPyr, winSize, maxLevel, false);
935

936 937 938 939 940 941 942 943 944 945
    if( (criteria.type & TermCriteria::COUNT) == 0 )
        criteria.maxCount = 30;
    else
        criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100);
    if( (criteria.type & TermCriteria::EPS) == 0 )
        criteria.epsilon = 0.01;
    else
        criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.);
    criteria.epsilon *= criteria.epsilon;

A
Andrey Kamaev 已提交
946 947 948 949 950
    // dI/dx ~ Ix, dI/dy ~ Iy
    Mat derivIBuf;
    if(lvlStep1 == 1)
        derivIBuf.create(prevPyr[0].rows + winSize.height*2, prevPyr[0].cols + winSize.width*2, CV_MAKETYPE(derivDepth, prevPyr[0].channels() * 2));

951 952
    for( level = maxLevel; level >= 0; level-- )
    {
A
Andrey Kamaev 已提交
953 954 955 956 957 958 959 960 961 962 963 964
        Mat derivI;
        if(lvlStep1 == 1)
        {
            Size imgSize = prevPyr[level * lvlStep1].size();
            Mat _derivI( imgSize.height + winSize.height*2,
                imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.data );
            derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
            calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
            copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
        }
        else
            derivI = prevPyr[level * lvlStep1 + 1];
965

A
Andrey Kamaev 已提交
966 967 968
        CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
        CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());

969 970 971
#ifdef HAVE_TEGRA_OPTIMIZATION
        typedef tegra::LKTrackerInvoker<cv::detail::LKTrackerInvoker> LKTrackerInvoker;
#else
972
        typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
973 974
#endif

975 976 977 978 979
        parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
                                                          nextPyr[level * lvlStep2], prevPts, nextPts,
                                                          status, err,
                                                          winSize, criteria, level, maxLevel,
                                                          flags, (float)minEigThreshold));
980
    }
981 982
}

983
namespace cv
984 985 986
{

static void
987 988
getRTMatrix( const Point2f* a, const Point2f* b,
             int count, Mat& M, bool fullAffine )
989
{
990
    CV_Assert( M.isContinuous() );
991

992
    if( fullAffine )
993
    {
994 995 996
        double sa[6][6]={{0.}}, sb[6]={0.};
        Mat A( 6, 6, CV_64F, &sa[0][0] ), B( 6, 1, CV_64F, sb );
        Mat MM = M.reshape(1, 6);
997

998
        for( int i = 0; i < count; i++ )
999
        {
1000 1001 1002
            sa[0][0] += a[i].x*a[i].x;
            sa[0][1] += a[i].y*a[i].x;
            sa[0][2] += a[i].x;
1003

1004 1005
            sa[1][1] += a[i].y*a[i].y;
            sa[1][2] += a[i].y;
1006

1007
            sa[2][2] += 1;
1008 1009 1010 1011 1012 1013 1014 1015 1016

            sb[0] += a[i].x*b[i].x;
            sb[1] += a[i].y*b[i].x;
            sb[2] += b[i].x;
            sb[3] += a[i].x*b[i].y;
            sb[4] += a[i].y*b[i].y;
            sb[5] += b[i].y;
        }

1017 1018 1019 1020 1021 1022 1023 1024 1025
        sa[3][4] = sa[4][3] = sa[1][0] = sa[0][1];
        sa[3][5] = sa[5][3] = sa[2][0] = sa[0][2];
        sa[4][5] = sa[5][4] = sa[2][1] = sa[1][2];

        sa[3][3] = sa[0][0];
        sa[4][4] = sa[1][1];
        sa[5][5] = sa[2][2];

        solve( A, B, MM, DECOMP_EIG );
1026 1027 1028
    }
    else
    {
1029 1030 1031
        double sa[4][4]={{0.}}, sb[4]={0.}, m[4];
        Mat A( 4, 4, CV_64F, sa ), B( 4, 1, CV_64F, sb );
        Mat MM( 4, 1, CV_64F, m );
1032

1033 1034 1035 1036 1037
        for( int i = 0; i < count; i++ )
        {
            sa[0][0] += a[i].x*a[i].x + a[i].y*a[i].y;
            sa[0][2] += a[i].x;
            sa[0][3] += a[i].y;
1038 1039


1040 1041 1042 1043 1044 1045
            sa[2][1] += -a[i].y;
            sa[2][2] += 1;

            sa[3][0] += a[i].y;
            sa[3][1] += a[i].x;
            sa[3][3] += 1;
1046 1047 1048 1049 1050 1051 1052

            sb[0] += a[i].x*b[i].x + a[i].y*b[i].y;
            sb[1] += a[i].x*b[i].y - a[i].y*b[i].x;
            sb[2] += b[i].x;
            sb[3] += b[i].y;
        }

1053 1054 1055 1056 1057 1058 1059
        sa[1][1] = sa[0][0];
        sa[2][1] = sa[1][2] = -sa[0][3];
        sa[3][1] = sa[1][3] = sa[2][0] = sa[0][2];
        sa[2][2] = sa[3][3] = count;
        sa[3][0] = sa[0][3];

        solve( A, B, MM, DECOMP_EIG );
1060

1061
        double* om = M.ptr<double>();
1062 1063 1064 1065 1066 1067 1068 1069
        om[0] = om[4] = m[0];
        om[1] = -m[1];
        om[3] = m[1];
        om[2] = m[2];
        om[5] = m[3];
    }
}

1070
}
1071

1072
cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullAffine )
1073
{
1074 1075
    Mat M(2, 3, CV_64F), A = src1.getMat(), B = src2.getMat();

1076 1077 1078 1079 1080 1081
    const int COUNT = 15;
    const int WIDTH = 160, HEIGHT = 120;
    const int RANSAC_MAX_ITERS = 500;
    const int RANSAC_SIZE0 = 3;
    const double RANSAC_GOOD_RATIO = 0.5;

1082 1083 1084
    std::vector<Point2f> pA, pB;
    std::vector<int> good_idx;
    std::vector<uchar> status;
1085

1086
    double scale = 1.;
1087 1088
    int i, j, k, k1;

1089 1090
    RNG rng((uint64)-1);
    int good_count = 0;
1091

1092
    if( A.size() != B.size() )
1093
        CV_Error( Error::StsUnmatchedSizes, "Both input images must have the same size" );
1094

1095
    if( A.type() != B.type() )
1096
        CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );
1097

1098 1099 1100
    int count = A.checkVector(2);

    if( count > 0 )
1101
    {
1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112
        A.reshape(2, count).convertTo(pA, CV_32F);
        B.reshape(2, count).convertTo(pB, CV_32F);
    }
    else if( A.depth() == CV_8U )
    {
        int cn = A.channels();
        CV_Assert( cn == 1 || cn == 3 || cn == 4 );
        Size sz0 = A.size();
        Size sz1(WIDTH, HEIGHT);

        scale = std::max(1., std::max( (double)sz1.width/sz0.width, (double)sz1.height/sz0.height ));
1113 1114 1115 1116

        sz1.width = cvRound( sz0.width * scale );
        sz1.height = cvRound( sz0.height * scale );

1117
        bool equalSizes = sz1.width == sz0.width && sz1.height == sz0.height;
1118

1119
        if( !equalSizes || cn != 1 )
1120
        {
1121
            Mat sA, sB;
1122 1123 1124

            if( cn != 1 )
            {
1125 1126 1127 1128 1129
                Mat gray;
                cvtColor(A, gray, COLOR_BGR2GRAY);
                resize(gray, sA, sz1, 0., 0., INTER_AREA);
                cvtColor(B, gray, COLOR_BGR2GRAY);
                resize(gray, sB, sz1, 0., 0., INTER_AREA);
1130 1131 1132
            }
            else
            {
1133 1134
                resize(A, sA, sz1, 0., 0., INTER_AREA);
                resize(B, sB, sz1, 0., 0., INTER_AREA);
1135
            }
1136

1137 1138 1139 1140
            A = sA;
            B = sB;
        }

1141 1142
        int count_y = COUNT;
        int count_x = cvRound((double)COUNT*sz1.width/sz1.height);
1143 1144
        count = count_x * count_y;

1145 1146 1147
        pA.resize(count);
        pB.resize(count);
        status.resize(count);
1148 1149 1150 1151 1152 1153 1154 1155 1156

        for( i = 0, k = 0; i < count_y; i++ )
            for( j = 0; j < count_x; j++, k++ )
            {
                pA[k].x = (j+0.5f)*sz1.width/count_x;
                pA[k].y = (i+0.5f)*sz1.height/count_y;
            }

        // find the corresponding points in B
1157 1158
        calcOpticalFlowPyrLK(A, B, pA, pB, status, noArray(), Size(21, 21), 3,
                             TermCriteria(TermCriteria::MAX_ITER,40,0.1));
1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171

        // repack the remained points
        for( i = 0, k = 0; i < count; i++ )
            if( status[i] )
            {
                if( i > k )
                {
                    pA[k] = pA[i];
                    pB[k] = pB[i];
                }
                k++;
            }
        count = k;
1172 1173
        pA.resize(count);
        pB.resize(count);
1174 1175
    }
    else
1176
        CV_Error( Error::StsUnsupportedFormat, "Both input images must have either 8uC1 or 8uC3 type" );
1177

1178
    good_idx.resize(count);
1179 1180

    if( count < RANSAC_SIZE0 )
1181
        return Mat();
1182

1183
    Rect brect = boundingRect(pB);
1184 1185 1186 1187 1188 1189

    // RANSAC stuff:
    // 1. find the consensus
    for( k = 0; k < RANSAC_MAX_ITERS; k++ )
    {
        int idx[RANSAC_SIZE0];
1190 1191
        Point2f a[RANSAC_SIZE0];
        Point2f b[RANSAC_SIZE0];
1192 1193 1194 1195 1196 1197

        // choose random 3 non-complanar points from A & B
        for( i = 0; i < RANSAC_SIZE0; i++ )
        {
            for( k1 = 0; k1 < RANSAC_MAX_ITERS; k1++ )
            {
1198
                idx[i] = rng.uniform(0, count);
1199

1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225
                for( j = 0; j < i; j++ )
                {
                    if( idx[j] == idx[i] )
                        break;
                    // check that the points are not very close one each other
                    if( fabs(pA[idx[i]].x - pA[idx[j]].x) +
                        fabs(pA[idx[i]].y - pA[idx[j]].y) < FLT_EPSILON )
                        break;
                    if( fabs(pB[idx[i]].x - pB[idx[j]].x) +
                        fabs(pB[idx[i]].y - pB[idx[j]].y) < FLT_EPSILON )
                        break;
                }

                if( j < i )
                    continue;

                if( i+1 == RANSAC_SIZE0 )
                {
                    // additional check for non-complanar vectors
                    a[0] = pA[idx[0]];
                    a[1] = pA[idx[1]];
                    a[2] = pA[idx[2]];

                    b[0] = pB[idx[0]];
                    b[1] = pB[idx[1]];
                    b[2] = pB[idx[2]];
1226

1227 1228
                    double dax1 = a[1].x - a[0].x, day1 = a[1].y - a[0].y;
                    double dax2 = a[2].x - a[0].x, day2 = a[2].y - a[0].y;
1229
                    double dbx1 = b[1].x - b[0].x, dby1 = b[1].y - b[0].y;
1230 1231 1232
                    double dbx2 = b[2].x - b[0].x, dby2 = b[2].y - b[0].y;
                    const double eps = 0.01;

1233 1234
                    if( fabs(dax1*day2 - day1*dax2) < eps*std::sqrt(dax1*dax1+day1*day1)*std::sqrt(dax2*dax2+day2*day2) ||
                        fabs(dbx1*dby2 - dby1*dbx2) < eps*std::sqrt(dbx1*dbx1+dby1*dby1)*std::sqrt(dbx2*dbx2+dby2*dby2) )
1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247
                        continue;
                }
                break;
            }

            if( k1 >= RANSAC_MAX_ITERS )
                break;
        }

        if( i < RANSAC_SIZE0 )
            continue;

        // estimate the transformation using 3 points
1248
        getRTMatrix( a, b, 3, M, fullAffine );
1249

1250
        const double* m = M.ptr<double>();
1251 1252
        for( i = 0, good_count = 0; i < count; i++ )
        {
1253 1254
            if( std::abs( m[0]*pA[i].x + m[1]*pA[i].y + m[2] - pB[i].x ) +
                std::abs( m[3]*pA[i].x + m[4]*pA[i].y + m[5] - pB[i].y ) < std::max(brect.width,brect.height)*0.05 )
1255 1256 1257 1258 1259 1260 1261 1262
                good_idx[good_count++] = i;
        }

        if( good_count >= count*RANSAC_GOOD_RATIO )
            break;
    }

    if( k >= RANSAC_MAX_ITERS )
1263
        return Mat();
1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274

    if( good_count < count )
    {
        for( i = 0; i < good_count; i++ )
        {
            j = good_idx[i];
            pA[i] = pA[j];
            pB[i] = pB[j];
        }
    }

1275 1276 1277
    getRTMatrix( &pA[0], &pB[0], good_count, M, fullAffine );
    M.at<double>(0, 2) /= scale;
    M.at<double>(1, 2) /= scale;
1278

1279
    return M;
1280 1281 1282
}

/* End of file. */