lkpyramid.cpp 48.0 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
10
//                           License Agreement
11 12 13
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
14
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
15 16 17 18 19 20 21 22 23 24 25 26
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
27
//   * The name of the copyright holders may not be used to endorse or promote products
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <float.h>
#include <stdio.h>
45
#include "lkpyramid.hpp"
46
#include "opencl_kernels.hpp"
47

48 49
#define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))

50
namespace
51
{
52
static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
53
{
54 55
    using namespace cv;
    using cv::detail::deriv_type;
56 57 58
    int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
    CV_Assert(depth == CV_8U);
    dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
59

60
#ifdef HAVE_TEGRA_OPTIMIZATION
61 62
    if (tegra::calcSharrDeriv(src, dst))
        return;
63 64
#endif

65 66 67
    int x, y, delta = (int)alignSize((cols + 2)*cn, 16);
    AutoBuffer<deriv_type> _tempBuf(delta*2 + 64);
    deriv_type *trow0 = alignPtr(_tempBuf + cn, 16), *trow1 = alignPtr(trow0 + delta, 16);
68

69 70 71
#if CV_SSE2
    __m128i z = _mm_setzero_si128(), c3 = _mm_set1_epi16(3), c10 = _mm_set1_epi16(10);
#endif
72

73
    for( y = 0; y < rows; y++ )
74
    {
75 76 77 78
        const uchar* srow0 = src.ptr<uchar>(y > 0 ? y-1 : rows > 1 ? 1 : 0);
        const uchar* srow1 = src.ptr<uchar>(y);
        const uchar* srow2 = src.ptr<uchar>(y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
        deriv_type* drow = dst.ptr<deriv_type>(y);
79

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
        // do vertical convolution
        x = 0;
#if CV_SSE2
        for( ; x <= colsn - 8; x += 8 )
        {
            __m128i s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow0 + x)), z);
            __m128i s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow1 + x)), z);
            __m128i s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow2 + x)), z);
            __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(_mm_add_epi16(s0, s2), c3), _mm_mullo_epi16(s1, c10));
            __m128i t1 = _mm_sub_epi16(s2, s0);
            _mm_store_si128((__m128i*)(trow0 + x), t0);
            _mm_store_si128((__m128i*)(trow1 + x), t1);
        }
#endif
        for( ; x < colsn; x++ )
        {
            int t0 = (srow0[x] + srow2[x])*3 + srow1[x]*10;
            int t1 = srow2[x] - srow0[x];
            trow0[x] = (deriv_type)t0;
            trow1[x] = (deriv_type)t1;
        }
101

102 103 104 105 106 107 108
        // make border
        int x0 = (cols > 1 ? 1 : 0)*cn, x1 = (cols > 1 ? cols-2 : 0)*cn;
        for( int k = 0; k < cn; k++ )
        {
            trow0[-cn + k] = trow0[x0 + k]; trow0[colsn + k] = trow0[x1 + k];
            trow1[-cn + k] = trow1[x0 + k]; trow1[colsn + k] = trow1[x1 + k];
        }
109

110 111 112 113 114 115 116 117 118 119
        // do horizontal convolution, interleave the results and store them to dst
        x = 0;
#if CV_SSE2
        for( ; x <= colsn - 8; x += 8 )
        {
            __m128i s0 = _mm_loadu_si128((const __m128i*)(trow0 + x - cn));
            __m128i s1 = _mm_loadu_si128((const __m128i*)(trow0 + x + cn));
            __m128i s2 = _mm_loadu_si128((const __m128i*)(trow1 + x - cn));
            __m128i s3 = _mm_load_si128((const __m128i*)(trow1 + x));
            __m128i s4 = _mm_loadu_si128((const __m128i*)(trow1 + x + cn));
120

121 122 123 124 125 126 127 128
            __m128i t0 = _mm_sub_epi16(s1, s0);
            __m128i t1 = _mm_add_epi16(_mm_mullo_epi16(_mm_add_epi16(s2, s4), c3), _mm_mullo_epi16(s3, c10));
            __m128i t2 = _mm_unpacklo_epi16(t0, t1);
            t0 = _mm_unpackhi_epi16(t0, t1);
            // this can probably be replaced with aligned stores if we aligned dst properly.
            _mm_storeu_si128((__m128i*)(drow + x*2), t2);
            _mm_storeu_si128((__m128i*)(drow + x*2 + 8), t0);
        }
129
#endif
130 131 132 133 134 135
        for( ; x < colsn; x++ )
        {
            deriv_type t0 = (deriv_type)(trow0[x+cn] - trow0[x-cn]);
            deriv_type t1 = (deriv_type)((trow1[x+cn] + trow1[x-cn])*3 + trow1[x]*10);
            drow[x*2] = t0; drow[x*2+1] = t1;
        }
136
    }
137
}
138

139
}//namespace
140

141 142
cv::detail::LKTrackerInvoker::LKTrackerInvoker(
                      const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
143 144 145
                      const Point2f* _prevPts, Point2f* _nextPts,
                      uchar* _status, float* _err,
                      Size _winSize, TermCriteria _criteria,
146
                      int _level, int _maxLevel, int _flags, float _minEigThreshold )
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
{
    prevImg = &_prevImg;
    prevDeriv = &_prevDeriv;
    nextImg = &_nextImg;
    prevPts = _prevPts;
    nextPts = _nextPts;
    status = _status;
    err = _err;
    winSize = _winSize;
    criteria = _criteria;
    level = _level;
    maxLevel = _maxLevel;
    flags = _flags;
    minEigThreshold = _minEigThreshold;
}

163 164 165 166 167 168 169 170
#if defined __arm__ && !CV_NEON
typedef int64 acctype;
typedef int itemtype;
#else
typedef float acctype;
typedef float itemtype;
#endif

171
void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
172 173 174 175 176
{
    Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
    const Mat& I = *prevImg;
    const Mat& J = *nextImg;
    const Mat& derivI = *prevDeriv;
177

178 179 180
    int j, cn = I.channels(), cn2 = cn*2;
    cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
    int derivDepth = DataType<deriv_type>::depth;
181

182 183
    Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
    Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
184

185
    for( int ptidx = range.start; ptidx < range.end; ptidx++ )
186
    {
187 188 189 190 191 192 193 194 195 196 197 198
        Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
        Point2f nextPt;
        if( level == maxLevel )
        {
            if( flags & OPTFLOW_USE_INITIAL_FLOW )
                nextPt = nextPts[ptidx]*(float)(1./(1 << level));
            else
                nextPt = prevPt;
        }
        else
            nextPt = nextPts[ptidx]*2.f;
        nextPts[ptidx] = nextPt;
199

200 201 202 203
        Point2i iprevPt, inextPt;
        prevPt -= halfWin;
        iprevPt.x = cvFloor(prevPt.x);
        iprevPt.y = cvFloor(prevPt.y);
204

205 206
        if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols ||
            iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows )
207
        {
208
            if( level == 0 )
209
            {
210 211 212 213
                if( status )
                    status[ptidx] = false;
                if( err )
                    err[ptidx] = 0;
214
            }
215 216
            continue;
        }
217

218 219 220 221 222 223 224 225
        float a = prevPt.x - iprevPt.x;
        float b = prevPt.y - iprevPt.y;
        const int W_BITS = 14, W_BITS1 = 14;
        const float FLT_SCALE = 1.f/(1 << 20);
        int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
        int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
        int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
        int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
226

227
        int dstep = (int)(derivI.step/derivI.elemSize1());
228 229
        int stepI = (int)(I.step/I.elemSize1());
        int stepJ = (int)(J.step/J.elemSize1());
230 231
        acctype iA11 = 0, iA12 = 0, iA22 = 0;
        float A11, A12, A22;
232

233 234 235 236 237 238 239 240
#if CV_SSE2
        __m128i qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
        __m128i qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
        __m128i z = _mm_setzero_si128();
        __m128i qdelta_d = _mm_set1_epi32(1 << (W_BITS1-1));
        __m128i qdelta = _mm_set1_epi32(1 << (W_BITS1-5-1));
        __m128 qA11 = _mm_setzero_ps(), qA12 = _mm_setzero_ps(), qA22 = _mm_setzero_ps();
#endif
241

242 243 244 245
        // extract the patch from the first image, compute covariation matrix of derivatives
        int x, y;
        for( y = 0; y < winSize.height; y++ )
        {
246
            const uchar* src = (const uchar*)I.data + (y + iprevPt.y)*stepI + iprevPt.x*cn;
247
            const deriv_type* dsrc = (const deriv_type*)derivI.data + (y + iprevPt.y)*dstep + iprevPt.x*cn2;
248

249 250
            deriv_type* Iptr = (deriv_type*)(IWinBuf.data + y*IWinBuf.step);
            deriv_type* dIptr = (deriv_type*)(derivIWinBuf.data + y*derivIWinBuf.step);
251

252
            x = 0;
253

254 255
#if CV_SSE2
            for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
256
            {
257
                __m128i v00, v01, v10, v11, t0, t1;
258

259 260
                v00 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x)), z);
                v01 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + cn)), z);
261 262
                v10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI)), z);
                v11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI + cn)), z);
263

264 265 266 267
                t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
                _mm_storel_epi64((__m128i*)(Iptr + x), _mm_packs_epi32(t0,t0));
268

269 270 271 272
                v00 = _mm_loadu_si128((const __m128i*)(dsrc));
                v01 = _mm_loadu_si128((const __m128i*)(dsrc + cn2));
                v10 = _mm_loadu_si128((const __m128i*)(dsrc + dstep));
                v11 = _mm_loadu_si128((const __m128i*)(dsrc + dstep + cn2));
273

274 275 276 277 278 279 280
                t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
                t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta_d), W_BITS1);
                t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta_d), W_BITS1);
                v00 = _mm_packs_epi32(t0, t1); // Ix0 Iy0 Ix1 Iy1 ...
281

282 283 284
                _mm_storeu_si128((__m128i*)dIptr, v00);
                t0 = _mm_srai_epi32(v00, 16); // Iy0 Iy1 Iy2 Iy3
                t1 = _mm_srai_epi32(_mm_slli_epi32(v00, 16), 16); // Ix0 Ix1 Ix2 Ix3
285

286 287
                __m128 fy = _mm_cvtepi32_ps(t0);
                __m128 fx = _mm_cvtepi32_ps(t1);
288

289 290 291
                qA22 = _mm_add_ps(qA22, _mm_mul_ps(fy, fy));
                qA12 = _mm_add_ps(qA12, _mm_mul_ps(fx, fy));
                qA11 = _mm_add_ps(qA11, _mm_mul_ps(fx, fx));
292
            }
293
#endif
294

295 296 297
            for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 )
            {
                int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
298
                                      src[x+stepI]*iw10 + src[x+stepI+cn]*iw11, W_BITS1-5);
299 300 301 302
                int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
                                       dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
                int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
                                       dsrc[dstep+cn2+1]*iw11, W_BITS1);
303

304 305 306
                Iptr[x] = (short)ival;
                dIptr[0] = (short)ixval;
                dIptr[1] = (short)iyval;
307

308 309 310
                iA11 += (itemtype)(ixval*ixval);
                iA12 += (itemtype)(ixval*iyval);
                iA22 += (itemtype)(iyval*iyval);
311 312
            }
        }
313

314 315 316 317 318
#if CV_SSE2
        float CV_DECL_ALIGNED(16) A11buf[4], A12buf[4], A22buf[4];
        _mm_store_ps(A11buf, qA11);
        _mm_store_ps(A12buf, qA12);
        _mm_store_ps(A22buf, qA22);
319 320 321
        iA11 += A11buf[0] + A11buf[1] + A11buf[2] + A11buf[3];
        iA12 += A12buf[0] + A12buf[1] + A12buf[2] + A12buf[3];
        iA22 += A22buf[0] + A22buf[1] + A22buf[2] + A22buf[3];
322
#endif
323

324 325 326
        A11 = iA11*FLT_SCALE;
        A12 = iA12*FLT_SCALE;
        A22 = iA22*FLT_SCALE;
327

328 329 330
        float D = A11*A22 - A12*A12;
        float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
                        4.f*A12*A12))/(2*winSize.width*winSize.height);
331

332
        if( err && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0 )
333
            err[ptidx] = (float)minEig;
334

335 336 337 338 339 340
        if( minEig < minEigThreshold || D < FLT_EPSILON )
        {
            if( level == 0 && status )
                status[ptidx] = false;
            continue;
        }
341

342
        D = 1.f/D;
343

344 345
        nextPt -= halfWin;
        Point2f prevDelta;
346

347 348 349 350
        for( j = 0; j < criteria.maxCount; j++ )
        {
            inextPt.x = cvFloor(nextPt.x);
            inextPt.y = cvFloor(nextPt.y);
351

352 353 354 355 356 357 358
            if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
               inextPt.y < -winSize.height || inextPt.y >= J.rows )
            {
                if( level == 0 && status )
                    status[ptidx] = false;
                break;
            }
359

360 361 362 363 364 365
            a = nextPt.x - inextPt.x;
            b = nextPt.y - inextPt.y;
            iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
            iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
            iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
366 367
            acctype ib1 = 0, ib2 = 0;
            float b1, b2;
368
#if CV_SSE2
369 370 371
            qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
            qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
            __m128 qb0 = _mm_setzero_ps(), qb1 = _mm_setzero_ps();
372
#endif
373

374 375
            for( y = 0; y < winSize.height; y++ )
            {
376
                const uchar* Jptr = (const uchar*)J.data + (y + inextPt.y)*stepJ + inextPt.x*cn;
377 378
                const deriv_type* Iptr = (const deriv_type*)(IWinBuf.data + y*IWinBuf.step);
                const deriv_type* dIptr = (const deriv_type*)(derivIWinBuf.data + y*derivIWinBuf.step);
379

380
                x = 0;
381

382
#if CV_SSE2
383
                for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
384
                {
385 386 387
                    __m128i diff0 = _mm_loadu_si128((const __m128i*)(Iptr + x)), diff1;
                    __m128i v00 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x)), z);
                    __m128i v01 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + cn)), z);
388 389
                    __m128i v10 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ)), z);
                    __m128i v11 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ + cn)), z);
390

391 392 393 394
                    __m128i t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                               _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                    __m128i t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
                                               _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
395
                    t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
396 397 398 399
                    t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta), W_BITS1-5);
                    diff0 = _mm_subs_epi16(_mm_packs_epi32(t0, t1), diff0);
                    diff1 = _mm_unpackhi_epi16(diff0, diff0);
                    diff0 = _mm_unpacklo_epi16(diff0, diff0); // It0 It0 It1 It1 ...
400
                    v00 = _mm_loadu_si128((const __m128i*)(dIptr)); // Ix0 Iy0 Ix1 Iy1 ...
401 402 403 404 405 406 407 408 409 410 411 412 413
                    v01 = _mm_loadu_si128((const __m128i*)(dIptr + 8));
                    v10 = _mm_mullo_epi16(v00, diff0);
                    v11 = _mm_mulhi_epi16(v00, diff0);
                    v00 = _mm_unpacklo_epi16(v10, v11);
                    v10 = _mm_unpackhi_epi16(v10, v11);
                    qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
                    qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
                    v10 = _mm_mullo_epi16(v01, diff1);
                    v11 = _mm_mulhi_epi16(v01, diff1);
                    v00 = _mm_unpacklo_epi16(v10, v11);
                    v10 = _mm_unpackhi_epi16(v10, v11);
                    qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
                    qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
414 415
                }
#endif
416

417
                for( ; x < winSize.width*cn; x++, dIptr += 2 )
418
                {
419
                    int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
420
                                          Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
421
                                          W_BITS1-5) - Iptr[x];
422 423
                    ib1 += (itemtype)(diff*dIptr[0]);
                    ib2 += (itemtype)(diff*dIptr[1]);
424 425
                }
            }
426

427
#if CV_SSE2
428 429
            float CV_DECL_ALIGNED(16) bbuf[4];
            _mm_store_ps(bbuf, _mm_add_ps(qb0, qb1));
430 431
            ib1 += bbuf[0] + bbuf[2];
            ib2 += bbuf[1] + bbuf[3];
432
#endif
433

434 435
            b1 = ib1*FLT_SCALE;
            b2 = ib2*FLT_SCALE;
436

437 438 439
            Point2f delta( (float)((A12*b2 - A22*b1) * D),
                          (float)((A12*b1 - A11*b2) * D));
            //delta = -delta;
440

441 442
            nextPt += delta;
            nextPts[ptidx] = nextPt + halfWin;
443

444 445
            if( delta.ddot(delta) <= criteria.epsilon )
                break;
446

447 448
            if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 &&
               std::abs(delta.y + prevDelta.y) < 0.01 )
449
            {
450 451
                nextPts[ptidx] -= delta*0.5f;
                break;
452
            }
453 454
            prevDelta = delta;
        }
455

456
        if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 )
457
        {
458 459 460 461 462 463 464 465
            Point2f nextPoint = nextPts[ptidx] - halfWin;
            Point inextPoint;

            inextPoint.x = cvFloor(nextPoint.x);
            inextPoint.y = cvFloor(nextPoint.y);

            if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols ||
                inextPoint.y < -winSize.height || inextPoint.y >= J.rows )
466
            {
467 468 469
                if( status )
                    status[ptidx] = false;
                continue;
470
            }
471 472 473 474 475 476

            float aa = nextPoint.x - inextPoint.x;
            float bb = nextPoint.y - inextPoint.y;
            iw00 = cvRound((1.f - aa)*(1.f - bb)*(1 << W_BITS));
            iw01 = cvRound(aa*(1.f - bb)*(1 << W_BITS));
            iw10 = cvRound((1.f - aa)*bb*(1 << W_BITS));
477 478
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
            float errval = 0.f;
479

480
            for( y = 0; y < winSize.height; y++ )
481
            {
482
                const uchar* Jptr = (const uchar*)J.data + (y + inextPoint.y)*stepJ + inextPoint.x*cn;
483
                const deriv_type* Iptr = (const deriv_type*)(IWinBuf.data + y*IWinBuf.step);
484

485
                for( x = 0; x < winSize.width*cn; x++ )
486
                {
487
                    int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
488
                                          Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
489 490
                                          W_BITS1-5) - Iptr[x];
                    errval += std::abs((float)diff);
491 492
                }
            }
493
            err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height);
494 495 496
        }
    }
}
497

A
Andrey Kamaev 已提交
498 499 500 501 502 503 504 505 506
int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives,
                                int pyrBorder, int derivBorder, bool tryReuseInputImage)
{
    Mat img = _img.getMat();
    CV_Assert(img.depth() == CV_8U && winSize.width > 2 && winSize.height > 2 );
    int pyrstep = withDerivatives ? 2 : 1;

    pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0);

507
    int derivType = CV_MAKETYPE(DataType<cv::detail::deriv_type>::depth, img.channels() * 2);
A
Andrey Kamaev 已提交
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527

    //level 0
    bool lvl0IsSet = false;
    if(tryReuseInputImage && img.isSubmatrix() && (pyrBorder & BORDER_ISOLATED) == 0)
    {
        Size wholeSize;
        Point ofs;
        img.locateROI(wholeSize, ofs);
        if (ofs.x >= winSize.width && ofs.y >= winSize.height
              && ofs.x + img.cols + winSize.width <= wholeSize.width
              && ofs.y + img.rows + winSize.height <= wholeSize.height)
        {
            pyramid.getMatRef(0) = img;
            lvl0IsSet = true;
        }
    }

    if(!lvl0IsSet)
    {
        Mat& temp = pyramid.getMatRef(0);
528

A
Andrey Kamaev 已提交
529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
        if(!temp.empty())
            temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
        if(temp.type() != img.type() || temp.cols != winSize.width*2 + img.cols || temp.rows != winSize.height * 2 + img.rows)
            temp.create(img.rows + winSize.height*2, img.cols + winSize.width*2, img.type());

        if(pyrBorder == BORDER_TRANSPARENT)
            img.copyTo(temp(Rect(winSize.width, winSize.height, img.cols, img.rows)));
        else
            copyMakeBorder(img, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder);
        temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
    }

    Size sz = img.size();
    Mat prevLevel = pyramid.getMatRef(0);
    Mat thisLevel = prevLevel;

    for(int level = 0; level <= maxLevel; ++level)
    {
        if (level != 0)
        {
            Mat& temp = pyramid.getMatRef(level * pyrstep);
550

A
Andrey Kamaev 已提交
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593
            if(!temp.empty())
                temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
            if(temp.type() != img.type() || temp.cols != winSize.width*2 + sz.width || temp.rows != winSize.height * 2 + sz.height)
                temp.create(sz.height + winSize.height*2, sz.width + winSize.width*2, img.type());

            thisLevel = temp(Rect(winSize.width, winSize.height, sz.width, sz.height));
            pyrDown(prevLevel, thisLevel, sz);

            if(pyrBorder != BORDER_TRANSPARENT)
                copyMakeBorder(thisLevel, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder|BORDER_ISOLATED);
            temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
        }

        if(withDerivatives)
        {
            Mat& deriv = pyramid.getMatRef(level * pyrstep + 1);

            if(!deriv.empty())
                deriv.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
            if(deriv.type() != derivType || deriv.cols != winSize.width*2 + sz.width || deriv.rows != winSize.height * 2 + sz.height)
                deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType);

            Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height));
            calcSharrDeriv(thisLevel, derivI);

            if(derivBorder != BORDER_TRANSPARENT)
                copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED);
            deriv.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
        }

        sz = Size((sz.width+1)/2, (sz.height+1)/2);
        if( sz.width <= winSize.width || sz.height <= winSize.height )
        {
            pyramid.create(1, (level + 1) * pyrstep, 0 /*type*/, -1, true, 0);//check this
            return level;
        }

        prevLevel = thisLevel;
    }

    return maxLevel;
}

594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613
namespace cv
{
    class PyrLKOpticalFlow
    {
        struct dim3
        {
            unsigned int x, y, z;
        };
    public:
        PyrLKOpticalFlow()
        {
            winSize = Size(21, 21);
            maxLevel = 3;
            iters = 30;
            derivLambda = 0.5;
            useInitialFlow = false;
            //minEigThreshold = 1e-4f;
            //getMinEigenVals = false;
        }

614
        bool checkParam()
615
        {
616
            iters = std::min(std::max(iters, 0), 100);
617 618 619 620 621 622

            derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
            if (derivLambda < 0)
                return false;
            if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
                return false;
V
vbystricky 已提交
623
            calcPatchSize();
624 625 626 627
            if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
                return false;
            if (!initWaveSize())
                return false;
628 629 630 631 632 633 634
            return true;
        }

        bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err)
        {
            if (!checkParam())
                return false;
635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657

            UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
            UMat temp2 = nextPts.reshape(1);
            multiply(1.0f / (1 << maxLevel) /2.0f, temp1, temp2);

            status.setTo(Scalar::all(1));

            // build the image pyramids.
            std::vector<UMat> prevPyr; prevPyr.resize(maxLevel + 1);
            std::vector<UMat> nextPyr; nextPyr.resize(maxLevel + 1);

            prevImg.convertTo(prevPyr[0], CV_32F);
            nextImg.convertTo(nextPyr[0], CV_32F);

            for (int level = 1; level <= maxLevel; ++level)
            {
                pyrDown(prevPyr[level - 1], prevPyr[level]);
                pyrDown(nextPyr[level - 1], nextPyr[level]);
            }

            // dI/dx ~ Ix, dI/dy ~ Iy
            for (int level = maxLevel; level >= 0; level--)
            {
658 659
                if (!lkSparse_run(prevPyr[level], nextPyr[level], prevPts,
                                  nextPts, status, err,
V
vbystricky 已提交
660
                                  prevPts.cols, level))
661
                    return false;
662 663 664 665 666 667 668 669 670 671 672 673 674
            }
            return true;
        }

        Size winSize;
        int maxLevel;
        int iters;
        double derivLambda;
        bool useInitialFlow;
        //float minEigThreshold;
        //bool getMinEigenVals;

    private:
V
vbystricky 已提交
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
        int waveSize;
        bool initWaveSize()
        {
            waveSize = 1;
            if (isDeviceCPU())
                return true;

            ocl::Kernel kernel;
            if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
                return false;
            waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
            return true;
        }
        dim3 patch;
        void calcPatchSize()
690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
        {
            dim3 block;
            //winSize.width *= cn;

            if (winSize.width > 32 && winSize.width > 2 * winSize.height)
            {
                block.x = 32;
                block.y = 8;
            }
            else
            {
                block.x = 16;
                block.y = 16;
            }

            patch.x = (winSize.width  + block.x - 1) / block.x;
            patch.y = (winSize.height + block.y - 1) / block.y;

            block.z = patch.z = 1;
        }

711 712 713 714 715
        #define SAFE_KERNEL_SET_ARG(idx, arg) \
        {\
            int idxNew = kernel.set(idx, arg);\
            if (-1 == idxNew)\
            {\
V
vbystricky 已提交
716
                printf("lkSparse_run can't setup argument index = %d to kernel\n", idx);\
717 718 719 720
                return false;\
            }\
            idx = idxNew;\
        }
721
        bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err,
V
vbystricky 已提交
722
            int ptcount, int level)
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740
        {
            size_t localThreads[3]  = { 8, 8};
            size_t globalThreads[3] = { 8 * ptcount, 8};
            char calcErr = (0 == level) ? 1 : 0;

            cv::String build_options;
            if (isDeviceCPU())
                build_options = " -D CPU";
            else
                build_options = cv::format("-D WAVE_SIZE=%d", waveSize);

            ocl::Kernel kernel;
            if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
                return false;

            ocl::Image2D imageI(I);
            ocl::Image2D imageJ(J);
            int idxArg = 0;
741
#if 0
742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758
            idxArg = kernel.set(idxArg, imageI); //image2d_t I
            idxArg = kernel.set(idxArg, imageJ); //image2d_t J
            idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
            idxArg = kernel.set(idxArg, (int)prevPts.step); // int prevPtsStep
            idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
            idxArg = kernel.set(idxArg, (int)nextPts.step); //  int nextPtsStep
            idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
            idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
            idxArg = kernel.set(idxArg, (int)level); // const int level
            idxArg = kernel.set(idxArg, (int)I.rows); // const int rows
            idxArg = kernel.set(idxArg, (int)I.cols); // const int cols
            idxArg = kernel.set(idxArg, (int)patch.x); // int PATCH_X
            idxArg = kernel.set(idxArg, (int)patch.y); // int PATCH_Y
            idxArg = kernel.set(idxArg, (int)winSize.width); // int c_winSize_x
            idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
            idxArg = kernel.set(idxArg, (int)iters); // int c_iters
            idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
#else
            SAFE_KERNEL_SET_ARG(idxArg, imageI); //image2d_t I
            SAFE_KERNEL_SET_ARG(idxArg, imageJ); //image2d_t J
            SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
            SAFE_KERNEL_SET_ARG(idxArg, (int)prevPts.step); // int prevPtsStep
            SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
            SAFE_KERNEL_SET_ARG(idxArg, (int)nextPts.step); //  int nextPtsStep
            SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
            SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
            SAFE_KERNEL_SET_ARG(idxArg, (int)level); // const int level
            SAFE_KERNEL_SET_ARG(idxArg, (int)I.rows); // const int rows
            SAFE_KERNEL_SET_ARG(idxArg, (int)I.cols); // const int cols
            SAFE_KERNEL_SET_ARG(idxArg, (int)patch.x); // int PATCH_X
            SAFE_KERNEL_SET_ARG(idxArg, (int)patch.y); // int PATCH_Y
            SAFE_KERNEL_SET_ARG(idxArg, (int)winSize.width); // int c_winSize_x
            SAFE_KERNEL_SET_ARG(idxArg, (int)winSize.height); // int c_winSize_y
            SAFE_KERNEL_SET_ARG(idxArg, (int)iters); // int c_iters
            SAFE_KERNEL_SET_ARG(idxArg, (char)calcErr); //char calcErr
#endif
778 779 780 781 782 783 784 785 786 787 788

            return kernel.run(2, globalThreads, localThreads, true);
        }
    private:
        inline static bool isDeviceCPU()
        {
            return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type());
        }
    };


789
    static bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg,
790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808
                                  InputArray _prevPts, InputOutputArray _nextPts,
                                  OutputArray _status, OutputArray _err,
                                  Size winSize, int maxLevel,
                                  TermCriteria criteria,
                                  int flags/*, double minEigThreshold*/ )
    {
        if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags))
            return false;
        if (!cv::ocl::Device::getDefault().imageSupport())
            return false;
        if (_nextImg.size() != _prevImg.size())
            return false;
        int typePrev = _prevImg.type();
        int typeNext = _nextImg.type();
        if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
            return false;
        if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext)))
            return false;

809 810 811 812 813 814 815 816 817
        if (_prevPts.empty() || _prevPts.size().height != 1 || _prevPts.type() != CV_32FC2)
            return false;
        bool useInitialFlow  = (0 != (flags & OPTFLOW_USE_INITIAL_FLOW));
        if (useInitialFlow)
        {
            if (_nextPts.size() != _prevPts.size() || _nextPts.type() != CV_32FC2)
                return false;
        }

818 819 820 821 822
        PyrLKOpticalFlow opticalFlow;
        opticalFlow.winSize     = winSize;
        opticalFlow.maxLevel    = maxLevel;
        opticalFlow.iters       = criteria.maxCount;
        opticalFlow.derivLambda = criteria.epsilon;
823 824 825 826
        opticalFlow.useInitialFlow  = useInitialFlow;

        if (!opticalFlow.checkParam())
            return false;
827 828 829 830

        UMat umatErr;
        if (_err.needed())
        {
831
            _err.create(_prevPts.size(), CV_32FC1);
832 833
            umatErr = _err.getUMat();
        }
834 835
        else
            umatErr.create(_prevPts.size(), CV_32FC1);
836 837 838 839 840 841 842 843 844

        _nextPts.create(_prevPts.size(), _prevPts.type());
        _status.create(_prevPts.size(), CV_8UC1);
        UMat umatNextPts = _nextPts.getUMat();
        UMat umatStatus = _status.getUMat();
        return opticalFlow.sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
    }
};

845 846 847 848 849
void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
                           InputArray _prevPts, InputOutputArray _nextPts,
                           OutputArray _status, OutputArray _err,
                           Size winSize, int maxLevel,
                           TermCriteria criteria,
850
                           int flags, double minEigThreshold )
851
{
852 853 854 855
    bool use_opencl = ocl::useOpenCL() && (_prevImg.isUMat() || _nextImg.isUMat());
    if ( use_opencl && ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err, winSize, maxLevel, criteria, flags/*, minEigThreshold*/))
        return;

A
Andrey Kamaev 已提交
856
    Mat prevPtsMat = _prevPts.getMat();
857
    const int derivDepth = DataType<cv::detail::deriv_type>::depth;
858

859
    CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );
860

A
Andrey Kamaev 已提交
861
    int level=0, i, npoints;
862
    CV_Assert( (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 );
863

864 865 866 867 868 869 870
    if( npoints == 0 )
    {
        _nextPts.release();
        _status.release();
        _err.release();
        return;
    }
871

872 873
    if( !(flags & OPTFLOW_USE_INITIAL_FLOW) )
        _nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true);
874

875 876
    Mat nextPtsMat = _nextPts.getMat();
    CV_Assert( nextPtsMat.checkVector(2, CV_32F, true) == npoints );
877

878 879
    const Point2f* prevPts = (const Point2f*)prevPtsMat.data;
    Point2f* nextPts = (Point2f*)nextPtsMat.data;
880

881 882 883 884 885
    _status.create((int)npoints, 1, CV_8U, -1, true);
    Mat statusMat = _status.getMat(), errMat;
    CV_Assert( statusMat.isContinuous() );
    uchar* status = statusMat.data;
    float* err = 0;
886

887 888
    for( i = 0; i < npoints; i++ )
        status[i] = true;
889

890 891 892 893 894 895 896
    if( _err.needed() )
    {
        _err.create((int)npoints, 1, CV_32F, -1, true);
        errMat = _err.getMat();
        CV_Assert( errMat.isContinuous() );
        err = (float*)errMat.data;
    }
897

898
    std::vector<Mat> prevPyr, nextPyr;
A
Andrey Kamaev 已提交
899 900 901 902 903 904
    int levels1 = -1;
    int lvlStep1 = 1;
    int levels2 = -1;
    int lvlStep2 = 1;

    if(_prevImg.kind() == _InputArray::STD_VECTOR_MAT)
905
    {
A
Andrey Kamaev 已提交
906 907 908 909 910 911
        _prevImg.getMatVector(prevPyr);

        levels1 = int(prevPyr.size()) - 1;
        CV_Assert(levels1 >= 0);

        if (levels1 % 2 == 1 && prevPyr[0].channels() * 2 == prevPyr[1].channels() && prevPyr[1].depth() == derivDepth)
912
        {
A
Andrey Kamaev 已提交
913 914 915 916 917 918 919 920 921 922 923 924 925
            lvlStep1 = 2;
            levels1 /= 2;
        }

        // ensure that pyramid has reqired padding
        if(levels1 > 0)
        {
            Size fullSize;
            Point ofs;
            prevPyr[lvlStep1].locateROI(fullSize, ofs);
            CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
                && ofs.x + prevPyr[lvlStep1].cols + winSize.width <= fullSize.width
                && ofs.y + prevPyr[lvlStep1].rows + winSize.height <= fullSize.height);
926
        }
927 928 929

        if(levels1 < maxLevel)
            maxLevel = levels1;
930
    }
A
Andrey Kamaev 已提交
931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955

    if(_nextImg.kind() == _InputArray::STD_VECTOR_MAT)
    {
        _nextImg.getMatVector(nextPyr);

        levels2 = int(nextPyr.size()) - 1;
        CV_Assert(levels2 >= 0);

        if (levels2 % 2 == 1 && nextPyr[0].channels() * 2 == nextPyr[1].channels() && nextPyr[1].depth() == derivDepth)
        {
            lvlStep2 = 2;
            levels2 /= 2;
        }

        // ensure that pyramid has reqired padding
        if(levels2 > 0)
        {
            Size fullSize;
            Point ofs;
            nextPyr[lvlStep2].locateROI(fullSize, ofs);
            CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
                && ofs.x + nextPyr[lvlStep2].cols + winSize.width <= fullSize.width
                && ofs.y + nextPyr[lvlStep2].rows + winSize.height <= fullSize.height);
        }

956 957 958
        if(levels2 < maxLevel)
            maxLevel = levels2;
    }
A
Andrey Kamaev 已提交
959 960

    if (levels1 < 0)
961
        maxLevel = buildOpticalFlowPyramid(_prevImg, prevPyr, winSize, maxLevel, false);
A
Andrey Kamaev 已提交
962 963

    if (levels2 < 0)
964
        maxLevel = buildOpticalFlowPyramid(_nextImg, nextPyr, winSize, maxLevel, false);
965

966 967 968 969 970 971 972 973 974 975
    if( (criteria.type & TermCriteria::COUNT) == 0 )
        criteria.maxCount = 30;
    else
        criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100);
    if( (criteria.type & TermCriteria::EPS) == 0 )
        criteria.epsilon = 0.01;
    else
        criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.);
    criteria.epsilon *= criteria.epsilon;

A
Andrey Kamaev 已提交
976 977 978 979 980
    // dI/dx ~ Ix, dI/dy ~ Iy
    Mat derivIBuf;
    if(lvlStep1 == 1)
        derivIBuf.create(prevPyr[0].rows + winSize.height*2, prevPyr[0].cols + winSize.width*2, CV_MAKETYPE(derivDepth, prevPyr[0].channels() * 2));

981 982
    for( level = maxLevel; level >= 0; level-- )
    {
A
Andrey Kamaev 已提交
983 984 985 986 987 988 989 990 991 992 993 994
        Mat derivI;
        if(lvlStep1 == 1)
        {
            Size imgSize = prevPyr[level * lvlStep1].size();
            Mat _derivI( imgSize.height + winSize.height*2,
                imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.data );
            derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
            calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
            copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
        }
        else
            derivI = prevPyr[level * lvlStep1 + 1];
995

A
Andrey Kamaev 已提交
996 997 998
        CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
        CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());

999 1000 1001
#ifdef HAVE_TEGRA_OPTIMIZATION
        typedef tegra::LKTrackerInvoker<cv::detail::LKTrackerInvoker> LKTrackerInvoker;
#else
1002
        typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
1003 1004
#endif

1005 1006 1007 1008 1009
        parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
                                                          nextPyr[level * lvlStep2], prevPts, nextPts,
                                                          status, err,
                                                          winSize, criteria, level, maxLevel,
                                                          flags, (float)minEigThreshold));
1010
    }
1011 1012
}

1013
namespace cv
1014 1015 1016
{

static void
1017 1018
getRTMatrix( const Point2f* a, const Point2f* b,
             int count, Mat& M, bool fullAffine )
1019
{
1020
    CV_Assert( M.isContinuous() );
1021

1022
    if( fullAffine )
1023
    {
1024 1025 1026
        double sa[6][6]={{0.}}, sb[6]={0.};
        Mat A( 6, 6, CV_64F, &sa[0][0] ), B( 6, 1, CV_64F, sb );
        Mat MM = M.reshape(1, 6);
1027

1028
        for( int i = 0; i < count; i++ )
1029
        {
1030 1031 1032
            sa[0][0] += a[i].x*a[i].x;
            sa[0][1] += a[i].y*a[i].x;
            sa[0][2] += a[i].x;
1033

1034 1035
            sa[1][1] += a[i].y*a[i].y;
            sa[1][2] += a[i].y;
1036

1037
            sa[2][2] += 1;
1038 1039 1040 1041 1042 1043 1044 1045 1046

            sb[0] += a[i].x*b[i].x;
            sb[1] += a[i].y*b[i].x;
            sb[2] += b[i].x;
            sb[3] += a[i].x*b[i].y;
            sb[4] += a[i].y*b[i].y;
            sb[5] += b[i].y;
        }

1047 1048 1049 1050 1051 1052 1053 1054 1055
        sa[3][4] = sa[4][3] = sa[1][0] = sa[0][1];
        sa[3][5] = sa[5][3] = sa[2][0] = sa[0][2];
        sa[4][5] = sa[5][4] = sa[2][1] = sa[1][2];

        sa[3][3] = sa[0][0];
        sa[4][4] = sa[1][1];
        sa[5][5] = sa[2][2];

        solve( A, B, MM, DECOMP_EIG );
1056 1057 1058
    }
    else
    {
1059 1060 1061
        double sa[4][4]={{0.}}, sb[4]={0.}, m[4];
        Mat A( 4, 4, CV_64F, sa ), B( 4, 1, CV_64F, sb );
        Mat MM( 4, 1, CV_64F, m );
1062

1063 1064 1065 1066 1067
        for( int i = 0; i < count; i++ )
        {
            sa[0][0] += a[i].x*a[i].x + a[i].y*a[i].y;
            sa[0][2] += a[i].x;
            sa[0][3] += a[i].y;
1068 1069


1070 1071 1072 1073 1074 1075
            sa[2][1] += -a[i].y;
            sa[2][2] += 1;

            sa[3][0] += a[i].y;
            sa[3][1] += a[i].x;
            sa[3][3] += 1;
1076 1077 1078 1079 1080 1081 1082

            sb[0] += a[i].x*b[i].x + a[i].y*b[i].y;
            sb[1] += a[i].x*b[i].y - a[i].y*b[i].x;
            sb[2] += b[i].x;
            sb[3] += b[i].y;
        }

1083 1084 1085 1086 1087 1088 1089
        sa[1][1] = sa[0][0];
        sa[2][1] = sa[1][2] = -sa[0][3];
        sa[3][1] = sa[1][3] = sa[2][0] = sa[0][2];
        sa[2][2] = sa[3][3] = count;
        sa[3][0] = sa[0][3];

        solve( A, B, MM, DECOMP_EIG );
1090

1091
        double* om = M.ptr<double>();
1092 1093 1094 1095 1096 1097 1098 1099
        om[0] = om[4] = m[0];
        om[1] = -m[1];
        om[3] = m[1];
        om[2] = m[2];
        om[5] = m[3];
    }
}

1100
}
1101

1102
cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullAffine )
1103
{
1104 1105
    Mat M(2, 3, CV_64F), A = src1.getMat(), B = src2.getMat();

1106 1107 1108 1109 1110 1111
    const int COUNT = 15;
    const int WIDTH = 160, HEIGHT = 120;
    const int RANSAC_MAX_ITERS = 500;
    const int RANSAC_SIZE0 = 3;
    const double RANSAC_GOOD_RATIO = 0.5;

1112 1113 1114
    std::vector<Point2f> pA, pB;
    std::vector<int> good_idx;
    std::vector<uchar> status;
1115

1116
    double scale = 1.;
1117 1118
    int i, j, k, k1;

1119 1120
    RNG rng((uint64)-1);
    int good_count = 0;
1121

1122
    if( A.size() != B.size() )
1123
        CV_Error( Error::StsUnmatchedSizes, "Both input images must have the same size" );
1124

1125
    if( A.type() != B.type() )
1126
        CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );
1127

1128 1129 1130
    int count = A.checkVector(2);

    if( count > 0 )
1131
    {
1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142
        A.reshape(2, count).convertTo(pA, CV_32F);
        B.reshape(2, count).convertTo(pB, CV_32F);
    }
    else if( A.depth() == CV_8U )
    {
        int cn = A.channels();
        CV_Assert( cn == 1 || cn == 3 || cn == 4 );
        Size sz0 = A.size();
        Size sz1(WIDTH, HEIGHT);

        scale = std::max(1., std::max( (double)sz1.width/sz0.width, (double)sz1.height/sz0.height ));
1143 1144 1145 1146

        sz1.width = cvRound( sz0.width * scale );
        sz1.height = cvRound( sz0.height * scale );

1147
        bool equalSizes = sz1.width == sz0.width && sz1.height == sz0.height;
1148

1149
        if( !equalSizes || cn != 1 )
1150
        {
1151
            Mat sA, sB;
1152 1153 1154

            if( cn != 1 )
            {
1155 1156 1157 1158 1159
                Mat gray;
                cvtColor(A, gray, COLOR_BGR2GRAY);
                resize(gray, sA, sz1, 0., 0., INTER_AREA);
                cvtColor(B, gray, COLOR_BGR2GRAY);
                resize(gray, sB, sz1, 0., 0., INTER_AREA);
1160 1161 1162
            }
            else
            {
1163 1164
                resize(A, sA, sz1, 0., 0., INTER_AREA);
                resize(B, sB, sz1, 0., 0., INTER_AREA);
1165
            }
1166

1167 1168 1169 1170
            A = sA;
            B = sB;
        }

1171 1172
        int count_y = COUNT;
        int count_x = cvRound((double)COUNT*sz1.width/sz1.height);
1173 1174
        count = count_x * count_y;

1175 1176 1177
        pA.resize(count);
        pB.resize(count);
        status.resize(count);
1178 1179 1180 1181 1182 1183 1184 1185 1186

        for( i = 0, k = 0; i < count_y; i++ )
            for( j = 0; j < count_x; j++, k++ )
            {
                pA[k].x = (j+0.5f)*sz1.width/count_x;
                pA[k].y = (i+0.5f)*sz1.height/count_y;
            }

        // find the corresponding points in B
1187 1188
        calcOpticalFlowPyrLK(A, B, pA, pB, status, noArray(), Size(21, 21), 3,
                             TermCriteria(TermCriteria::MAX_ITER,40,0.1));
1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201

        // repack the remained points
        for( i = 0, k = 0; i < count; i++ )
            if( status[i] )
            {
                if( i > k )
                {
                    pA[k] = pA[i];
                    pB[k] = pB[i];
                }
                k++;
            }
        count = k;
1202 1203
        pA.resize(count);
        pB.resize(count);
1204 1205
    }
    else
1206
        CV_Error( Error::StsUnsupportedFormat, "Both input images must have either 8uC1 or 8uC3 type" );
1207

1208
    good_idx.resize(count);
1209 1210

    if( count < RANSAC_SIZE0 )
1211
        return Mat();
1212

1213
    Rect brect = boundingRect(pB);
1214 1215 1216 1217 1218 1219

    // RANSAC stuff:
    // 1. find the consensus
    for( k = 0; k < RANSAC_MAX_ITERS; k++ )
    {
        int idx[RANSAC_SIZE0];
1220 1221
        Point2f a[RANSAC_SIZE0];
        Point2f b[RANSAC_SIZE0];
1222 1223 1224 1225 1226 1227

        // choose random 3 non-complanar points from A & B
        for( i = 0; i < RANSAC_SIZE0; i++ )
        {
            for( k1 = 0; k1 < RANSAC_MAX_ITERS; k1++ )
            {
1228
                idx[i] = rng.uniform(0, count);
1229

1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255
                for( j = 0; j < i; j++ )
                {
                    if( idx[j] == idx[i] )
                        break;
                    // check that the points are not very close one each other
                    if( fabs(pA[idx[i]].x - pA[idx[j]].x) +
                        fabs(pA[idx[i]].y - pA[idx[j]].y) < FLT_EPSILON )
                        break;
                    if( fabs(pB[idx[i]].x - pB[idx[j]].x) +
                        fabs(pB[idx[i]].y - pB[idx[j]].y) < FLT_EPSILON )
                        break;
                }

                if( j < i )
                    continue;

                if( i+1 == RANSAC_SIZE0 )
                {
                    // additional check for non-complanar vectors
                    a[0] = pA[idx[0]];
                    a[1] = pA[idx[1]];
                    a[2] = pA[idx[2]];

                    b[0] = pB[idx[0]];
                    b[1] = pB[idx[1]];
                    b[2] = pB[idx[2]];
1256

1257 1258
                    double dax1 = a[1].x - a[0].x, day1 = a[1].y - a[0].y;
                    double dax2 = a[2].x - a[0].x, day2 = a[2].y - a[0].y;
1259
                    double dbx1 = b[1].x - b[0].x, dby1 = b[1].y - b[0].y;
1260 1261 1262
                    double dbx2 = b[2].x - b[0].x, dby2 = b[2].y - b[0].y;
                    const double eps = 0.01;

1263 1264
                    if( fabs(dax1*day2 - day1*dax2) < eps*std::sqrt(dax1*dax1+day1*day1)*std::sqrt(dax2*dax2+day2*day2) ||
                        fabs(dbx1*dby2 - dby1*dbx2) < eps*std::sqrt(dbx1*dbx1+dby1*dby1)*std::sqrt(dbx2*dbx2+dby2*dby2) )
1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277
                        continue;
                }
                break;
            }

            if( k1 >= RANSAC_MAX_ITERS )
                break;
        }

        if( i < RANSAC_SIZE0 )
            continue;

        // estimate the transformation using 3 points
1278
        getRTMatrix( a, b, 3, M, fullAffine );
1279

1280
        const double* m = M.ptr<double>();
1281 1282
        for( i = 0, good_count = 0; i < count; i++ )
        {
1283 1284
            if( std::abs( m[0]*pA[i].x + m[1]*pA[i].y + m[2] - pB[i].x ) +
                std::abs( m[3]*pA[i].x + m[4]*pA[i].y + m[5] - pB[i].y ) < std::max(brect.width,brect.height)*0.05 )
1285 1286 1287 1288 1289 1290 1291 1292
                good_idx[good_count++] = i;
        }

        if( good_count >= count*RANSAC_GOOD_RATIO )
            break;
    }

    if( k >= RANSAC_MAX_ITERS )
1293
        return Mat();
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304

    if( good_count < count )
    {
        for( i = 0; i < good_count; i++ )
        {
            j = good_idx[i];
            pA[i] = pA[j];
            pB[i] = pB[j];
        }
    }

1305 1306 1307
    getRTMatrix( &pA[0], &pB[0], good_count, M, fullAffine );
    M.at<double>(0, 2) /= scale;
    M.at<double>(1, 2) /= scale;
1308

1309
    return M;
1310 1311 1312
}

/* End of file. */