lapack.cpp 55.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

namespace cv
{
47

48 49 50
/****************************************************************************************\
*                     LU & Cholesky implementation for small matrices                    *
\****************************************************************************************/
51

V
Vadim Pisarevsky 已提交
52 53
template<typename _Tp> static inline int
LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
54 55
{
    int i, j, k, p = 1;
V
Vadim Pisarevsky 已提交
56 57
    astep /= sizeof(A[0]);
    bstep /= sizeof(b[0]);
58

59 60 61
    for( i = 0; i < m; i++ )
    {
        k = i;
62

63
        for( j = i+1; j < m; j++ )
V
Vadim Pisarevsky 已提交
64
            if( std::abs(A[j*astep + i]) > std::abs(A[k*astep + i]) )
65
                k = j;
66

V
Vadim Pisarevsky 已提交
67
        if( std::abs(A[k*astep + i]) < std::numeric_limits<_Tp>::epsilon() )
68
            return 0;
69

70 71 72
        if( k != i )
        {
            for( j = i; j < m; j++ )
V
Vadim Pisarevsky 已提交
73
                std::swap(A[i*astep + j], A[k*astep + j]);
74 75
            if( b )
                for( j = 0; j < n; j++ )
V
Vadim Pisarevsky 已提交
76
                    std::swap(b[i*bstep + j], b[k*bstep + j]);
77 78
            p = -p;
        }
79

V
Vadim Pisarevsky 已提交
80
        _Tp d = -1/A[i*astep + i];
81

82 83
        for( j = i+1; j < m; j++ )
        {
V
Vadim Pisarevsky 已提交
84
            _Tp alpha = A[j*astep + i]*d;
85

86
            for( k = i+1; k < m; k++ )
V
Vadim Pisarevsky 已提交
87
                A[j*astep + k] += alpha*A[i*astep + k];
88

89 90
            if( b )
                for( k = 0; k < n; k++ )
V
Vadim Pisarevsky 已提交
91
                    b[j*bstep + k] += alpha*b[i*bstep + k];
92
        }
93

V
Vadim Pisarevsky 已提交
94
        A[i*astep + i] = -d;
95
    }
96

97 98 99 100 101
    if( b )
    {
        for( i = m-1; i >= 0; i-- )
            for( j = 0; j < n; j++ )
            {
V
Vadim Pisarevsky 已提交
102
                _Tp s = b[i*bstep + j];
103
                for( k = i+1; k < m; k++ )
V
Vadim Pisarevsky 已提交
104 105
                    s -= A[i*astep + k]*b[k*bstep + j];
                b[i*bstep + j] = s*A[i*astep + i];
106 107
            }
    }
108

109 110 111
    return p;
}

112

V
Vadim Pisarevsky 已提交
113
int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
114
{
V
Vadim Pisarevsky 已提交
115
    return LUImpl(A, astep, m, b, bstep, n);
116
}
117

118

V
Vadim Pisarevsky 已提交
119
int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
120
{
V
Vadim Pisarevsky 已提交
121
    return LUImpl(A, astep, m, b, bstep, n);
122
}
123

124

V
Vadim Pisarevsky 已提交
125 126
template<typename _Tp> static inline bool
CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
127 128 129 130
{
    _Tp* L = A;
    int i, j, k;
    double s;
V
Vadim Pisarevsky 已提交
131 132
    astep /= sizeof(A[0]);
    bstep /= sizeof(b[0]);
133

134 135 136 137
    for( i = 0; i < m; i++ )
    {
        for( j = 0; j < i; j++ )
        {
V
Vadim Pisarevsky 已提交
138
            s = A[i*astep + j];
139
            for( k = 0; k < j; k++ )
V
Vadim Pisarevsky 已提交
140 141
                s -= L[i*astep + k]*L[j*astep + k];
            L[i*astep + j] = (_Tp)(s*L[j*astep + j]);
142
        }
V
Vadim Pisarevsky 已提交
143
        s = A[i*astep + i];
144 145
        for( k = 0; k < j; k++ )
        {
V
Vadim Pisarevsky 已提交
146
            double t = L[i*astep + k];
147 148 149
            s -= t*t;
        }
        if( s < std::numeric_limits<_Tp>::epsilon() )
V
Vadim Pisarevsky 已提交
150 151
            return false;
        L[i*astep + i] = (_Tp)(1./std::sqrt(s));
152
    }
153

154
    if( !b )
V
Vadim Pisarevsky 已提交
155
        return true;
156

157 158 159
    // LLt x = b
    // 1: L y = b
    // 2. Lt x = y
160

161 162 163 164 165
    /*
     [ L00             ]  y0   b0
     [ L10 L11         ]  y1 = b1
     [ L20 L21 L22     ]  y2   b2
     [ L30 L31 L32 L33 ]  y3   b3
166

167 168 169 170 171
     [ L00 L10 L20 L30 ]  x0   y0
     [     L11 L21 L31 ]  x1 = y1
     [         L22 L32 ]  x2   y2
     [             L33 ]  x3   y3
    */
172

173 174 175 176
    for( i = 0; i < m; i++ )
    {
        for( j = 0; j < n; j++ )
        {
V
Vadim Pisarevsky 已提交
177
            s = b[i*bstep + j];
178
            for( k = 0; k < i; k++ )
V
Vadim Pisarevsky 已提交
179 180
                s -= L[i*astep + k]*b[k*bstep + j];
            b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
181 182
        }
    }
183

184 185 186 187
    for( i = m-1; i >= 0; i-- )
    {
        for( j = 0; j < n; j++ )
        {
V
Vadim Pisarevsky 已提交
188
            s = b[i*bstep + j];
189
            for( k = m-1; k > i; k-- )
V
Vadim Pisarevsky 已提交
190 191 192 193
                s -= L[k*astep + i]*b[k*bstep + j];
            b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
        }
    }
194

V
Vadim Pisarevsky 已提交
195 196
    return true;
}
197 198


V
Vadim Pisarevsky 已提交
199 200 201 202
bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
    return CholImpl(A, astep, m, b, bstep, n);
}
203

V
Vadim Pisarevsky 已提交
204 205 206 207 208
bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
{
    return CholImpl(A, astep, m, b, bstep, n);
}

209

V
Vadim Pisarevsky 已提交
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
{
    a = std::abs(a);
    b = std::abs(b);
    if( a > b )
    {
        b /= a;
        return a*std::sqrt(1 + b*b);
    }
    if( b > 0 )
    {
        a /= b;
        return b*std::sqrt(1 + a*a);
    }
    return 0;
}


template<typename _Tp> bool
JacobiImpl_( _Tp* A, size_t astep, _Tp* W, _Tp* V, size_t vstep, int n, uchar* buf )
{
    const _Tp eps = std::numeric_limits<_Tp>::epsilon();
    int i, j, k, m;
233

V
Vadim Pisarevsky 已提交
234 235 236 237 238 239 240 241 242 243 244
    astep /= sizeof(A[0]);
    if( V )
    {
        vstep /= sizeof(V[0]);
        for( i = 0; i < n; i++ )
        {
            for( j = 0; j < n; j++ )
                V[i*vstep + j] = (_Tp)0;
            V[i*vstep + i] = (_Tp)1;
        }
    }
245

V
Vadim Pisarevsky 已提交
246
    int iters, maxIters = n*n*30;
247

248
    int* indR = (int*)alignPtr(buf, sizeof(int));
V
Vadim Pisarevsky 已提交
249 250
    int* indC = indR + n;
    _Tp mv = (_Tp)0;
251

V
Vadim Pisarevsky 已提交
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
    for( k = 0; k < n; k++ )
    {
        W[k] = A[(astep + 1)*k];
        if( k < n - 1 )
        {
            for( m = k+1, mv = std::abs(A[astep*k + m]), i = k+2; i < n; i++ )
            {
                _Tp val = std::abs(A[astep*k+i]);
                if( mv < val )
                    mv = val, m = i;
            }
            indR[k] = m;
        }
        if( k > 0 )
        {
            for( m = 0, mv = std::abs(A[k]), i = 1; i < k; i++ )
            {
                _Tp val = std::abs(A[astep*i+k]);
                if( mv < val )
                    mv = val, m = i;
            }
            indC[k] = m;
        }
    }
276

277
    if( n > 1 ) for( iters = 0; iters < maxIters; iters++ )
V
Vadim Pisarevsky 已提交
278 279
    {
        // find index (k,l) of pivot p
280
        for( k = 0, mv = std::abs(A[indR[0]]), i = 1; i < n-1; i++ )
V
Vadim Pisarevsky 已提交
281
        {
282
            _Tp val = std::abs(A[astep*i + indR[i]]);
V
Vadim Pisarevsky 已提交
283 284 285 286 287 288
            if( mv < val )
                mv = val, k = i;
        }
        int l = indR[k];
        for( i = 1; i < n; i++ )
        {
289
            _Tp val = std::abs(A[astep*indC[i] + i]);
V
Vadim Pisarevsky 已提交
290 291 292
            if( mv < val )
                mv = val, k = indC[i], l = i;
        }
293

V
Vadim Pisarevsky 已提交
294 295 296 297 298 299 300 301 302 303 304
        _Tp p = A[astep*k + l];
        if( std::abs(p) <= eps )
            break;
        _Tp y = (_Tp)((W[l] - W[k])*0.5);
        _Tp t = std::abs(y) + hypot(p, y);
        _Tp s = hypot(p, t);
        _Tp c = t/s;
        s = p/s; t = (p/t)*p;
        if( y < 0 )
            s = -s, t = -t;
        A[astep*k + l] = 0;
305

V
Vadim Pisarevsky 已提交
306 307
        W[k] -= t;
        W[l] += t;
308

V
Vadim Pisarevsky 已提交
309
        _Tp a0, b0;
310

V
Vadim Pisarevsky 已提交
311 312
#undef rotate
#define rotate(v0, v1) a0 = v0, b0 = v1, v0 = a0*c - b0*s, v1 = a0*s + b0*c
313

V
Vadim Pisarevsky 已提交
314 315 316 317 318 319 320
        // rotate rows and columns k and l
        for( i = 0; i < k; i++ )
            rotate(A[astep*i+k], A[astep*i+l]);
        for( i = k+1; i < l; i++ )
            rotate(A[astep*k+i], A[astep*i+l]);
        for( i = l+1; i < n; i++ )
            rotate(A[astep*k+i], A[astep*l+i]);
321

V
Vadim Pisarevsky 已提交
322 323 324 325
        // rotate eigenvectors
        if( V )
            for( i = 0; i < n; i++ )
                rotate(V[vstep*k+i], V[vstep*l+i]);
326

V
Vadim Pisarevsky 已提交
327
#undef rotate
328

V
Vadim Pisarevsky 已提交
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
        for( j = 0; j < 2; j++ )
        {
            int idx = j == 0 ? k : l;
            if( idx < n - 1 )
            {
                for( m = idx+1, mv = std::abs(A[astep*idx + m]), i = idx+2; i < n; i++ )
                {
                    _Tp val = std::abs(A[astep*idx+i]);
                    if( mv < val )
                        mv = val, m = i;
                }
                indR[idx] = m;
            }
            if( idx > 0 )
            {
                for( m = 0, mv = std::abs(A[idx]), i = 1; i < idx; i++ )
                {
                    _Tp val = std::abs(A[astep*i+idx]);
                    if( mv < val )
                        mv = val, m = i;
                }
                indC[idx] = m;
            }
        }
    }
354

V
Vadim Pisarevsky 已提交
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
    // sort eigenvalues & eigenvectors
    for( k = 0; k < n-1; k++ )
    {
        m = k;
        for( i = k+1; i < n; i++ )
        {
            if( W[m] < W[i] )
                m = i;
        }
        if( k != m )
        {
            std::swap(W[m], W[k]);
            if( V )
                for( i = 0; i < n; i++ )
                    std::swap(V[vstep*m + i], V[vstep*k + i]);
        }
    }
372

V
Vadim Pisarevsky 已提交
373 374
    return true;
}
375

V
Vadim Pisarevsky 已提交
376 377 378 379 380 381 382 383 384
static bool Jacobi( float* S, size_t sstep, float* e, float* E, size_t estep, int n, uchar* buf )
{
    return JacobiImpl_(S, sstep, e, E, estep, n, buf);
}

static bool Jacobi( double* S, size_t sstep, double* e, double* E, size_t estep, int n, uchar* buf )
{
    return JacobiImpl_(S, sstep, e, E, estep, n, buf);
}
385 386


V
Vadim Pisarevsky 已提交
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
template<typename T> struct VBLAS
{
    int dot(const T*, const T*, int, T*) const { return 0; }
    int givens(T*, T*, int, T, T) const { return 0; }
    int givensx(T*, T*, int, T, T, T*, T*) const { return 0; }
};

#if CV_SSE2
template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
{
    if( n < 8 )
        return 0;
    int k = 0;
    __m128 s0 = _mm_setzero_ps(), s1 = _mm_setzero_ps();
    for( ; k <= n - 8; k += 8 )
    {
        __m128 a0 = _mm_load_ps(a + k), a1 = _mm_load_ps(a + k + 4);
        __m128 b0 = _mm_load_ps(b + k), b1 = _mm_load_ps(b + k + 4);
405

V
Vadim Pisarevsky 已提交
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
        s0 = _mm_add_ps(s0, _mm_mul_ps(a0, b0));
        s1 = _mm_add_ps(s1, _mm_mul_ps(a1, b1));
    }
    s0 = _mm_add_ps(s0, s1);
    float sbuf[4];
    _mm_storeu_ps(sbuf, s0);
    *result = sbuf[0] + sbuf[1] + sbuf[2] + sbuf[3];
    return k;
}


template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const
{
    if( n < 4 )
        return 0;
    int k = 0;
    __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
    for( ; k <= n - 4; k += 4 )
    {
        __m128 a0 = _mm_load_ps(a + k);
        __m128 b0 = _mm_load_ps(b + k);
        __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
        __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
        _mm_store_ps(a + k, t0);
        _mm_store_ps(b + k, t1);
    }
    return k;
}


template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c, float s,
                                             float* anorm, float* bnorm) const
{
    if( n < 4 )
        return 0;
    int k = 0;
    __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
    __m128 sa = _mm_setzero_ps(), sb = _mm_setzero_ps();
    for( ; k <= n - 4; k += 4 )
    {
        __m128 a0 = _mm_load_ps(a + k);
        __m128 b0 = _mm_load_ps(b + k);
        __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
        __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
        _mm_store_ps(a + k, t0);
        _mm_store_ps(b + k, t1);
        sa = _mm_add_ps(sa, _mm_mul_ps(t0, t0));
        sb = _mm_add_ps(sb, _mm_mul_ps(t1, t1));
    }
    float abuf[4], bbuf[4];
    _mm_storeu_ps(abuf, sa);
    _mm_storeu_ps(bbuf, sb);
    *anorm = abuf[0] + abuf[1] + abuf[2] + abuf[3];
    *bnorm = bbuf[0] + bbuf[1] + bbuf[2] + bbuf[3];
    return k;
}


template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const
{
    if( n < 4 )
        return 0;
    int k = 0;
    __m128d s0 = _mm_setzero_pd(), s1 = _mm_setzero_pd();
    for( ; k <= n - 4; k += 4 )
    {
        __m128d a0 = _mm_load_pd(a + k), a1 = _mm_load_pd(a + k + 2);
        __m128d b0 = _mm_load_pd(b + k), b1 = _mm_load_pd(b + k + 2);
474

V
Vadim Pisarevsky 已提交
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
        s0 = _mm_add_pd(s0, _mm_mul_pd(a0, b0));
        s1 = _mm_add_pd(s1, _mm_mul_pd(a1, b1));
    }
    s0 = _mm_add_pd(s0, s1);
    double sbuf[2];
    _mm_storeu_pd(sbuf, s0);
    *result = sbuf[0] + sbuf[1];
    return k;
}


template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const
{
    int k = 0;
    __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
    for( ; k <= n - 2; k += 2 )
    {
        __m128d a0 = _mm_load_pd(a + k);
        __m128d b0 = _mm_load_pd(b + k);
        __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
        __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
        _mm_store_pd(a + k, t0);
        _mm_store_pd(b + k, t1);
    }
    return k;
}


template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double c, double s,
                                              double* anorm, double* bnorm) const
{
    int k = 0;
    __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
    __m128d sa = _mm_setzero_pd(), sb = _mm_setzero_pd();
    for( ; k <= n - 2; k += 2 )
    {
        __m128d a0 = _mm_load_pd(a + k);
        __m128d b0 = _mm_load_pd(b + k);
        __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
        __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
        _mm_store_pd(a + k, t0);
        _mm_store_pd(b + k, t1);
        sa = _mm_add_pd(sa, _mm_mul_pd(t0, t0));
        sb = _mm_add_pd(sb, _mm_mul_pd(t1, t1));
    }
    double abuf[2], bbuf[2];
    _mm_storeu_pd(abuf, sa);
    _mm_storeu_pd(bbuf, sb);
    *anorm = abuf[0] + abuf[1];
    *bnorm = bbuf[0] + bbuf[1];
    return k;
}
#endif

template<typename _Tp> void
530
JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep, int m, int n, int n1, double minval)
V
Vadim Pisarevsky 已提交
531 532
{
    VBLAS<_Tp> vblas;
533 534 535
    AutoBuffer<double> Wbuf(n);
    double* W = Wbuf;
    _Tp eps = DBL_EPSILON*10;
V
Vadim Pisarevsky 已提交
536 537 538 539 540
    int i, j, k, iter, max_iter = std::max(m, 30);
    _Tp c, s;
    double sd;
    astep /= sizeof(At[0]);
    vstep /= sizeof(Vt[0]);
541

V
Vadim Pisarevsky 已提交
542 543
    for( i = 0; i < n; i++ )
    {
544
        for( k = 0, sd = 0; k < m; k++ )
V
Vadim Pisarevsky 已提交
545 546
        {
            _Tp t = At[i*astep + k];
547
            sd += (double)t*t;
V
Vadim Pisarevsky 已提交
548
        }
549
        W[i] = sd;
550

V
Vadim Pisarevsky 已提交
551 552 553 554 555 556 557
        if( Vt )
        {
            for( k = 0; k < n; k++ )
                Vt[i*vstep + k] = 0;
            Vt[i*vstep + i] = 1;
        }
    }
558

V
Vadim Pisarevsky 已提交
559 560 561
    for( iter = 0; iter < max_iter; iter++ )
    {
        bool changed = false;
562

V
Vadim Pisarevsky 已提交
563 564 565
        for( i = 0; i < n-1; i++ )
            for( j = i+1; j < n; j++ )
            {
566 567
                _Tp *Ai = At + i*astep, *Aj = At + j*astep;
                double a = W[i], p = 0, b = W[j];
568

569 570
                for( k = 0; k < m; k++ )
                    p += (double)Ai[k]*Aj[k];
571

V
Vadim Pisarevsky 已提交
572
                if( std::abs(p) <= eps*std::sqrt((double)a*b) )
573 574
                    continue;

V
Vadim Pisarevsky 已提交
575 576 577 578
                p *= 2;
                double beta = a - b, gamma = hypot((double)p, beta), delta;
                if( beta < 0 )
                {
579
                    delta = (gamma - beta)*0.5;
V
Vadim Pisarevsky 已提交
580 581 582 583 584 585 586
                    s = (_Tp)std::sqrt(delta/gamma);
                    c = (_Tp)(p/(gamma*s*2));
                }
                else
                {
                    c = (_Tp)std::sqrt((gamma + beta)/(gamma*2));
                    s = (_Tp)(p/(gamma*c*2));
587
                    delta = p*p*0.5/(gamma + beta);
V
Vadim Pisarevsky 已提交
588
                }
589

590 591
                W[i] += delta;
                W[j] -= delta;
592

593
                if( iter % 2 != 0 && W[i] > 0 && W[j] > 0 )
V
Vadim Pisarevsky 已提交
594 595
                {
                    k = vblas.givens(Ai, Aj, m, c, s);
596

V
Vadim Pisarevsky 已提交
597 598 599 600 601 602 603 604 605 606
                    for( ; k < m; k++ )
                    {
                        _Tp t0 = c*Ai[k] + s*Aj[k];
                        _Tp t1 = -s*Ai[k] + c*Aj[k];
                        Ai[k] = t0; Aj[k] = t1;
                    }
                }
                else
                {
                    a = b = 0;
607
                    for( k = 0; k < m; k++ )
V
Vadim Pisarevsky 已提交
608 609 610 611
                    {
                        _Tp t0 = c*Ai[k] + s*Aj[k];
                        _Tp t1 = -s*Ai[k] + c*Aj[k];
                        Ai[k] = t0; Aj[k] = t1;
612

613
                        a += (double)t0*t0; b += (double)t1*t1;
V
Vadim Pisarevsky 已提交
614 615 616
                    }
                    W[i] = a; W[j] = b;
                }
617

V
Vadim Pisarevsky 已提交
618
                changed = true;
619

V
Vadim Pisarevsky 已提交
620 621 622 623
                if( Vt )
                {
                    _Tp *Vi = Vt + i*vstep, *Vj = Vt + j*vstep;
                    k = vblas.givens(Vi, Vj, n, c, s);
624

V
Vadim Pisarevsky 已提交
625 626 627 628 629 630 631 632 633 634 635
                    for( ; k < n; k++ )
                    {
                        _Tp t0 = c*Vi[k] + s*Vj[k];
                        _Tp t1 = -s*Vi[k] + c*Vj[k];
                        Vi[k] = t0; Vj[k] = t1;
                    }
                }
            }
        if( !changed )
            break;
    }
636

V
Vadim Pisarevsky 已提交
637 638 639 640 641 642 643
    for( i = 0; i < n; i++ )
    {
        for( k = 0, sd = 0; k < m; k++ )
        {
            _Tp t = At[i*astep + k];
            sd += (double)t*t;
        }
644
        W[i] = std::sqrt(sd);
V
Vadim Pisarevsky 已提交
645
    }
646

V
Vadim Pisarevsky 已提交
647 648 649 650 651 652 653 654 655 656 657 658 659 660 661
    for( i = 0; i < n-1; i++ )
    {
        j = i;
        for( k = i+1; k < n; k++ )
        {
            if( W[j] < W[k] )
                j = k;
        }
        if( i != j )
        {
            std::swap(W[i], W[j]);
            if( Vt )
            {
                for( k = 0; k < m; k++ )
                    std::swap(At[i*astep + k], At[j*astep + k]);
662

V
Vadim Pisarevsky 已提交
663 664 665 666 667
                for( k = 0; k < n; k++ )
                    std::swap(Vt[i*vstep + k], Vt[j*vstep + k]);
            }
        }
    }
668

669 670
    for( i = 0; i < n; i++ )
        _W[i] = (_Tp)W[i];
671

V
Vadim Pisarevsky 已提交
672 673
    if( !Vt )
        return;
674

V
Vadim Pisarevsky 已提交
675
    RNG rng(0x12345678);
V
Vadim Pisarevsky 已提交
676 677
    for( i = 0; i < n1; i++ )
    {
678
        sd = i < n ? W[i] : 0;
679

680
        while( sd <= minval )
V
Vadim Pisarevsky 已提交
681 682 683 684 685 686 687
        {
            // if we got a zero singular value, then in order to get the corresponding left singular vector
            // we generate a random vector, project it to the previously computed left singular vectors,
            // subtract the projection and normalize the difference.
            const _Tp val0 = (_Tp)(1./m);
            for( k = 0; k < m; k++ )
            {
V
Vadim Pisarevsky 已提交
688
                _Tp val = (rng.next() & 256) != 0 ? val0 : -val0;
V
Vadim Pisarevsky 已提交
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
                At[i*astep + k] = val;
            }
            for( iter = 0; iter < 2; iter++ )
            {
                for( j = 0; j < i; j++ )
                {
                    sd = 0;
                    for( k = 0; k < m; k++ )
                        sd += At[i*astep + k]*At[j*astep + k];
                    _Tp asum = 0;
                    for( k = 0; k < m; k++ )
                    {
                        _Tp t = (_Tp)(At[i*astep + k] - sd*At[j*astep + k]);
                        At[i*astep + k] = t;
                        asum += std::abs(t);
                    }
                    asum = asum ? 1/asum : 0;
                    for( k = 0; k < m; k++ )
                        At[i*astep + k] *= asum;
                }
            }
            sd = 0;
            for( k = 0; k < m; k++ )
            {
                _Tp t = At[i*astep + k];
                sd += (double)t*t;
            }
716
            sd = std::sqrt(sd);
V
Vadim Pisarevsky 已提交
717
        }
718

719
        s = (_Tp)(1/sd);
V
Vadim Pisarevsky 已提交
720 721 722 723 724
        for( k = 0; k < m; k++ )
            At[i*astep + k] *= s;
    }
}

725

V
Vadim Pisarevsky 已提交
726 727
static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
{
728
    JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN);
V
Vadim Pisarevsky 已提交
729 730 731 732
}

static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
{
733
    JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN);
V
Vadim Pisarevsky 已提交
734
}
735

V
Vadim Pisarevsky 已提交
736 737 738 739 740 741 742 743 744
/* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
template<typename T1, typename T2, typename T3> static void
MatrAXPY( int m, int n, const T1* x, int dx,
         const T2* a, int inca, T3* y, int dy )
{
    int i, j;
    for( i = 0; i < m; i++, x += dx, y += dy )
    {
        T2 s = a[i*inca];
745 746
        j=0;
         #if CV_ENABLE_UNROLLED
V
Victoria Zhislina 已提交
747
        for(; j <= n - 4; j += 4 )
V
Vadim Pisarevsky 已提交
748 749 750 751 752 753 754 755 756 757
        {
            T3 t0 = (T3)(y[j]   + s*x[j]);
            T3 t1 = (T3)(y[j+1] + s*x[j+1]);
            y[j]   = t0;
            y[j+1] = t1;
            t0 = (T3)(y[j+2] + s*x[j+2]);
            t1 = (T3)(y[j+3] + s*x[j+3]);
            y[j+2] = t0;
            y[j+3] = t1;
        }
V
Victoria Zhislina 已提交
758
        #endif
V
Vadim Pisarevsky 已提交
759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
        for( ; j < n; j++ )
            y[j] = (T3)(y[j] + s*x[j]);
    }
}

template<typename T> static void
SVBkSbImpl_( int m, int n, const T* w, int incw,
       const T* u, int ldu, bool uT,
       const T* v, int ldv, bool vT,
       const T* b, int ldb, int nb,
       T* x, int ldx, double* buffer, T eps )
{
    double threshold = 0;
    int udelta0 = uT ? ldu : 1, udelta1 = uT ? 1 : ldu;
    int vdelta0 = vT ? ldv : 1, vdelta1 = vT ? 1 : ldv;
    int i, j, nm = std::min(m, n);
775

V
Vadim Pisarevsky 已提交
776 777
    if( !b )
        nb = m;
778

V
Vadim Pisarevsky 已提交
779 780 781
    for( i = 0; i < n; i++ )
        for( j = 0; j < nb; j++ )
            x[i*ldx + j] = 0;
782

V
Vadim Pisarevsky 已提交
783 784 785
    for( i = 0; i < nm; i++ )
        threshold += w[i*incw];
    threshold *= eps;
786

V
Vadim Pisarevsky 已提交
787 788 789 790 791 792 793
    // v * inv(w) * uT * b
    for( i = 0; i < nm; i++, u += udelta0, v += vdelta0 )
    {
        double wi = w[i*incw];
        if( (double)std::abs(wi) <= threshold )
            continue;
        wi = 1/wi;
794

V
Vadim Pisarevsky 已提交
795 796 797 798 799 800 801 802 803
        if( nb == 1 )
        {
            double s = 0;
            if( b )
                for( j = 0; j < m; j++ )
                    s += u[j*udelta1]*b[j*ldb];
            else
                s = u[0];
            s *= wi;
804

V
Vadim Pisarevsky 已提交
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823
            for( j = 0; j < n; j++ )
                x[j*ldx] = (T)(x[j*ldx] + s*v[j*vdelta1]);
        }
        else
        {
            if( b )
            {
                for( j = 0; j < nb; j++ )
                    buffer[j] = 0;
                MatrAXPY( m, nb, b, ldb, u, udelta1, buffer, 0 );
                for( j = 0; j < nb; j++ )
                    buffer[j] *= wi;
            }
            else
            {
                for( j = 0; j < nb; j++ )
                    buffer[j] = u[j*udelta1]*wi;
            }
            MatrAXPY( n, nb, buffer, 0, v, vdelta1, x, ldx );
824 825 826
        }
    }
}
827

V
Vadim Pisarevsky 已提交
828 829 830 831 832 833
static void
SVBkSb( int m, int n, const float* w, size_t wstep,
        const float* u, size_t ustep, bool uT,
        const float* v, size_t vstep, bool vT,
        const float* b, size_t bstep, int nb,
        float* x, size_t xstep, uchar* buffer )
834
{
V
Vadim Pisarevsky 已提交
835 836 837 838 839
    SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
                u, (int)(ustep/sizeof(u[0])), uT,
                v, (int)(vstep/sizeof(v[0])), vT,
                b, (int)(bstep/sizeof(b[0])), nb,
                x, (int)(xstep/sizeof(x[0])),
840
                (double*)alignPtr(buffer, sizeof(double)), (float)(DBL_EPSILON*2) );
841
}
V
Vadim Pisarevsky 已提交
842 843 844 845 846 847 848

static void
SVBkSb( int m, int n, const double* w, size_t wstep,
       const double* u, size_t ustep, bool uT,
       const double* v, size_t vstep, bool vT,
       const double* b, size_t bstep, int nb,
       double* x, size_t xstep, uchar* buffer )
849
{
V
Vadim Pisarevsky 已提交
850 851 852 853 854 855
    SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
                u, (int)(ustep/sizeof(u[0])), uT,
                v, (int)(vstep/sizeof(v[0])), vT,
                b, (int)(bstep/sizeof(b[0])), nb,
                x, (int)(xstep/sizeof(x[0])),
                (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
856
}
857 858

}
859

860 861 862 863
/****************************************************************************************\
*                                 Determinant of the matrix                              *
\****************************************************************************************/

864 865 866 867
#define det2(m)   ((double)m(0,0)*m(1,1) - (double)m(0,1)*m(1,0))
#define det3(m)   (m(0,0)*((double)m(1,1)*m(2,2) - (double)m(1,2)*m(2,1)) -  \
                   m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) +  \
                   m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
868

869
double cv::determinant( InputArray _mat )
870
{
871
    Mat mat = _mat.getMat();
872 873 874 875 876
    double result = 0;
    int type = mat.type(), rows = mat.rows;
    size_t step = mat.step;
    const uchar* m = mat.data;

877
    CV_Assert( mat.rows == mat.cols && (type == CV_32F || type == CV_64F));
878 879 880 881

    #define Mf(y, x) ((float*)(m + y*step))[x]
    #define Md(y, x) ((double*)(m + y*step))[x]

V
Vadim Pisarevsky 已提交
882
    if( type == CV_32F )
883
    {
V
Vadim Pisarevsky 已提交
884 885 886 887 888 889
        if( rows == 2 )
            result = det2(Mf);
        else if( rows == 3 )
            result = det3(Mf);
        else if( rows == 1 )
            result = Mf(0,0);
890 891
        else
        {
V
Vadim Pisarevsky 已提交
892 893 894 895
            size_t bufSize = rows*rows*sizeof(float);
            AutoBuffer<uchar> buffer(bufSize);
            Mat a(rows, rows, CV_32F, (uchar*)buffer);
            mat.copyTo(a);
896

V
Vadim Pisarevsky 已提交
897 898
            result = LU((float*)a.data, a.step, rows, 0, 0, 0);
            if( result )
899
            {
V
Vadim Pisarevsky 已提交
900 901 902
                for( int i = 0; i < rows; i++ )
                    result *= ((const float*)(a.data + a.step*i))[i];
                result = 1./result;
903 904 905
            }
        }
    }
906
    else
907
    {
V
Vadim Pisarevsky 已提交
908 909 910 911 912 913 914
        if( rows == 2 )
            result = det2(Md);
        else if( rows == 3 )
            result = det3(Md);
        else if( rows == 1 )
            result = Md(0,0);
        else
915
        {
V
Vadim Pisarevsky 已提交
916 917 918 919
            size_t bufSize = rows*rows*sizeof(double);
            AutoBuffer<uchar> buffer(bufSize);
            Mat a(rows, rows, CV_64F, (uchar*)buffer);
            mat.copyTo(a);
920

V
Vadim Pisarevsky 已提交
921 922
            result = LU((double*)a.data, a.step, rows, 0, 0, 0);
            if( result )
923
            {
V
Vadim Pisarevsky 已提交
924 925 926
                for( int i = 0; i < rows; i++ )
                    result *= ((const double*)(a.data + a.step*i))[i];
                result = 1./result;
927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945
            }
        }
    }

    #undef Mf
    #undef Md

    return result;
}

/****************************************************************************************\
*                          Inverse (or pseudo-inverse) of a matrix                       *
\****************************************************************************************/

#define Sf( y, x ) ((float*)(srcdata + y*srcstep))[x]
#define Sd( y, x ) ((double*)(srcdata + y*srcstep))[x]
#define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
#define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]

946
double cv::invert( InputArray _src, OutputArray _dst, int method )
947
{
V
Vadim Pisarevsky 已提交
948
    bool result = false;
949
    Mat src = _src.getMat();
950
    int type = src.type();
951 952 953

    CV_Assert(type == CV_32F || type == CV_64F);

954 955
    size_t esz = CV_ELEM_SIZE(type);
    int m = src.rows, n = src.cols;
956

957
    if( method == DECOMP_SVD )
958
    {
959
        int nm = std::min(m, n);
960

961 962 963 964 965
        AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
        Mat u(m, nm, type, buf);
        Mat w(nm, 1, type, u.data + m*nm*esz);
        Mat vt(nm, n, type, w.data + nm*esz);
966

967 968
        SVD::compute(src, w, u, vt);
        SVD::backSubst(w, u, vt, Mat(), _dst);
969
        return type == CV_32F ?
970 971 972 973
            (((float*)w.data)[0] >= FLT_EPSILON ?
             ((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
            (((double*)w.data)[0] >= DBL_EPSILON ?
             ((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
974
    }
975

976
    CV_Assert( m == n );
977

978 979 980 981 982 983 984
    if( method == DECOMP_EIG )
    {
        AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
        Mat u(n, n, type, buf);
        Mat w(n, 1, type, u.data + n*n*esz);
        Mat vt(n, n, type, w.data + n*esz);
985

986 987 988 989 990 991 992 993 994
        eigen(src, w, vt);
        transpose(vt, u);
        SVD::backSubst(w, u, vt, Mat(), _dst);
        return type == CV_32F ?
        (((float*)w.data)[0] >= FLT_EPSILON ?
         ((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
        (((double*)w.data)[0] >= DBL_EPSILON ?
         ((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
    }
995

996
    CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
997

998 999 1000 1001
    _dst.create( n, n, type );
    Mat dst = _dst.getMat();

    if( n <= 3 )
1002
    {
1003 1004 1005 1006
        uchar* srcdata = src.data;
        uchar* dstdata = dst.data;
        size_t srcstep = src.step;
        size_t dststep = dst.step;
1007

1008
        if( n == 2 )
1009 1010
        {
            if( type == CV_32FC1 )
1011
            {
1012
                double d = det2(Sf);
1013
                if( d != 0. )
1014
                {
1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048
                    result = true;
                    d = 1./d;

                    #if CV_SSE2
                        if(USE_SSE2)
                        {
                            __m128 zero = _mm_setzero_ps();
                            __m128 t0 = _mm_loadl_pi(zero, (const __m64*)srcdata); //t0 = sf(0,0) sf(0,1)
                            __m128 t1 = _mm_loadh_pi(zero, (const __m64*)(srcdata+srcstep)); //t1 = sf(1,0) sf(1,1)
                            __m128 s0 = _mm_or_ps(t0, t1);
                            __m128 det =_mm_set1_ps((float)d);
                            s0 =  _mm_mul_ps(s0, det);
                            static const uchar CV_DECL_ALIGNED(16) inv[16] = {0,0,0,0,0,0,0,0x80,0,0,0,0x80,0,0,0,0};
                            __m128 pattern = _mm_load_ps((const float*)inv);
                            s0 = _mm_xor_ps(s0, pattern);//==-1*s0
                            s0 = _mm_shuffle_ps(s0, s0, _MM_SHUFFLE(0,2,1,3));
                            _mm_storel_pi((__m64*)dstdata, s0);
                            _mm_storeh_pi((__m64*)((float*)(dstdata+dststep)), s0);
                        }
                        else
                    #endif
                        {
                        double t0, t1;
                        t0 = Sf(0,0)*d;
                        t1 = Sf(1,1)*d;
                        Df(1,1) = (float)t0;
                        Df(0,0) = (float)t1;
                        t0 = -Sf(0,1)*d;
                        t1 = -Sf(1,0)*d;
                        Df(0,1) = (float)t0;
                        Df(1,0) = (float)t1;
                        }

                }
1049
            }
1050
            else
1051
            {
1052 1053
                double d = det2(Sd);
                if( d != 0. )
1054
                {
1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090
                    result = true;
                    d = 1./d;
                    #if CV_SSE2
                        if(USE_SSE2)
                        {
                            __m128d s0 = _mm_loadu_pd((const double*)srcdata); //s0 = sf(0,0) sf(0,1)
                            __m128d s1 = _mm_loadu_pd ((const double*)(srcdata+srcstep));//s1 = sf(1,0) sf(1,1)
                            __m128d sm = _mm_unpacklo_pd(s0, _mm_load_sd((const double*)(srcdata+srcstep)+1)); //sm = sf(0,0) sf(1,1) - main diagonal
                            __m128d ss = _mm_shuffle_pd(s0, s1, _MM_SHUFFLE2(0,1)); //ss = sf(0,1) sf(1,0) - secondary diagonal
                            __m128d det = _mm_load1_pd((const double*)&d);
                            sm =  _mm_mul_pd(sm, det);

                            static const uchar CV_DECL_ALIGNED(16) inv[8] = {0,0,0,0,0,0,0,0x80};
                            __m128d pattern = _mm_load1_pd((double*)inv);
                            ss = _mm_mul_pd(ss, det);
                            ss = _mm_xor_pd(ss, pattern);//==-1*ss

                            s0 = _mm_shuffle_pd(sm, ss, _MM_SHUFFLE2(0,1));
                            s1 = _mm_shuffle_pd(ss, sm, _MM_SHUFFLE2(0,1));
                            _mm_storeu_pd((double*)dstdata, s0);
                            _mm_storeu_pd((double*)(dstdata+dststep), s1);
                        }
                        else
                    #endif
                        {
                            double t0, t1;
                            t0 = Sd(0,0)*d;
                            t1 = Sd(1,1)*d;
                            Dd(1,1) = t0;
                            Dd(0,0) = t1;
                            t0 = -Sd(0,1)*d;
                            t1 = -Sd(1,0)*d;
                            Dd(0,1) = t0;
                            Dd(1,0) = t1;
                        }
                }
1091 1092
            }
        }
1093
        else if( n == 3 )
1094 1095 1096 1097 1098
        {
            if( type == CV_32FC1 )
            {
                double d = det3(Sf);
                if( d != 0. )
1099
                {
1100
                    float CV_DECL_ALIGNED(16) t[12];
1101

V
Vadim Pisarevsky 已提交
1102
                    result = true;
1103
                    d = 1./d;
1104 1105 1106
                    t[0] = (float)(((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d);
                    t[1] = (float)(((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d);
                    t[2] = (float)(((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d);
1107

1108 1109 1110
                    t[3] = (float)(((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d);
                    t[4] = (float)(((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d);
                    t[5] = (float)(((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d);
1111

1112 1113 1114
                    t[6] = (float)(((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d);
                    t[7] = (float)(((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d);
                    t[8] = (float)(((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d);
1115

1116 1117 1118
                    Df(0,0) = t[0]; Df(0,1) = t[1]; Df(0,2) = t[2];
                    Df(1,0) = t[3]; Df(1,1) = t[4]; Df(1,2) = t[5];
                    Df(2,0) = t[6]; Df(2,1) = t[7]; Df(2,2) = t[8];
1119 1120 1121 1122
                }
            }
            else
            {
1123 1124
                double d = det3(Sd);
                if( d != 0. )
1125
                {
1126 1127
                    result = true;
                    d = 1./d;
1128 1129 1130 1131 1132
                    double t[9];

                    t[0] = (Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1)) * d;
                    t[1] = (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2)) * d;
                    t[2] = (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1)) * d;
1133

1134 1135 1136
                    t[3] = (Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2)) * d;
                    t[4] = (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0)) * d;
                    t[5] = (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2)) * d;
1137

1138 1139 1140 1141 1142 1143 1144
                    t[6] = (Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0)) * d;
                    t[7] = (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1)) * d;
                    t[8] = (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0)) * d;

                    Dd(0,0) = t[0]; Dd(0,1) = t[1]; Dd(0,2) = t[2];
                    Dd(1,0) = t[3]; Dd(1,1) = t[4]; Dd(1,2) = t[5];
                    Dd(2,0) = t[6]; Dd(2,1) = t[7]; Dd(2,2) = t[8];
1145 1146 1147
                }
            }
        }
1148
        else
1149
        {
1150
            assert( n == 1 );
1151

1152
            if( type == CV_32FC1 )
1153
            {
1154 1155 1156
                double d = Sf(0,0);
                if( d != 0. )
                {
V
Vadim Pisarevsky 已提交
1157
                    result = true;
1158 1159
                    Df(0,0) = (float)(1./d);
                }
1160 1161 1162
            }
            else
            {
1163 1164 1165
                double d = Sd(0,0);
                if( d != 0. )
                {
V
Vadim Pisarevsky 已提交
1166
                    result = true;
1167 1168
                    Dd(0,0) = 1./d;
                }
1169
            }
1170
        }
V
Vadim Pisarevsky 已提交
1171 1172
        if( !result )
            dst = Scalar(0);
1173 1174
        return result;
    }
1175

1176
   int elem_size = CV_ELEM_SIZE(type);
V
Vadim Pisarevsky 已提交
1177 1178 1179 1180
    AutoBuffer<uchar> buf(n*n*elem_size);
    Mat src1(n, n, type, (uchar*)buf);
    src.copyTo(src1);
    setIdentity(dst);
1181

V
Vadim Pisarevsky 已提交
1182
    if( method == DECOMP_LU && type == CV_32F )
1183
        result = LU((float*)src1.data, src1.step, n, (float*)dst.data, dst.step, n) != 0;
V
Vadim Pisarevsky 已提交
1184
    else if( method == DECOMP_LU && type == CV_64F )
1185
        result = LU((double*)src1.data, src1.step, n, (double*)dst.data, dst.step, n) != 0;
V
Vadim Pisarevsky 已提交
1186 1187
    else if( method == DECOMP_CHOLESKY && type == CV_32F )
        result = Cholesky((float*)src1.data, src1.step, n, (float*)dst.data, dst.step, n);
1188
    else
V
Vadim Pisarevsky 已提交
1189
        result = Cholesky((double*)src1.data, src1.step, n, (double*)dst.data, dst.step, n);
1190 1191 1192 1193 1194 1195 1196

    if( !result )
        dst = Scalar(0);

    return result;
}

1197

1198

1199 1200 1201 1202
/****************************************************************************************\
*                              Solving a linear system                                   *
\****************************************************************************************/

1203
bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
1204 1205
{
    bool result = true;
1206
    Mat src = _src.getMat(), _src2 = _src2arg.getMat();
1207 1208 1209 1210 1211 1212 1213 1214 1215 1216
    int type = src.type();
    bool is_normal = (method & DECOMP_NORMAL) != 0;

    CV_Assert( type == _src2.type() && (type == CV_32F || type == CV_64F) );

    method &= ~DECOMP_NORMAL;
    CV_Assert( (method != DECOMP_LU && method != DECOMP_CHOLESKY) ||
        is_normal || src.rows == src.cols );

    // check case of a single equation and small matrix
V
Vadim Pisarevsky 已提交
1217
    if( (method == DECOMP_LU || method == DECOMP_CHOLESKY) && !is_normal &&
1218 1219
        src.rows <= 3 && src.rows == src.cols && _src2.cols == 1 )
    {
1220 1221
        _dst.create( src.cols, _src2.cols, src.type() );
        Mat dst = _dst.getMat();
1222

1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239
        #define bf(y) ((float*)(bdata + y*src2step))[0]
        #define bd(y) ((double*)(bdata + y*src2step))[0]

        uchar* srcdata = src.data;
        uchar* bdata = _src2.data;
        uchar* dstdata = dst.data;
        size_t srcstep = src.step;
        size_t src2step = _src2.step;
        size_t dststep = dst.step;

        if( src.rows == 2 )
        {
            if( type == CV_32FC1 )
            {
                double d = det2(Sf);
                if( d != 0. )
                {
1240
                    double t;
1241
                    d = 1./d;
1242 1243
                    t = (float)(((double)bf(0)*Sf(1,1) - (double)bf(1)*Sf(0,1))*d);
                    Df(1,0) = (float)(((double)bf(1)*Sf(0,0) - (double)bf(0)*Sf(1,0))*d);
1244
                    Df(0,0) = (float)t;
1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274
                }
                else
                    result = false;
            }
            else
            {
                double d = det2(Sd);
                if( d != 0. )
                {
                    double t;
                    d = 1./d;
                    t = (bd(0)*Sd(1,1) - bd(1)*Sd(0,1))*d;
                    Dd(1,0) = (bd(1)*Sd(0,0) - bd(0)*Sd(1,0))*d;
                    Dd(0,0) = t;
                }
                else
                    result = false;
            }
        }
        else if( src.rows == 3 )
        {
            if( type == CV_32FC1 )
            {
                double d = det3(Sf);
                if( d != 0. )
                {
                    float t[3];
                    d = 1./d;

                    t[0] = (float)(d*
1275 1276 1277
                           (bf(0)*((double)Sf(1,1)*Sf(2,2) - (double)Sf(1,2)*Sf(2,1)) -
                            Sf(0,1)*((double)bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) +
                            Sf(0,2)*((double)bf(1)*Sf(2,1) - (double)Sf(1,1)*bf(2))));
1278 1279

                    t[1] = (float)(d*
1280 1281 1282
                           (Sf(0,0)*(double)(bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) -
                            bf(0)*((double)Sf(1,0)*Sf(2,2) - (double)Sf(1,2)*Sf(2,0)) +
                            Sf(0,2)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0))));
1283 1284

                    t[2] = (float)(d*
1285 1286 1287
                           (Sf(0,0)*((double)Sf(1,1)*bf(2) - (double)bf(1)*Sf(2,1)) -
                            Sf(0,1)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)) +
                            bf(0)*((double)Sf(1,0)*Sf(2,1) - (double)Sf(1,1)*Sf(2,0))));
1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303

                    Df(0,0) = t[0];
                    Df(1,0) = t[1];
                    Df(2,0) = t[2];
                }
                else
                    result = false;
            }
            else
            {
                double d = det3(Sd);
                if( d != 0. )
                {
                    double t[9];

                    d = 1./d;
1304

1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330
                    t[0] = ((Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1))*bd(0) +
                            (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2))*bd(1) +
                            (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1))*bd(2))*d;

                    t[1] = ((Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2))*bd(0) +
                            (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0))*bd(1) +
                            (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2))*bd(2))*d;

                    t[2] = ((Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0))*bd(0) +
                            (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1))*bd(1) +
                            (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0))*bd(2))*d;

                    Dd(0,0) = t[0];
                    Dd(1,0) = t[1];
                    Dd(2,0) = t[2];
                }
                else
                    result = false;
            }
        }
        else
        {
            assert( src.rows == 1 );

            if( type == CV_32FC1 )
            {
V
Vadim Pisarevsky 已提交
1331 1332 1333 1334 1335
                double d = Sf(0,0);
                if( d != 0. )
                    Df(0,0) = (float)(bf(0)/d);
                else
                    result = false;
1336
            }
V
Vadim Pisarevsky 已提交
1337
            else
1338
            {
V
Vadim Pisarevsky 已提交
1339 1340 1341 1342 1343
                double d = Sd(0,0);
                if( d != 0. )
                    Dd(0,0) = (bd(0)/d);
                else
                    result = false;
1344 1345
            }
        }
V
Vadim Pisarevsky 已提交
1346
        return result;
1347
    }
V
Vadim Pisarevsky 已提交
1348 1349 1350

    if( method == DECOMP_QR )
        method = DECOMP_SVD;
1351

V
Vadim Pisarevsky 已提交
1352 1353 1354 1355 1356 1357 1358 1359 1360
    int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols;
    size_t esz = CV_ELEM_SIZE(type), bufsize = 0;
    size_t vstep = alignSize(n*esz, 16);
    size_t astep = method == DECOMP_SVD && !is_normal ? alignSize(m*esz, 16) : vstep;
    AutoBuffer<uchar> buffer;

    Mat src2 = _src2;
    _dst.create( src.cols, src2.cols, src.type() );
    Mat dst = _dst.getMat();
1361

V
Vadim Pisarevsky 已提交
1362 1363
    if( m < n )
        CV_Error(CV_StsBadArg, "The function can not solve under-determined linear systems" );
1364

V
Vadim Pisarevsky 已提交
1365 1366 1367 1368 1369 1370 1371 1372
    if( m == n )
        is_normal = false;
    else if( is_normal )
    {
        m_ = n;
        if( method == DECOMP_SVD )
            method = DECOMP_EIG;
    }
1373

V
Vadim Pisarevsky 已提交
1374
    size_t asize = astep*(method == DECOMP_SVD || is_normal ? n : m);
1375
    bufsize += asize + 32;
1376

V
Vadim Pisarevsky 已提交
1377 1378
    if( is_normal )
        bufsize += n*nb*esz;
1379

V
Vadim Pisarevsky 已提交
1380 1381
    if( method == DECOMP_SVD || method == DECOMP_EIG )
        bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32;
1382

V
Vadim Pisarevsky 已提交
1383
    buffer.allocate(bufsize);
1384
    uchar* ptr = alignPtr((uchar*)buffer, 16);
V
Vadim Pisarevsky 已提交
1385 1386

    Mat a(m_, n, type, ptr, astep);
1387

V
Vadim Pisarevsky 已提交
1388 1389 1390 1391 1392 1393 1394 1395 1396 1397
    if( is_normal )
        mulTransposed(src, a, true);
    else if( method != DECOMP_SVD )
        src.copyTo(a);
    else
    {
        a = Mat(n, m_, type, ptr, astep);
        transpose(src, a);
    }
    ptr += asize;
1398

V
Vadim Pisarevsky 已提交
1399
    if( !is_normal )
1400
    {
V
Vadim Pisarevsky 已提交
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415
        if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
            src2.copyTo(dst);
    }
    else
    {
        // a'*b
        if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
            gemm( src, src2, 1, Mat(), 0, dst, GEMM_1_T );
        else
        {
            Mat tmp(n, nb, type, ptr);
            ptr += n*nb*esz;
            gemm( src, src2, 1, Mat(), 0, tmp, GEMM_1_T );
            src2 = tmp;
        }
1416
    }
1417

V
Vadim Pisarevsky 已提交
1418
    if( method == DECOMP_LU )
1419 1420
    {
        if( type == CV_32F )
1421
            result = LU(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
1422
        else
1423
            result = LU(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
1424
    }
V
Vadim Pisarevsky 已提交
1425
    else if( method == DECOMP_CHOLESKY )
1426
    {
V
Vadim Pisarevsky 已提交
1427 1428
        if( type == CV_32F )
            result = Cholesky(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
1429
        else
V
Vadim Pisarevsky 已提交
1430
            result = Cholesky(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
1431 1432 1433
    }
    else
    {
V
Vadim Pisarevsky 已提交
1434 1435 1436
        ptr = alignPtr(ptr, 16);
        Mat v(n, n, type, ptr, vstep), w(n, 1, type, ptr + vstep*n), u;
        ptr += n*(vstep + esz);
1437

V
Vadim Pisarevsky 已提交
1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453
        if( method == DECOMP_EIG )
        {
            if( type == CV_32F )
                Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr);
            else
                Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
            u = v;
        }
        else
        {
            if( type == CV_32F )
                JacobiSVD(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, m_, n);
            else
                JacobiSVD(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, m_, n);
            u = a;
        }
1454

V
Vadim Pisarevsky 已提交
1455 1456 1457 1458 1459 1460
        if( type == CV_32F )
        {
            SVBkSb(m_, n, w.ptr<float>(), 0, u.ptr<float>(), u.step, true,
                   v.ptr<float>(), v.step, true, src2.ptr<float>(),
                   src2.step, nb, dst.ptr<float>(), dst.step, ptr);
        }
1461
        else
V
Vadim Pisarevsky 已提交
1462 1463 1464 1465 1466 1467 1468
        {
            SVBkSb(m_, n, w.ptr<double>(), 0, u.ptr<double>(), u.step, true,
                   v.ptr<double>(), v.step, true, src2.ptr<double>(),
                   src2.step, nb, dst.ptr<double>(), dst.step, ptr);
        }
        result = true;
    }
1469

V
Vadim Pisarevsky 已提交
1470 1471
    if( !result )
        dst = Scalar(0);
1472

V
Vadim Pisarevsky 已提交
1473 1474
    return result;
}
1475 1476


V
Vadim Pisarevsky 已提交
1477
/////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
1478

1479
bool cv::eigen( InputArray _src, bool computeEvects, OutputArray _evals, OutputArray _evects )
V
Vadim Pisarevsky 已提交
1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493
{
    Mat src = _src.getMat();
    int type = src.type();
    int n = src.rows;

    CV_Assert( src.rows == src.cols );
    CV_Assert (type == CV_32F || type == CV_64F);

    Mat v;
    if( computeEvects )
    {
        _evects.create(n, n, type);
        v = _evects.getMat();
    }
1494

1495 1496 1497 1498 1499
    size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16);
    AutoBuffer<uchar> buf(n*astep + n*5*elemSize + 32);
    uchar* ptr = alignPtr((uchar*)buf, 16);
    Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n);
    ptr += astep*n + elemSize*n;
V
Vadim Pisarevsky 已提交
1500 1501 1502 1503
    src.copyTo(a);
    bool ok = type == CV_32F ?
        Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr) :
        Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
1504

V
Vadim Pisarevsky 已提交
1505 1506
    w.copyTo(_evals);
    return ok;
1507 1508
}

1509
bool cv::eigen( InputArray src, OutputArray evals, int, int )
1510
{
1511
    return eigen(src, false, evals, noArray());
1512 1513
}

1514
bool cv::eigen( InputArray src, OutputArray evals, OutputArray evects, int, int)
1515
{
1516
    return eigen(src, true, evals, evects);
1517 1518
}

1519 1520
namespace cv
{
1521

1522
static void _SVDcompute( InputArray _aarr, OutputArray _w,
1523
                         OutputArray _u, OutputArray _vt, int flags )
1524
{
V
Vadim Pisarevsky 已提交
1525 1526 1527
    Mat src = _aarr.getMat();
    int m = src.rows, n = src.cols;
    int type = src.type();
1528
    bool compute_uv = _u.needed() || _vt.needed();
1529
    bool full_uv = (flags & SVD::FULL_UV) != 0;
1530

V
Vadim Pisarevsky 已提交
1531
    CV_Assert( type == CV_32F || type == CV_64F );
1532

1533
    if( flags & SVD::NO_UV )
1534
    {
1535 1536
        _u.release();
        _vt.release();
V
Vadim Pisarevsky 已提交
1537
        compute_uv = full_uv = false;
1538
    }
1539

V
Vadim Pisarevsky 已提交
1540 1541
    bool at = false;
    if( m < n )
1542
    {
V
Vadim Pisarevsky 已提交
1543 1544
        std::swap(m, n);
        at = true;
1545
    }
1546

V
Vadim Pisarevsky 已提交
1547 1548 1549
    int urows = full_uv ? m : n;
    size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16);
    AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32);
1550
    uchar* buf = alignPtr((uchar*)_buf, 16);
V
Vadim Pisarevsky 已提交
1551 1552 1553
    Mat temp_a(n, m, type, buf, astep);
    Mat temp_w(n, 1, type, buf + urows*astep);
    Mat temp_u(urows, m, type, buf, astep), temp_v;
1554

V
Vadim Pisarevsky 已提交
1555 1556
    if( compute_uv )
        temp_v = Mat(n, n, type, alignPtr(buf + urows*astep + n*esz, 16), vstep);
1557

V
Vadim Pisarevsky 已提交
1558 1559
    if( urows > n )
        temp_u = Scalar::all(0);
1560

V
Vadim Pisarevsky 已提交
1561 1562 1563 1564
    if( !at )
        transpose(src, temp_a);
    else
        src.copyTo(temp_a);
1565

1566 1567
    if( type == CV_32F )
    {
V
Vadim Pisarevsky 已提交
1568
        JacobiSVD(temp_a.ptr<float>(), temp_u.step, temp_w.ptr<float>(),
V
Vadim Pisarevsky 已提交
1569
              temp_v.ptr<float>(), temp_v.step, m, n, compute_uv ? urows : 0);
1570 1571 1572
    }
    else
    {
V
Vadim Pisarevsky 已提交
1573
        JacobiSVD(temp_a.ptr<double>(), temp_u.step, temp_w.ptr<double>(),
V
Vadim Pisarevsky 已提交
1574
              temp_v.ptr<double>(), temp_v.step, m, n, compute_uv ? urows : 0);
1575
    }
V
Vadim Pisarevsky 已提交
1576 1577
    temp_w.copyTo(_w);
    if( compute_uv )
1578
    {
V
Vadim Pisarevsky 已提交
1579
        if( !at )
1580
        {
V
Vadim Pisarevsky 已提交
1581 1582
            transpose(temp_u, _u);
            temp_v.copyTo(_vt);
1583
        }
V
Vadim Pisarevsky 已提交
1584
        else
1585
        {
V
Vadim Pisarevsky 已提交
1586 1587
            transpose(temp_v, _u);
            temp_u.copyTo(_vt);
1588 1589
        }
    }
1590 1591 1592
}


1593
void SVD::compute( InputArray a, OutputArray w, OutputArray u, OutputArray vt, int flags )
1594
{
1595
    _SVDcompute(a, w, u, vt, flags);
1596 1597
}

1598
void SVD::compute( InputArray a, OutputArray w, int flags )
1599
{
1600
    _SVDcompute(a, w, noArray(), noArray(), flags);
1601
}
1602

1603 1604
void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
                     InputArray _rhs, OutputArray _dst )
1605
{
1606
    Mat w = _w.getMat(), u = _u.getMat(), vt = _vt.getMat(), rhs = _rhs.getMat();
1607
    int type = w.type(), esz = (int)w.elemSize();
V
Vadim Pisarevsky 已提交
1608
    int m = u.rows, n = vt.cols, nb = rhs.data ? rhs.cols : m, nm = std::min(m, n);
1609
    size_t wstep = w.rows == 1 ? (size_t)esz : w.cols == 1 ? (size_t)w.step : (size_t)w.step + esz;
V
Vadim Pisarevsky 已提交
1610 1611 1612 1613
    AutoBuffer<uchar> buffer(nb*sizeof(double) + 16);
    CV_Assert( w.type() == u.type() && u.type() == vt.type() && u.data && vt.data && w.data );
    CV_Assert( u.cols >= nm && vt.rows >= nm &&
              (w.size() == Size(nm, 1) || w.size() == Size(1, nm) || w.size() == Size(vt.rows, u.cols)) );
V
Vadim Pisarevsky 已提交
1614
    CV_Assert( rhs.data == 0 || (rhs.type() == type && rhs.rows == m) );
1615

1616 1617
    _dst.create( n, nb, type );
    Mat dst = _dst.getMat();
1618
    if( type == CV_32F )
V
Vadim Pisarevsky 已提交
1619 1620 1621
        SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false,
               vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb,
               dst.ptr<float>(), dst.step, buffer);
1622
    else if( type == CV_64F )
V
Vadim Pisarevsky 已提交
1623 1624 1625
        SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false,
               vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb,
               dst.ptr<double>(), dst.step, buffer);
1626 1627 1628 1629
    else
        CV_Error( CV_StsUnsupportedFormat, "" );
}

1630

1631
SVD& SVD::operator ()(InputArray a, int flags)
1632
{
1633
    _SVDcompute(a, w, u, vt, flags);
1634 1635 1636 1637
    return *this;
}


1638
void SVD::backSubst( InputArray rhs, OutputArray dst ) const
1639 1640 1641 1642
{
    backSubst( w, u, vt, rhs, dst );
}

1643 1644 1645
}


1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656
void cv::SVDecomp(InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags)
{
    SVD::compute(src, w, u, vt, flags);
}

void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, OutputArray dst)
{
    SVD::backSubst(w, u, vt, rhs, dst);
}


1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698
CV_IMPL double
cvDet( const CvArr* arr )
{
    if( CV_IS_MAT(arr) && ((CvMat*)arr)->rows <= 3 )
    {
        CvMat* mat = (CvMat*)arr;
        int type = CV_MAT_TYPE(mat->type);
        int rows = mat->rows;
        uchar* m = mat->data.ptr;
        int step = mat->step;
        CV_Assert( rows == mat->cols );

        #define Mf(y, x) ((float*)(m + y*step))[x]
        #define Md(y, x) ((double*)(m + y*step))[x]

        if( type == CV_32F )
        {
            if( rows == 2 )
                return det2(Mf);
            if( rows == 3 )
                return det3(Mf);
        }
        else if( type == CV_64F )
        {
            if( rows == 2 )
                return det2(Md);
            if( rows == 3 )
                return det3(Md);
        }
        return cv::determinant(cv::Mat(mat));
    }
    return cv::determinant(cv::cvarrToMat(arr));
}


CV_IMPL double
cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
    return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
1699 1700
                      method == CV_SVD ? cv::DECOMP_SVD :
                      method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
1701 1702 1703 1704 1705 1706 1707 1708 1709
}


CV_IMPL int
cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
{
    cv::Mat A = cv::cvarrToMat(Aarr), b = cv::cvarrToMat(barr), x = cv::cvarrToMat(xarr);

    CV_Assert( A.type() == x.type() && A.cols == x.rows && x.cols == b.cols );
V
Vadim Pisarevsky 已提交
1710 1711 1712
    bool is_normal = (method & CV_NORMAL) != 0;
    method &= ~CV_NORMAL;
    return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
1713 1714
                                method == CV_SVD ? cv::DECOMP_SVD :
                                method == CV_SVD_SYM ? cv::DECOMP_EIG :
V
Vadim Pisarevsky 已提交
1715
        A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
1716 1717 1718 1719 1720 1721 1722
}


CV_IMPL void
cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
           int lowindex, int highindex)
{
1723
    cv::Mat src = cv::cvarrToMat(srcarr), evals0 = cv::cvarrToMat(evalsarr), evals = evals0;
1724 1725
    if( evectsarr )
    {
1726
        cv::Mat evects0 = cv::cvarrToMat(evectsarr), evects = evects0;
1727
        eigen(src, evals, evects, lowindex, highindex);
1728 1729 1730 1731 1732 1733
        if( evects0.data != evects.data )
        {
            uchar* p = evects0.data;
            evects.convertTo(evects0, evects0.type());
            CV_Assert( p == evects0.data );
        }
1734 1735 1736
    }
    else
        eigen(src, evals, lowindex, highindex);
1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747
    if( evals0.data != evals.data )
    {
        uchar* p = evals0.data;
        if( evals0.size() == evals.size() )
            evals.convertTo(evals0, evals0.type());
        else if( evals0.type() == evals.type() )
            cv::transpose(evals, evals0);
        else
            cv::Mat(evals.t()).convertTo(evals0, evals0.type());
        CV_Assert( p == evals0.data );
    }
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828
}


CV_IMPL void
cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
{
    cv::Mat a = cv::cvarrToMat(aarr), w = cv::cvarrToMat(warr), u, v;
    int m = a.rows, n = a.cols, type = a.type(), mn = std::max(m, n), nm = std::min(m, n);

    CV_Assert( w.type() == type &&
        (w.size() == cv::Size(nm,1) || w.size() == cv::Size(1, nm) ||
        w.size() == cv::Size(nm, nm) || w.size() == cv::Size(n, m)) );

    cv::SVD svd;

    if( w.size() == cv::Size(nm, 1) )
        svd.w = cv::Mat(nm, 1, type, w.data );
    else if( w.isContinuous() )
        svd.w = w;

    if( uarr )
    {
        u = cv::cvarrToMat(uarr);
        CV_Assert( u.type() == type );
        svd.u = u;
    }

    if( varr )
    {
        v = cv::cvarrToMat(varr);
        CV_Assert( v.type() == type );
        svd.vt = v;
    }

    svd(a, ((flags & CV_SVD_MODIFY_A) ? cv::SVD::MODIFY_A : 0) |
        ((!svd.u.data && !svd.vt.data) ? cv::SVD::NO_UV : 0) |
        ((m != n && (svd.u.size() == cv::Size(mn, mn) ||
        svd.vt.size() == cv::Size(mn, mn))) ? cv::SVD::FULL_UV : 0));

    if( u.data )
    {
        if( flags & CV_SVD_U_T )
            cv::transpose( svd.u, u );
        else if( u.data != svd.u.data )
        {
            CV_Assert( u.size() == svd.u.size() );
            svd.u.copyTo(u);
        }
    }

    if( v.data )
    {
        if( !(flags & CV_SVD_V_T) )
            cv::transpose( svd.vt, v );
        else if( v.data != svd.vt.data )
        {
            CV_Assert( v.size() == svd.vt.size() );
            svd.vt.copyTo(v);
        }
    }

    if( w.data != svd.w.data )
    {
        if( w.size() == svd.w.size() )
            svd.w.copyTo(w);
        else
        {
            w = cv::Scalar(0);
            cv::Mat wd = w.diag();
            svd.w.copyTo(wd);
        }
    }
}


CV_IMPL void
cvSVBkSb( const CvArr* warr, const CvArr* uarr,
          const CvArr* varr, const CvArr* rhsarr,
          CvArr* dstarr, int flags )
{
    cv::Mat w = cv::cvarrToMat(warr), u = cv::cvarrToMat(uarr),
V
Vadim Pisarevsky 已提交
1829 1830 1831
        v = cv::cvarrToMat(varr), rhs,
        dst = cv::cvarrToMat(dstarr), dst0 = dst;
    if( flags & CV_SVD_U_T )
1832
    {
V
Vadim Pisarevsky 已提交
1833 1834 1835
        cv::Mat tmp;
        transpose(u, tmp);
        u = tmp;
1836
    }
V
Vadim Pisarevsky 已提交
1837 1838 1839 1840 1841 1842 1843 1844
    if( !(flags & CV_SVD_V_T) )
    {
        cv::Mat tmp;
        transpose(v, tmp);
        v = tmp;
    }
    if( rhsarr )
        rhs = cv::cvarrToMat(rhsarr);
1845

V
Vadim Pisarevsky 已提交
1846 1847
    cv::SVD::backSubst(w, u, v, rhs, dst);
    CV_Assert( dst.data == dst0.data );
1848
}