diff --git a/doc/opencv.bib b/doc/opencv.bib index dea93f8b1f2a1a50ae1f450601ea8371687d8024..29a2ae4512f8e1d7e1d05e69c6df9dc75cf81d0b 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -457,6 +457,11 @@ title = {ROF and TV-L1 denoising with Primal-Dual algorithm}, url = {http://znah.net/rof-and-tv-l1-denoising-with-primal-dual-algorithm.html} } +@MISC{VandLec, + author = {Vandenberghe, Lieven}, + title = {QR Factorization}, + url = {http://www.seas.ucla.edu/~vandenbe/133A/lectures/qr.pdf} +} @ARTICLE{MHT2011, author = {Getreuer, Pascal}, title = {Malvar-He-Cutler Linear Image Demosaicking}, diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 654cc4d3bcbd52a012bfd9f2f23c98b8e287bb56..fcca409e55849173a0c48204c4ae05b18e0997e0 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -263,6 +263,7 @@ enum { CALIB_USE_INTRINSIC_GUESS = 0x00001, CALIB_FIX_S1_S2_S3_S4 = 0x10000, CALIB_TILTED_MODEL = 0x40000, CALIB_FIX_TAUX_TAUY = 0x80000, + CALIB_USE_QR = 0x100000, //!< use QR instead of SVD decomposition for solving. Faster but potentially less precise // only for stereo CALIB_FIX_INTRINSIC = 0x00100, CALIB_SAME_FOCAL_LENGTH = 0x00200, diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp index fae4f601d7458abee8345654411491ccfd631980..0abbee65e0740a3e275321dbd523dc4143676694 100644 --- a/modules/calib3d/src/calibration.cpp +++ b/modules/calib3d/src/calibration.cpp @@ -1485,6 +1485,9 @@ static double cvCalibrateCamera2Internal( const CvMat* objectPoints, if(flags & CALIB_USE_LU) { solver.solveMethod = DECOMP_LU; } + else if(flags & CALIB_USE_QR) { + solver.solveMethod = DECOMP_QR; + } { double* param = solver.param->data.db; diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index f254b585829542b1f2939c1e896371ecb5ba0ec1..63d766585beef705c1a50aa789ecc29f9aac00c1 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -66,6 +66,8 @@ CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bste CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags); CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags); +CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors); +CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors); CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step, float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, diff --git a/modules/core/src/hal_internal.cpp b/modules/core/src/hal_internal.cpp index 5b83522eedef7b8c646a578eb7673e0f76b925fc..0c6b7c01f8b7ae09e16dcefd5adcabfa874d1360 100644 --- a/modules/core/src/hal_internal.cpp +++ b/modules/core/src/hal_internal.cpp @@ -61,10 +61,12 @@ #include #include #include +#include #define HAL_GEMM_SMALL_COMPLEX_MATRIX_THRESH 100 #define HAL_GEMM_SMALL_MATRIX_THRESH 100 #define HAL_SVD_SMALL_MATRIX_THRESH 25 +#define HAL_QR_SMALL_MATRIX_THRESH 30 #define HAL_LU_SMALL_MATRIX_THRESH 100 #define HAL_CHOLESKY_SMALL_MATRIX_THRESH 100 @@ -259,6 +261,106 @@ lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype return CV_HAL_ERROR_OK; } +template static inline int +lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_step, fptype* dst, int* info) +{ + int lda = a_step / sizeof(fptype); + char mode[] = { 'N', '\0' }; + if(m < n) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + + std::vector tmpAMemHolder; + fptype* tmpA; + int ldtmpA; + if (m == n) + { + transpose_square_inplace(a, lda, m); + tmpA = a; + ldtmpA = lda; + } + else + { + tmpAMemHolder.resize(m*n); + tmpA = &tmpAMemHolder.front(); + ldtmpA = m; + transpose(a, lda, tmpA, m, m, n); + } + + int lwork = -1; + fptype work1 = 0.; + + if (b) + { + if (k == 1 && b_step == sizeof(fptype)) + { + if (typeid(fptype) == typeid(float)) + sgels_(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)b, &m, (float*)&work1, &lwork, info); + else if (typeid(fptype) == typeid(double)) + dgels_(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)b, &m, (double*)&work1, &lwork, info); + + lwork = (int)round(work1); //optimal buffer size + std::vector workBufMemHolder(lwork + 1); + fptype* buffer = &workBufMemHolder.front(); + + if (typeid(fptype) == typeid(float)) + sgels_(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)b, &m, (float*)buffer, &lwork, info); + else if (typeid(fptype) == typeid(double)) + dgels_(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)b, &m, (double*)buffer, &lwork, info); + } + else + { + std::vector tmpBMemHolder(m*k); + fptype* tmpB = &tmpBMemHolder.front(); + int ldb = b_step / sizeof(fptype); + transpose(b, ldb, tmpB, m, m, k); + + if (typeid(fptype) == typeid(float)) + sgels_(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)tmpB, &m, (float*)&work1, &lwork, info); + else if (typeid(fptype) == typeid(double)) + dgels_(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)tmpB, &m, (double*)&work1, &lwork, info); + + lwork = (int)round(work1); //optimal buffer size + std::vector workBufMemHolder(lwork + 1); + fptype* buffer = &workBufMemHolder.front(); + + if (typeid(fptype) == typeid(float)) + sgels_(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)tmpB, &m, (float*)buffer, &lwork, info); + else if (typeid(fptype) == typeid(double)) + dgels_(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)tmpB, &m, (double*)buffer, &lwork, info); + + transpose(tmpB, m, b, ldb, k, m); + } + } + else + { + if (typeid(fptype) == typeid(float)) + sgeqrf_(&m, &n, (float*)tmpA, &ldtmpA, (float*)dst, (float*)&work1, &lwork, info); + else if (typeid(fptype) == typeid(double)) + dgeqrf_(&m, &n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)&work1, &lwork, info); + + lwork = (int)round(work1); //optimal buffer size + std::vector workBufMemHolder(lwork + 1); + fptype* buffer = &workBufMemHolder.front(); + + if (typeid(fptype) == typeid(float)) + sgeqrf_(&m, &n, (float*)tmpA, &ldtmpA, (float*)dst, (float*)buffer, &lwork, info); + else if (typeid(fptype) == typeid(double)) + dgeqrf_(&m, &n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)buffer, &lwork, info); + } + + if (m == n) + transpose_square_inplace(a, lda, m); + else + transpose(tmpA, m, a, lda, n, m); + + if (*info != 0) + *info = 0; + else + *info = 1; + + return CV_HAL_ERROR_OK; +} + template static inline int lapack_gemm(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha, const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int a_m, int a_n, int d_n, int flags) @@ -459,6 +561,20 @@ int lapack_SVD64f(double* a, size_t a_step, double *w, double* u, size_t u_step, return lapack_SVD(a, a_step, w, u, u_step, vt, v_step, m, n, flags, &info); } +int lapack_QR32f(float* src1, size_t src1_step, int m, int n, int k, float* src2, size_t src2_step, float* dst, int* info) +{ + if (m < HAL_QR_SMALL_MATRIX_THRESH) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + return lapack_QR(src1, src1_step, m, n, k, src2, src2_step, dst, info); +} + +int lapack_QR64f(double* src1, size_t src1_step, int m, int n, int k, double* src2, size_t src2_step, double* dst, int* info) +{ + if (m < HAL_QR_SMALL_MATRIX_THRESH) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + return lapack_QR(src1, src1_step, m, n, k, src2, src2_step, dst, info); +} + int lapack_gemm32f(const float *src1, size_t src1_step, const float *src2, size_t src2_step, float alpha, const float *src3, size_t src3_step, float beta, float *dst, size_t dst_step, int m, int n, int k, int flags) { diff --git a/modules/core/src/hal_internal.hpp b/modules/core/src/hal_internal.hpp index 26bc142e04635c0d736d46ced3760274147c9b50..129a71014541fa25a1db570c34fdceb21cf44f50 100644 --- a/modules/core/src/hal_internal.hpp +++ b/modules/core/src/hal_internal.hpp @@ -55,6 +55,8 @@ int lapack_Cholesky32f(float* a, size_t a_step, int m, float* b, size_t b_step, int lapack_Cholesky64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, bool* info); int lapack_SVD32f(float* a, size_t a_step, float* w, float* u, size_t u_step, float* vt, size_t v_step, int m, int n, int flags); int lapack_SVD64f(double* a, size_t a_step, double* w, double* u, size_t u_step, double* vt, size_t v_step, int m, int n, int flags); +int lapack_QR32f(float* src1, size_t src1_step, int m, int n, int k, float* src2, size_t src2_step, float* dst, int* info); +int lapack_QR64f(double* src1, size_t src1_step, int m, int n, int k, double* src2, size_t src2_step, double* dst, int* info); int lapack_gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step, float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, int m, int n, int k, int flags); @@ -83,6 +85,11 @@ int lapack_gemm64fc(const double* src1, size_t src1_step, const double* src2, si #undef cv_hal_SVD64f #define cv_hal_SVD64f lapack_SVD64f +#undef cv_hal_QR32f +#define cv_hal_QR32f lapack_QR32f +#undef cv_hal_QR64f +#define cv_hal_QR64f lapack_QR64f + #undef cv_hal_gemm32f #define cv_hal_gemm32f lapack_gemm32f #undef cv_hal_gemm64f diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index 2121261f6d335661dc5ade4b1b6ba7b4a5d89dbb..9a1177e9c29cb36a751fa6bb2f328bbc2aa57097 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -634,6 +634,27 @@ inline int hal_ni_SVD32f(float* src, size_t src_step, float* w, float* u, size_t inline int hal_ni_SVD64f(double* src, size_t src_step, double* w, double* u, size_t u_step, double* vt, size_t vt_step, int m, int n, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } //! @} +/** +Performs QR decomposition of \f$M\times N\f$(\f$M>N\f$) matrix \f$A = Q*R\f$ and solves matrix equation \f$A*X=B\f$. +@param src1 pointer to input matrix \f$A\f$ stored in row major order. After finish of work src1 contains upper triangular \f$N\times N\f$ matrix \f$R\f$. +Lower triangle of src1 will be filled with vectors of elementary reflectors. See @cite VandLec and Lapack's DGEQRF documentation for details. +@param src1_step number of bytes between two consequent rows of matrix \f$A\f$. +@param m number fo rows in matrix \f$A\f$. +@param n number of columns in matrix \f$A\f$. +@param k number of right-hand vectors in \f$M\times K\f$ matrix \f$B\f$. +@param src2 pointer to \f$M\times K\f$ matrix \f$B\f$ which is the right-hand side of system \f$A*X=B\f$. \f$B\f$ stored in row major order. +If src2 is null pointer only QR decomposition will be performed. Otherwise system will be solved and src1 will be used as temporary buffer, so +after finish of work src2 contains solution \f$X\f$ of system \f$A*X=B\f$. +@param src2_step number of bytes between two consequent rows of matrix \f$B\f$. +@param dst pointer to continiuos \f$N\times 1\f$ array for scalar factors of elementary reflectors. See @cite VandLec for details. +@param info indicates success of decomposition. If *info is zero decomposition failed. +*/ +//! @addtogroup core_hal_interface_decomp_qr QR matrix decomposition +//! @{ +inline int hal_ni_QR32f(float* src1, size_t src1_step, int m, int n, int k, float* src2, size_t src2_step, float* dst, int* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_QR64f(double* src1, size_t src1_step, int m, int n, int k, double* src2, size_t src2_step, double* dst, int* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} + //! @cond IGNORED @@ -643,6 +664,8 @@ inline int hal_ni_SVD64f(double* src, size_t src_step, double* w, double* u, siz #define cv_hal_Cholesky64f hal_ni_Cholesky64f #define cv_hal_SVD32f hal_ni_SVD32f #define cv_hal_SVD64f hal_ni_SVD64f +#define cv_hal_QR32f hal_ni_QR32f +#define cv_hal_QR64f hal_ni_QR64f //! @endcond diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index eed86b4d55ea6d9cca18dcf32c607bba0da23028..a602eecd806451d50a4551d98a3d6da2eff4b24a 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -1238,9 +1238,6 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth return result; } - if( method == DECOMP_QR ) - method = DECOMP_SVD; - int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols; size_t esz = CV_ELEM_SIZE(type), bufsize = 0; size_t vstep = alignSize(n*esz, 16); @@ -1268,7 +1265,6 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth if( is_normal ) bufsize += n*nb*esz; - if( method == DECOMP_SVD || method == DECOMP_EIG ) bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32; @@ -1321,6 +1317,28 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth else result = hal::Cholesky64f(a.ptr(), a.step, n, dst.ptr(), dst.step, nb); } + else if( method == DECOMP_QR ) + { + Mat rhsMat; + if( is_normal || m == n ) + { + src2.copyTo(dst); + rhsMat = dst; + } + else + { + rhsMat = Mat(m, nb, type); + src2.copyTo(rhsMat); + } + + if( type == CV_32F ) + result = hal::QR32f(a.ptr(), a.step, a.rows, a.cols, rhsMat.cols, rhsMat.ptr(), rhsMat.step, NULL) != 0; + else + result = hal::QR64f(a.ptr(), a.step, a.rows, a.cols, rhsMat.cols, rhsMat.ptr(), rhsMat.step, NULL) != 0; + + if (rhsMat.rows != dst.rows) + rhsMat.rowRange(0, dst.rows).copyTo(dst); + } else { ptr = alignPtr(ptr, 16); diff --git a/modules/core/src/matrix_decomp.cpp b/modules/core/src/matrix_decomp.cpp index 58829b56f88f7cd18bdca700a511791226606155..66c584a10300f912b979fa1fa0e65821cdd30630 100644 --- a/modules/core/src/matrix_decomp.cpp +++ b/modules/core/src/matrix_decomp.cpp @@ -225,6 +225,129 @@ bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n) return CholImpl(A, astep, m, b, bstep, n); } +template inline static int +sign(_Tp x) +{ + if (x >= (_Tp)0) + return 1; + else + return -1; +} + +template static inline int +QRImpl(_Tp* A, size_t astep, int m, int n, int k, _Tp* b, size_t bstep, _Tp* hFactors, _Tp eps) +{ + astep /= sizeof(_Tp); + bstep /= sizeof(_Tp); + + cv::AutoBuffer<_Tp> buffer; + size_t buf_size = m ? m + n : hFactors != NULL; + buffer.allocate(buf_size); + _Tp* vl = buffer; + if (hFactors == NULL) + hFactors = vl + m; + + for (int l = 0; l < n; l++) + { + //generate vl + int vlSize = m - l; + _Tp vlNorm = (_Tp)0; + for (int i = 0; i < vlSize; i++) + { + vl[i] = A[(l + i)*astep + l]; + vlNorm += vl[i] * vl[i]; + } + _Tp tmpV = vl[0]; + vl[0] = vl[0] + sign(vl[0])*std::sqrt(vlNorm); + vlNorm = std::sqrt(vlNorm + vl[0] * vl[0] - tmpV*tmpV); + for (int i = 0; i < vlSize; i++) + { + vl[i] /= vlNorm; + } + //multiply A_l*vl + for (int j = l; j < n; j++) + { + _Tp v_lA = (_Tp)0; + for (int i = l; i < m; i++) + { + v_lA += vl[i - l] * A[i*astep + j]; + } + + for (int i = l; i < m; i++) + { + A[i*astep + j] -= 2 * vl[i - l] * v_lA; + } + } + + //save vl and factors + hFactors[l] = vl[0] * vl[0]; + for (int i = 1; i < vlSize; i++) + { + A[(l + i)*astep + l] = vl[i] / vl[0]; + } + } + + if (b) + { + //generate new rhs + for (int l = 0; l < n; l++) + { + //unpack vl + vl[0] = (_Tp)1; + for (int j = 1; j < m - l; j++) + { + vl[j] = A[(j + l)*astep + l]; + } + + //h_l*x + for (int j = 0; j < k; j++) + { + _Tp v_lB = (_Tp)0; + for (int i = l; i < m; i++) + v_lB += vl[i - l] * b[i*bstep + j]; + + for (int i = l; i < m; i++) + b[i*bstep + j] -= 2 * vl[i - l] * v_lB * hFactors[l]; + } + } + //do back substitution + for (int i = n - 1; i >= 0; i--) + { + for (int j = n - 1; j > i; j--) + { + for (int p = 0; p < k; p++) + b[i*bstep + p] -= b[j*bstep + p] * A[i*astep + j]; + } + if (std::abs(A[i*astep + i]) < eps) + return 0; + for (int p = 0; p < k; p++) + b[i*bstep + p] /= A[i*astep + i]; + } + } + + return 1; +} + +int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors) +{ + CV_INSTRUMENT_REGION() + + int output; + CALL_HAL_RET(QR32f, cv_hal_QR32f, output, A, astep, m, n, k, b, bstep, hFactors); + output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, FLT_EPSILON * 10); + return output; +} + +int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors) +{ + CV_INSTRUMENT_REGION() + + int output; + CALL_HAL_RET(QR64f, cv_hal_QR64f, output, A, astep, m, n, k, b, bstep, hFactors) + output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, DBL_EPSILON * 100); + return output; +} + //============================================================================= // for compatibility with 3.0 diff --git a/modules/core/test/test_math.cpp b/modules/core/test/test_math.cpp index 32888ff734956a895a07b1b898f3431d79967620..3870d31cd53e91a2c8feb7b0bbbf1a63dc7d2d51 100644 --- a/modules/core/test/test_math.cpp +++ b/modules/core/test/test_math.cpp @@ -2994,6 +2994,51 @@ TEST(Core_Cholesky, accuracy64f) for (int i = 0; i < A.rows; i++) for (int j = i + 1; j < A.cols; j++) A.at(i, j) = 0.0; - EXPECT_TRUE(norm(refA - A*A.t()) < 10e-5); + EXPECT_LE(norm(refA, A*A.t(), CV_RELATIVE_L2), FLT_EPSILON); } + +TEST(Core_QR_Solver, accuracy64f) +{ + int m = 20, n = 18; + Mat A(m, m, CV_64F); + Mat B(m, n, CV_64F); + Mat mean(1, 1, CV_64F); + *mean.ptr() = 10.0; + Mat dev(1, 1, CV_64F); + *dev.ptr() = 10.0; + RNG rng(10); + rng.fill(A, RNG::NORMAL, mean, dev); + rng.fill(B, RNG::NORMAL, mean, dev); + A = A*A.t(); + Mat solutionQR; + + //solve system with square matrix + solve(A, B, solutionQR, DECOMP_QR); + EXPECT_LE(norm(A*solutionQR, B, CV_RELATIVE_L2), FLT_EPSILON); + + A = Mat(m, n, CV_64F); + B = Mat(m, n, CV_64F); + rng.fill(A, RNG::NORMAL, mean, dev); + rng.fill(B, RNG::NORMAL, mean, dev); + + //solve normal system + solve(A, B, solutionQR, DECOMP_QR | DECOMP_NORMAL); + EXPECT_LE(norm(A.t()*(A*solutionQR), A.t()*B, CV_RELATIVE_L2), FLT_EPSILON); + + //solve overdeterminated system as a least squares problem + Mat solutionSVD; + solve(A, B, solutionQR, DECOMP_QR); + solve(A, B, solutionSVD, DECOMP_SVD); + EXPECT_LE(norm(solutionQR, solutionSVD, CV_RELATIVE_L2), FLT_EPSILON); + + //solve system with singular matrix + A = Mat(10, 10, CV_64F); + B = Mat(10, 1, CV_64F); + rng.fill(A, RNG::NORMAL, mean, dev); + rng.fill(B, RNG::NORMAL, mean, dev); + for (int i = 0; i < A.cols; i++) + A.at(0, i) = A.at(1, i); + ASSERT_FALSE(solve(A, B, solutionQR, DECOMP_QR)); +} + /* End of file. */