diff --git a/modules/features2d/src/kaze/AKAZEFeatures.cpp b/modules/features2d/src/kaze/AKAZEFeatures.cpp index 16c2cad0322113c735ce48ed8edf99f5e5de321a..024a5cad2eb8719b3bcc6e1b9d35ee7f70c17734 100644 --- a/modules/features2d/src/kaze/AKAZEFeatures.cpp +++ b/modules/features2d/src/kaze/AKAZEFeatures.cpp @@ -15,6 +15,10 @@ #include +#ifdef HAVE_OPENCL // OpenCL is not well supported +#undef HAVE_OPENCL +#endif + // Namespaces namespace cv { @@ -251,38 +255,41 @@ private: #ifdef HAVE_OPENCL static inline bool -ocl_non_linear_diffusion_step(const UMat& Lt, const UMat& Lf, UMat& Lstep, float step_size) +ocl_non_linear_diffusion_step(InputArray Lt_, InputArray Lf_, OutputArray Lstep_, float step_size) { - if(!Lt.isContinuous()) - return false; + if (!Lt_.isContinuous()) + return false; + + UMat Lt = Lt_.getUMat(), Lf = Lf_.getUMat(), Lstep = Lstep_.getUMat(); - size_t globalSize[] = {(size_t)Lt.cols, (size_t)Lt.rows}; + size_t globalSize[] = {(size_t)Lt.cols, (size_t)Lt.rows}; - ocl::Kernel ker("AKAZE_nld_step_scalar", ocl::features2d::akaze_oclsrc); - if( ker.empty() ) - return false; + ocl::Kernel ker("AKAZE_nld_step_scalar", ocl::features2d::akaze_oclsrc); + if (ker.empty()) + return false; - return ker.args( - ocl::KernelArg::ReadOnly(Lt), - ocl::KernelArg::PtrReadOnly(Lf), - ocl::KernelArg::PtrWriteOnly(Lstep), - step_size).run(2, globalSize, 0, true); + return ker.args( + ocl::KernelArg::ReadOnly(Lt), + ocl::KernelArg::PtrReadOnly(Lf), + ocl::KernelArg::PtrWriteOnly(Lstep), + step_size) + .run(2, globalSize, 0, true); } #endif // HAVE_OPENCL static inline void -non_linear_diffusion_step(const UMat& Lt, const UMat& Lf, UMat& Lstep, float step_size) +non_linear_diffusion_step(InputArray Lt, InputArray Lf, OutputArray Lstep, float step_size) { CV_INSTRUMENT_REGION() Lstep.create(Lt.size(), Lt.type()); - CV_OCL_RUN(true, ocl_non_linear_diffusion_step(Lt, Lf, Lstep, step_size)); +#ifdef HAVE_OPENCL + CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Lstep.isUMat()), ocl_non_linear_diffusion_step(Lt, Lf, Lstep, step_size)); +#endif - // when on CPU UMats should be already allocated on CPU so getMat here is basicallly no-op - Mat Mstep = Lstep.getMat(ACCESS_WRITE); - parallel_for_(Range(0, Lt.rows), NonLinearScalarDiffusionStep(Lt.getMat(ACCESS_READ), - Lf.getMat(ACCESS_READ), Mstep, step_size)); + Mat Mstep = Lstep.getMat(); + parallel_for_(Range(0, Lt.rows()), NonLinearScalarDiffusionStep(Lt.getMat(), Lf.getMat(), Mstep, step_size)); } /** @@ -347,25 +354,28 @@ compute_kcontrast(const cv::Mat& Lx, const cv::Mat& Ly, float perc, int nbins) #ifdef HAVE_OPENCL static inline bool -ocl_pm_g2(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast) +ocl_pm_g2(InputArray Lx_, InputArray Ly_, OutputArray Lflow_, float kcontrast) { - int total = Lx.rows * Lx.cols; - size_t globalSize[] = {(size_t)total}; - - ocl::Kernel ker("AKAZE_pm_g2", ocl::features2d::akaze_oclsrc); - if( ker.empty() ) - return false; - - return ker.args( - ocl::KernelArg::PtrReadOnly(Lx), - ocl::KernelArg::PtrReadOnly(Ly), - ocl::KernelArg::PtrWriteOnly(Lflow), - kcontrast, total).run(1, globalSize, 0, true); + UMat Lx = Lx_.getUMat(), Ly = Ly_.getUMat(), Lflow = Lflow_.getUMat(); + + int total = Lx.rows * Lx.cols; + size_t globalSize[] = {(size_t)total}; + + ocl::Kernel ker("AKAZE_pm_g2", ocl::features2d::akaze_oclsrc); + if (ker.empty()) + return false; + + return ker.args( + ocl::KernelArg::PtrReadOnly(Lx), + ocl::KernelArg::PtrReadOnly(Ly), + ocl::KernelArg::PtrWriteOnly(Lflow), + kcontrast, total) + .run(1, globalSize, 0, true); } #endif // HAVE_OPENCL static inline void -compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast, int diffusivity) +compute_diffusivity(InputArray Lx, InputArray Ly, OutputArray Lflow, float kcontrast, int diffusivity) { CV_INSTRUMENT_REGION() @@ -376,7 +386,9 @@ compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast pm_g1(Lx, Ly, Lflow, kcontrast); break; case KAZE::DIFF_PM_G2: - CV_OCL_RUN(true, ocl_pm_g2(Lx, Ly, Lflow, kcontrast)); +#ifdef HAVE_OPENCL + CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Lflow.isUMat()), ocl_pm_g2(Lx, Ly, Lflow, kcontrast)); +#endif pm_g2(Lx, Ly, Lflow, kcontrast); break; case KAZE::DIFF_WEICKERT: @@ -391,32 +403,6 @@ compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast } } -/** - * @brief Fetches pyramid from the gpu. - * @details Setups mapping for matrices that might be probably on the GPU, if the - * code executes with OpenCL. This will setup MLx, MLy, Mdet members in the pyramid with - * mapping to respective UMats. This must be called before CPU-only parts of AKAZE, that work - * only on these Mats. - * - * This prevents mapping/unmapping overhead (and possible uploads/downloads) that would occur, if - * we just create Mats from UMats each time we need it later. This has devastating effects on OCL - * performace. - * - * @param evolution Pyramid to download - */ -static inline void downloadPyramid(std::vector& evolution) -{ - CV_INSTRUMENT_REGION() - - for (size_t i = 0; i < evolution.size(); ++i) { - Evolution& e = evolution[i]; - e.Mx = e.Lx.getMat(ACCESS_READ); - e.My = e.Ly.getMat(ACCESS_READ); - e.Mt = e.Lt.getMat(ACCESS_READ); - e.Mdet = e.Ldet.getMat(ACCESS_READ); - } -} - /** * @brief This method creates the nonlinear scale space for a given image * @param img Input image for which the nonlinear scale space needs to be created @@ -435,12 +421,11 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img) if (evolution_.size() == 1) { // we don't need to compute kcontrast factor Compute_Determinant_Hessian_Response(); - downloadPyramid(evolution_); return; } // derivatives, flow and diffusion step - UMat Lx, Ly, Lsmooth, Lflow, Lstep; + Mat Lx, Ly, Lsmooth, Lflow, Lstep; // compute derivatives for computing k contrast GaussianBlur(img, Lsmooth, Size(5, 5), 1.0f, 1.0f, BORDER_REPLICATE); @@ -448,8 +433,7 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img) Scharr(Lsmooth, Ly, CV_32F, 0, 1, 1, 0, BORDER_DEFAULT); Lsmooth.release(); // compute the kcontrast factor - float kcontrast = compute_kcontrast(Lx.getMat(ACCESS_READ), Ly.getMat(ACCESS_READ), - options_.kcontrast_percentile, options_.kcontrast_nbins); + float kcontrast = compute_kcontrast(Lx, Ly, options_.kcontrast_percentile, options_.kcontrast_nbins); // Now generate the rest of evolution levels for (size_t i = 1; i < evolution_.size(); i++) { @@ -483,31 +467,30 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img) } Compute_Determinant_Hessian_Response(); - downloadPyramid(evolution_); - - return; } /* ************************************************************************* */ #ifdef HAVE_OPENCL static inline bool -ocl_compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy, - UMat& Ldet, float sigma) +ocl_compute_determinant(InputArray Lxx_, InputArray Lxy_, InputArray Lyy_, OutputArray Ldet_, float sigma) { - const int total = Lxx.rows * Lxx.cols; - size_t globalSize[] = {(size_t)total}; - - ocl::Kernel ker("AKAZE_compute_determinant", ocl::features2d::akaze_oclsrc); - if( ker.empty() ) - return false; - - return ker.args( - ocl::KernelArg::PtrReadOnly(Lxx), - ocl::KernelArg::PtrReadOnly(Lxy), - ocl::KernelArg::PtrReadOnly(Lyy), - ocl::KernelArg::PtrWriteOnly(Ldet), - sigma, total).run(1, globalSize, 0, true); + UMat Lxx = Lxx_.getUMat(), Lxy = Lxy_.getUMat(), Lyy = Lyy_.getUMat(), Ldet = Ldet_.getUMat(); + + const int total = Lxx.rows * Lxx.cols; + size_t globalSize[] = {(size_t)total}; + + ocl::Kernel ker("AKAZE_compute_determinant", ocl::features2d::akaze_oclsrc); + if (ker.empty()) + return false; + + return ker.args( + ocl::KernelArg::PtrReadOnly(Lxx), + ocl::KernelArg::PtrReadOnly(Lxy), + ocl::KernelArg::PtrReadOnly(Lyy), + ocl::KernelArg::PtrWriteOnly(Ldet), + sigma, total) + .run(1, globalSize, 0, true); } #endif // HAVE_OPENCL @@ -521,27 +504,30 @@ ocl_compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy, * @param Ldet output determinant * @param sigma determinant will be scaled by this sigma */ -static inline void compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy, - UMat& Ldet, float sigma) +static inline void compute_determinant(InputArray Lxx, InputArray Lxy, InputArray Lyy, OutputArray Ldet, float sigma) { - CV_INSTRUMENT_REGION() + CV_INSTRUMENT_REGION() - Ldet.create(Lxx.size(), Lxx.type()); + Ldet.create(Lxx.size(), Lxx.type()); - CV_OCL_RUN(true, ocl_compute_determinant(Lxx, Lxy, Lyy, Ldet, sigma)); - - // output determinant - Mat Mxx = Lxx.getMat(ACCESS_READ), Mxy = Lxy.getMat(ACCESS_READ), Myy = Lyy.getMat(ACCESS_READ); - Mat Mdet = Ldet.getMat(ACCESS_WRITE); - float *lxx = Mxx.ptr(); - float *lxy = Mxy.ptr(); - float *lyy = Myy.ptr(); - float *ldet = Mdet.ptr(); - const int total = Lxx.cols * Lxx.rows; - for (int j = 0; j < total; j++) { - ldet[j] = (lxx[j] * lyy[j] - lxy[j] * lxy[j]) * sigma; - } +#ifdef HAVE_OPENCL + CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Ldet.isUMat()), ocl_compute_determinant(Lxx, Lxy, Lyy, Ldet, sigma)); +#endif + // output determinant + Mat Mxx = Lxx.getMat(), Mxy = Lxy.getMat(), Myy = Lyy.getMat(), Mdet = Ldet.getMat(); + const int W = Mxx.cols, H = Mxx.rows; + for (int y = 0; y < H; y++) + { + float *lxx = Mxx.ptr(y); + float *lxy = Mxy.ptr(y); + float *lyy = Myy.ptr(y); + float *ldet = Mdet.ptr(y); + for (int x = 0; x < W; x++) + { + ldet[x] = (lxx[x] * lyy[x] - lxy[x] * lxy[x]) * sigma; + } + } } class DeterminantHessianResponse : public ParallelLoopBody @@ -554,7 +540,7 @@ public: void operator()(const Range& range) const { - UMat Lxx, Lxy, Lyy; + Mat Lxx, Lxy, Lyy; for (int i = range.start; i < range.end; i++) { @@ -670,16 +656,16 @@ public: const Evolution &e = (*evolution_)[i]; Mat &kpts = (*keypoints_by_layers_)[i]; // this mask will hold positions of keypoints in this level - kpts = Mat::zeros(e.Mdet.size(), CV_8UC1); + kpts = Mat::zeros(e.Ldet.size(), CV_8UC1); // if border is too big we shouldn't search any keypoints if (e.border + 1 >= e.Ldet.rows) continue; - const float * prev = e.Mdet.ptr(e.border - 1); - const float * curr = e.Mdet.ptr(e.border ); - const float * next = e.Mdet.ptr(e.border + 1); - const float * ldet = e.Mdet.ptr(); + const float * prev = e.Ldet.ptr(e.border - 1); + const float * curr = e.Ldet.ptr(e.border ); + const float * next = e.Ldet.ptr(e.border + 1); + const float * ldet = e.Ldet.ptr(); uchar *mask = kpts.ptr(); const int search_radius = e.sigma_size; // size of keypoint in this level @@ -743,8 +729,8 @@ void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector& keypoints_by_laye const Mat &keypoints = keypoints_by_layers[i]; const uchar *const kpts = keypoints_by_layers[i].ptr(); uchar *const kpts_prev = keypoints_by_layers[i-1].ptr(); - const float *const ldet = evolution_[i].Mdet.ptr(); - const float *const ldet_prev = evolution_[i-1].Mdet.ptr(); + const float *const ldet = evolution_[i].Ldet.ptr(); + const float *const ldet_prev = evolution_[i-1].Ldet.ptr(); // ratios are just powers of 2 const int diff_ratio = (int)evolution_[i].octave_ratio / (int)evolution_[i-1].octave_ratio; const int search_radius = evolution_[i].sigma_size * diff_ratio; // size of keypoint in this level @@ -775,8 +761,8 @@ void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector& keypoints_by_laye const Mat &keypoints = keypoints_by_layers[i]; const uchar *const kpts = keypoints_by_layers[i].ptr(); uchar *const kpts_next = keypoints_by_layers[i+1].ptr(); - const float *const ldet = evolution_[i].Mdet.ptr(); - const float *const ldet_next = evolution_[i+1].Mdet.ptr(); + const float *const ldet = evolution_[i].Ldet.ptr(); + const float *const ldet_next = evolution_[i+1].Ldet.ptr(); // ratios are just powers of 2, i+1 ratio is always greater or equal to i const int diff_ratio = (int)evolution_[i+1].octave_ratio / (int)evolution_[i].octave_ratio; const int search_radius = evolution_[i+1].sigma_size; // size of keypoints in upper level @@ -814,7 +800,7 @@ void AKAZEFeatures::Do_Subpixel_Refinement( for (size_t i = 0; i < keypoints_by_layers.size(); i++) { const Evolution &e = evolution_[i]; - const float * const ldet = e.Mdet.ptr(); + const float * const ldet = e.Ldet.ptr(); const float ratio = e.octave_ratio; const int cols = e.Ldet.cols; const Mat& keypoints = keypoints_by_layers[i]; @@ -1308,7 +1294,7 @@ void Compute_Main_Orientation(KeyPoint& kpt, const std::vector& evolu // Sample derivatives responses for the points within radius of 6*scale const int ang_size = 109; float resX[ang_size], resY[ang_size]; - Sample_Derivative_Response_Radius6(e.Mx, e.My, x0, y0, scale, resX, resY); + Sample_Derivative_Response_Radius6(e.Lx, e.Ly, x0, y0, scale, resX, resY); // Compute the angle of each gradient vector float Ang[ang_size]; @@ -1445,8 +1431,8 @@ void MSURF_Upright_Descriptor_64_Invoker::Get_MSURF_Upright_Descriptor_64(const ratio = (float)(1 << kpt.octave); scale = cvRound(0.5f*kpt.size / ratio); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; yf = kpt.pt.y / ratio; xf = kpt.pt.x / ratio; @@ -1480,25 +1466,28 @@ void MSURF_Upright_Descriptor_64_Invoker::Get_MSURF_Upright_Descriptor_64(const //Get the gaussian weighted x and y responses gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.50f*scale); - y1 = (int)(sample_y - .5f); - x1 = (int)(sample_x - .5f); + y1 = cvFloor(sample_y); + x1 = cvFloor(sample_x); + + y2 = y1 + 1; + x2 = x1 + 1; - y2 = (int)(sample_y + .5f); - x2 = (int)(sample_x + .5f); + if (x1 < 0 || y1 < 0 || x2 >= Lx.cols || y2 >= Lx.rows) + continue; // FIXIT Boundaries fx = sample_x - x1; fy = sample_y - y1; - res1 = *(Lx.ptr(y1)+x1); - res2 = *(Lx.ptr(y1)+x2); - res3 = *(Lx.ptr(y2)+x1); - res4 = *(Lx.ptr(y2)+x2); + res1 = Lx.at(y1, x1); + res2 = Lx.at(y1, x2); + res3 = Lx.at(y2, x1); + res4 = Lx.at(y2, x2); rx = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4; - res1 = *(Ly.ptr(y1)+x1); - res2 = *(Ly.ptr(y1)+x2); - res3 = *(Ly.ptr(y2)+x1); - res4 = *(Ly.ptr(y2)+x2); + res1 = Ly.at(y1, x1); + res2 = Ly.at(y1, x2); + res3 = Ly.at(y2, x1); + res4 = Ly.at(y2, x2); ry = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4; rx = gauss_s1*rx; @@ -1533,8 +1522,9 @@ void MSURF_Upright_Descriptor_64_Invoker::Get_MSURF_Upright_Descriptor_64(const // convert to unit vector len = sqrt(len); + const float len_inv = 1.0f / len; for (i = 0; i < dsize; i++) { - desc[i] /= len; + desc[i] *= len_inv; } } @@ -1575,8 +1565,8 @@ void MSURF_Descriptor_64_Invoker::Get_MSURF_Descriptor_64(const KeyPoint& kpt, f scale = cvRound(0.5f*kpt.size / ratio); angle = kpt.angle * static_cast(CV_PI / 180.f); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; yf = kpt.pt.y / ratio; xf = kpt.pt.x / ratio; co = cos(angle); @@ -1613,34 +1603,28 @@ void MSURF_Descriptor_64_Invoker::Get_MSURF_Descriptor_64(const KeyPoint& kpt, f // Get the gaussian weighted x and y responses gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.5f*scale); - y1 = cvRound(sample_y - 0.5f); - x1 = cvRound(sample_x - 0.5f); + y1 = cvFloor(sample_y); + x1 = cvFloor(sample_x); - y2 = cvRound(sample_y + 0.5f); - x2 = cvRound(sample_x + 0.5f); + y2 = y1 + 1; + x2 = x1 + 1; - // fix crash: indexing with out-of-bounds index, this might happen near the edges of image - // clip values so they fit into the image - const MatSize& size = Lx.size; - y1 = min(max(0, y1), size[0] - 1); - x1 = min(max(0, x1), size[1] - 1); - y2 = min(max(0, y2), size[0] - 1); - x2 = min(max(0, x2), size[1] - 1); - CV_DbgAssert(Lx.size == Ly.size); + if (x1 < 0 || y1 < 0 || x2 >= Lx.cols || y2 >= Lx.rows) + continue; // FIXIT Boundaries fx = sample_x - x1; fy = sample_y - y1; - res1 = *(Lx.ptr(y1, x1)); - res2 = *(Lx.ptr(y1, x2)); - res3 = *(Lx.ptr(y2, x1)); - res4 = *(Lx.ptr(y2, x2)); + res1 = Lx.at(y1, x1); + res2 = Lx.at(y1, x2); + res3 = Lx.at(y2, x1); + res4 = Lx.at(y2, x2); rx = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4; - res1 = *(Ly.ptr(y1, x1)); - res2 = *(Ly.ptr(y1, x2)); - res3 = *(Ly.ptr(y2, x1)); - res4 = *(Ly.ptr(y2, x2)); + res1 = Ly.at(y1, x1); + res2 = Ly.at(y1, x2); + res3 = Ly.at(y2, x1); + res4 = Ly.at(y2, x2); ry = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4; // Get the x and y derivatives on the rotated axis @@ -1675,8 +1659,9 @@ void MSURF_Descriptor_64_Invoker::Get_MSURF_Descriptor_64(const KeyPoint& kpt, f // convert to unit vector len = sqrt(len); + const float len_inv = 1.0f / len; for (i = 0; i < dsize; i++) { - desc[i] /= len; + desc[i] *= len_inv; } } @@ -1689,13 +1674,6 @@ void MSURF_Descriptor_64_Invoker::Get_MSURF_Descriptor_64(const KeyPoint& kpt, f */ void Upright_MLDB_Full_Descriptor_Invoker::Get_Upright_MLDB_Full_Descriptor(const KeyPoint& kpt, unsigned char *desc, int desc_size) const { - float di = 0.0, dx = 0.0, dy = 0.0; - float ri = 0.0, rx = 0.0, ry = 0.0, xf = 0.0, yf = 0.0; - float sample_x = 0.0, sample_y = 0.0, ratio = 0.0; - int x1 = 0, y1 = 0; - int nsamples = 0, scale = 0; - int dcount1 = 0, dcount2 = 0; - const AKAZEOptions & options = *options_; const std::vector& evolution = *evolution_; @@ -1705,14 +1683,14 @@ void Upright_MLDB_Full_Descriptor_Invoker::Get_Upright_MLDB_Full_Descriptor(cons float values[16*max_channels]; // Get the information from the keypoint - ratio = (float)(1 << kpt.octave); - scale = cvRound(0.5f*kpt.size / ratio); + const float ratio = (float)(1 << kpt.octave); + const int scale = cvRound(0.5f*kpt.size / ratio); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; - Mat Lt = evolution[level].Mt; - yf = kpt.pt.y / ratio; - xf = kpt.pt.x / ratio; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; + const Mat Lt = evolution[level].Lt; + const float yf = kpt.pt.y / ratio; + const float xf = kpt.pt.x / ratio; // For 2x2 grid, 3x3 grid and 4x4 grid const int pattern_size = options_->descriptor_pattern_size; @@ -1726,27 +1704,31 @@ void Upright_MLDB_Full_Descriptor_Invoker::Get_Upright_MLDB_Full_Descriptor(cons memset(desc, 0, desc_size); // For the three grids + int dcount1 = 0; for (int z = 0; z < 3; z++) { - dcount2 = 0; + int dcount2 = 0; const int step = sample_step[z]; for (int i = -pattern_size; i < pattern_size; i += step) { for (int j = -pattern_size; j < pattern_size; j += step) { - di = dx = dy = 0.0; - nsamples = 0; + float di = 0.0, dx = 0.0, dy = 0.0; - for (int k = i; k < i + step; k++) { - for (int l = j; l < j + step; l++) { + int nsamples = 0; + for (int k = 0; k < step; k++) { + for (int l = 0; l < step; l++) { // Get the coordinates of the sample point - sample_y = yf + l*scale; - sample_x = xf + k*scale; + const float sample_y = yf + (l+j)*scale; + const float sample_x = xf + (k+i)*scale; + + const int y1 = cvRound(sample_y); + const int x1 = cvRound(sample_x); - y1 = cvRound(sample_y); - x1 = cvRound(sample_x); + if (y1 < 0 || y1 >= Lt.rows || x1 < 0 || x1 >= Lt.cols) + continue; // Boundaries - ri = *(Lt.ptr(y1)+x1); - rx = *(Lx.ptr(y1)+x1); - ry = *(Ly.ptr(y1)+x1); + const float ri = Lt.at(y1, x1); + const float rx = Lx.at(y1, x1); + const float ry = Ly.at(y1, x1); di += ri; dx += rx; @@ -1755,9 +1737,13 @@ void Upright_MLDB_Full_Descriptor_Invoker::Get_Upright_MLDB_Full_Descriptor(cons } } - di /= nsamples; - dx /= nsamples; - dy /= nsamples; + if (nsamples > 0) + { + const float nsamples_inv = 1.0f / nsamples; + di *= nsamples_inv; + dx *= nsamples_inv; + dy *= nsamples_inv; + } float *val = &values[dcount2*max_channels]; *(val) = di; @@ -1794,17 +1780,20 @@ void MLDB_Full_Descriptor_Invoker::MLDB_Fill_Values(float* values, int sample_st const std::vector& evolution = *evolution_; int pattern_size = options_->descriptor_pattern_size; int chan = options_->descriptor_channels; - int valpos = 0; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; - Mat Lt = evolution[level].Mt; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; + const Mat Lt = evolution[level].Lt; + const Size size = Lt.size(); + CV_Assert(size == Lx.size()); + CV_Assert(size == Ly.size()); + + int valpos = 0; for (int i = -pattern_size; i < pattern_size; i += sample_step) { for (int j = -pattern_size; j < pattern_size; j += sample_step) { - float di, dx, dy; - di = dx = dy = 0.0; - int nsamples = 0; + float di = 0.0f, dx = 0.0f, dy = 0.0f; + int nsamples = 0; for (int k = i; k < i + sample_step; k++) { for (int l = j; l < j + sample_step; l++) { float sample_y = yf + (l*co * scale + k*si*scale); @@ -1813,20 +1802,15 @@ void MLDB_Full_Descriptor_Invoker::MLDB_Fill_Values(float* values, int sample_st int y1 = cvRound(sample_y); int x1 = cvRound(sample_x); - // fix crash: indexing with out-of-bounds index, this might happen near the edges of image - // clip values so they fit into the image - const MatSize& size = Lt.size; - CV_DbgAssert(size == Lx.size && - size == Ly.size); - y1 = min(max(0, y1), size[0] - 1); - x1 = min(max(0, x1), size[1] - 1); + if (y1 < 0 || y1 >= Lt.rows || x1 < 0 || x1 >= Lt.cols) + continue; // Boundaries - float ri = *(Lt.ptr(y1, x1)); + float ri = Lt.at(y1, x1); di += ri; if(chan > 1) { - float rx = *(Lx.ptr(y1, x1)); - float ry = *(Ly.ptr(y1, x1)); + float rx = Lx.at(y1, x1); + float ry = Ly.at(y1, x1); if (chan == 2) { dx += sqrtf(rx*rx + ry*ry); } @@ -1840,20 +1824,25 @@ void MLDB_Full_Descriptor_Invoker::MLDB_Fill_Values(float* values, int sample_st nsamples++; } } - di /= nsamples; - dx /= nsamples; - dy /= nsamples; + + if (nsamples > 0) + { + const float nsamples_inv = 1.0f / nsamples; + di *= nsamples_inv; + dx *= nsamples_inv; + dy *= nsamples_inv; + } values[valpos] = di; if (chan > 1) { values[valpos + 1] = dx; } if (chan > 2) { - values[valpos + 2] = dy; + values[valpos + 2] = dy; } valpos += chan; - } } + } } void MLDB_Full_Descriptor_Invoker::MLDB_Binary_Comparisons(float* values, unsigned char* desc, @@ -1931,10 +1920,8 @@ void MLDB_Full_Descriptor_Invoker::Get_MLDB_Full_Descriptor(const KeyPoint& kpt, */ void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const KeyPoint& kpt, unsigned char *desc, int desc_size) const { - float di = 0.f, dx = 0.f, dy = 0.f; float rx = 0.f, ry = 0.f; float sample_x = 0.f, sample_y = 0.f; - int x1 = 0, y1 = 0; const AKAZEOptions & options = *options_; const std::vector& evolution = *evolution_; @@ -1944,9 +1931,9 @@ void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const KeyPoint& int scale = cvRound(0.5f*kpt.size / ratio); float angle = kpt.angle * static_cast(CV_PI / 180.f); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; - Mat Lt = evolution[level].Mt; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; + const Mat Lt = evolution[level].Lt; float yf = kpt.pt.y / ratio; float xf = kpt.pt.x / ratio; float co = cos(angle); @@ -1957,7 +1944,7 @@ void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const KeyPoint& const int max_channels = 3; const int channels = options.descriptor_channels; CV_Assert(channels <= max_channels); - float values[(4 + 9 + 16)*max_channels]; + float values[(4 + 9 + 16)*max_channels] = { 0 }; // Sample everything, but only do the comparisons const int pattern_size = options.descriptor_pattern_size; @@ -1972,9 +1959,7 @@ void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const KeyPoint& const int *coords = descriptorSamples_.ptr(i); CV_Assert(coords[0] >= 0 && coords[0] < 3); const int sample_step = sample_steps[coords[0]]; - di = 0.0f; - dx = 0.0f; - dy = 0.0f; + float di = 0.f, dx = 0.f, dy = 0.f; for (int k = coords[1]; k < coords[1] + sample_step; k++) { for (int l = coords[2]; l < coords[2] + sample_step; l++) { @@ -1983,14 +1968,17 @@ void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const KeyPoint& sample_y = yf + (l*scale*co + k*scale*si); sample_x = xf + (-l*scale*si + k*scale*co); - y1 = cvRound(sample_y); - x1 = cvRound(sample_x); + const int y1 = cvRound(sample_y); + const int x1 = cvRound(sample_x); - di += *(Lt.ptr(y1)+x1); + if (x1 < 0 || y1 < 0 || x1 >= Lt.cols || y1 >= Lt.rows) + continue; // Boundaries + + di += Lt.at(y1, x1); if (options.descriptor_channels > 1) { - rx = *(Lx.ptr(y1)+x1); - ry = *(Ly.ptr(y1)+x1); + rx = Lx.at(y1, x1); + ry = Ly.at(y1, x1); if (options.descriptor_channels == 2) { dx += sqrtf(rx*rx + ry*ry); @@ -2051,14 +2039,17 @@ void Upright_MLDB_Descriptor_Subset_Invoker::Get_Upright_MLDB_Descriptor_Subset( float ratio = (float)(1 << kpt.octave); int scale = cvRound(0.5f*kpt.size / ratio); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; - Mat Lt = evolution[level].Mt; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; + const Mat Lt = evolution[level].Lt; float yf = kpt.pt.y / ratio; float xf = kpt.pt.x / ratio; // Allocate memory for the matrix of values - Mat values ((4 + 9 + 16)*options.descriptor_channels, 1, CV_32FC1); + const int max_channels = 3; + const int channels = options.descriptor_channels; + CV_Assert(channels <= max_channels); + float values[(4 + 9 + 16)*max_channels] = { 0 }; const int pattern_size = options.descriptor_pattern_size; CV_Assert((pattern_size & 1) == 0); @@ -2083,11 +2074,15 @@ void Upright_MLDB_Descriptor_Subset_Invoker::Get_Upright_MLDB_Descriptor_Subset( y1 = cvRound(sample_y); x1 = cvRound(sample_x); - di += *(Lt.ptr(y1)+x1); + + if (x1 < 0 || y1 < 0 || x1 >= Lt.cols || y1 >= Lt.rows) + continue; // Boundaries + + di += Lt.at(y1, x1); if (options.descriptor_channels > 1) { - rx = *(Lx.ptr(y1)+x1); - ry = *(Ly.ptr(y1)+x1); + rx = Lx.at(y1, x1); + ry = Ly.at(y1, x1); if (options.descriptor_channels == 2) { dx += sqrtf(rx*rx + ry*ry); @@ -2100,26 +2095,26 @@ void Upright_MLDB_Descriptor_Subset_Invoker::Get_Upright_MLDB_Descriptor_Subset( } } - *(values.ptr(options.descriptor_channels*i)) = di; + float* pValues = &values[channels * i]; + pValues[0] = di; if (options.descriptor_channels == 2) { - *(values.ptr(options.descriptor_channels*i + 1)) = dx; + pValues[1] = dx; } else if (options.descriptor_channels == 3) { - *(values.ptr(options.descriptor_channels*i + 1)) = dx; - *(values.ptr(options.descriptor_channels*i + 2)) = dy; + pValues[1] = dx; + pValues[2] = dy; } } // Do the comparisons - const float *vals = values.ptr(0); const int *comps = descriptorBits_.ptr(0); CV_Assert(divUp(descriptorBits_.rows, 8) == desc_size); memset(desc, 0, desc_size); for (int i = 0; i vals[comps[2 * i + 1]]) { + if (values[comps[2 * i]] > values[comps[2 * i + 1]]) { desc[i / 8] |= (1 << (i % 8)); } } @@ -2149,7 +2144,8 @@ void generateDescriptorSubsample(Mat& sampleList, Mat& comparisons, int nbits, } ssz *= nchannels; - CV_Assert(nbits <= ssz); // Descriptor size can't be bigger than full descriptor + CV_Assert(ssz == 162*nchannels); + CV_Assert(nbits <= ssz && "Descriptor size can't be bigger than full descriptor (486 = 162*3 - 3 channels)"); // Since the full descriptor is usually under 10k elements, we pick // the selection from the full matrix. We take as many samples per diff --git a/modules/features2d/src/kaze/AKAZEFeatures.h b/modules/features2d/src/kaze/AKAZEFeatures.h index 9f44b579373d2e8d4b53ddb467aadac878ebdc0f..18dc5fd99c46d66ce477f3277b034b7f0213dded 100644 --- a/modules/features2d/src/kaze/AKAZEFeatures.h +++ b/modules/features2d/src/kaze/AKAZEFeatures.h @@ -29,15 +29,10 @@ struct Evolution border = 0; } - UMat Lx, Ly; ///< First order spatial derivatives - UMat Lt; ///< Evolution image - UMat Lsmooth; ///< Smoothed image, used only for computing determinant, released afterwards - UMat Ldet; ///< Detector response - - // the same as above, holding CPU mapping to UMats above - Mat Mx, My; - Mat Mt; - Mat Mdet; + Mat Lx, Ly; ///< First order spatial derivatives + Mat Lt; ///< Evolution image + Mat Lsmooth; ///< Smoothed image, used only for computing determinant, released afterwards + Mat Ldet; ///< Detector response Size size; ///< Size of the layer float etime; ///< Evolution time diff --git a/modules/features2d/test/test_descriptors_regression.cpp b/modules/features2d/test/test_descriptors_regression.cpp index 0862649bd93b71258bd6ee9c8a0a674d34b26bb5..7540d1d2bbb8fc3ae2e63020cc3a414505c517a5 100644 --- a/modules/features2d/test/test_descriptors_regression.cpp +++ b/modules/features2d/test/test_descriptors_regression.cpp @@ -43,6 +43,7 @@ using namespace std; using namespace cv; +using namespace testing; const string FEATURES2D_DIR = "features2d"; const string IMAGE_FILENAME = "tsukuba.png"; @@ -417,68 +418,82 @@ TEST( Features2d_DescriptorExtractor, batch ) } } -TEST( Features2d_Feature2d, no_crash ) +class DescriptorImage : public TestWithParam +{ +protected: + virtual void SetUp() { + pattern = GetParam(); + } + + std::string pattern; +}; + +TEST_P(DescriptorImage, no_crash) { - const String& pattern = string(cvtest::TS::ptr()->get_data_path() + "shared/*.png"); vector fnames; - glob(pattern, fnames, false); + glob(cvtest::TS::ptr()->get_data_path() + pattern, fnames, false); sort(fnames.begin(), fnames.end()); - Ptr akaze = AKAZE::create(); + Ptr akaze_mldb = AKAZE::create(AKAZE::DESCRIPTOR_MLDB); + Ptr akaze_mldb_upright = AKAZE::create(AKAZE::DESCRIPTOR_MLDB_UPRIGHT); + Ptr akaze_mldb_256 = AKAZE::create(AKAZE::DESCRIPTOR_MLDB, 256); + Ptr akaze_mldb_upright_256 = AKAZE::create(AKAZE::DESCRIPTOR_MLDB_UPRIGHT, 256); + Ptr akaze_kaze = AKAZE::create(AKAZE::DESCRIPTOR_KAZE); + Ptr akaze_kaze_upright = AKAZE::create(AKAZE::DESCRIPTOR_KAZE_UPRIGHT); Ptr orb = ORB::create(); Ptr kaze = KAZE::create(); Ptr brisk = BRISK::create(); - size_t i, n = fnames.size(); + size_t n = fnames.size(); vector keypoints; Mat descriptors; orb->setMaxFeatures(5000); - for( i = 0; i < n; i++ ) + for(size_t i = 0; i < n; i++ ) { printf("%d. image: %s:\n", (int)i, fnames[i].c_str()); if( strstr(fnames[i].c_str(), "MP.png") != 0 ) + { + printf("\tskip\n"); continue; + } bool checkCount = strstr(fnames[i].c_str(), "templ.png") == 0; Mat img = imread(fnames[i], -1); - printf("\tAKAZE ... "); fflush(stdout); - akaze->detectAndCompute(img, noArray(), keypoints, descriptors); - printf("(%d keypoints) ", (int)keypoints.size()); fflush(stdout); - if( checkCount ) - { - EXPECT_GT((int)keypoints.size(), 0); - } - ASSERT_EQ(descriptors.rows, (int)keypoints.size()); - printf("ok\n"); - printf("\tKAZE ... "); fflush(stdout); - kaze->detectAndCompute(img, noArray(), keypoints, descriptors); - printf("(%d keypoints) ", (int)keypoints.size()); fflush(stdout); - if( checkCount ) - { - EXPECT_GT((int)keypoints.size(), 0); - } + printf("\t%dx%d\n", img.cols, img.rows); + +#define TEST_DETECTOR(name, descriptor) \ + keypoints.clear(); descriptors.release(); \ + printf("\t" name "\n"); fflush(stdout); \ + descriptor->detectAndCompute(img, noArray(), keypoints, descriptors); \ + printf("\t\t\t(%d keypoints, descriptor size = %d)\n", (int)keypoints.size(), descriptors.cols); fflush(stdout); \ + if (checkCount) \ + { \ + EXPECT_GT((int)keypoints.size(), 0); \ + } \ ASSERT_EQ(descriptors.rows, (int)keypoints.size()); - printf("ok\n"); - printf("\tORB ... "); fflush(stdout); - orb->detectAndCompute(img, noArray(), keypoints, descriptors); - printf("(%d keypoints) ", (int)keypoints.size()); fflush(stdout); - if( checkCount ) - { - EXPECT_GT((int)keypoints.size(), 0); - } - ASSERT_EQ(descriptors.rows, (int)keypoints.size()); - printf("ok\n"); - - printf("\tBRISK ... "); fflush(stdout); - brisk->detectAndCompute(img, noArray(), keypoints, descriptors); - printf("(%d keypoints) ", (int)keypoints.size()); fflush(stdout); - if( checkCount ) - { - EXPECT_GT((int)keypoints.size(), 0); - } - ASSERT_EQ(descriptors.rows, (int)keypoints.size()); - printf("ok\n"); + TEST_DETECTOR("AKAZE:MLDB", akaze_mldb); + TEST_DETECTOR("AKAZE:MLDB_UPRIGHT", akaze_mldb_upright); + TEST_DETECTOR("AKAZE:MLDB_256", akaze_mldb_256); + TEST_DETECTOR("AKAZE:MLDB_UPRIGHT_256", akaze_mldb_upright_256); + TEST_DETECTOR("AKAZE:KAZE", akaze_kaze); + TEST_DETECTOR("AKAZE:KAZE_UPRIGHT", akaze_kaze_upright); + TEST_DETECTOR("KAZE", kaze); + TEST_DETECTOR("ORB", orb); + TEST_DETECTOR("BRISK", brisk); } } + +INSTANTIATE_TEST_CASE_P(Features2d, DescriptorImage, + testing::Values( + "shared/lena.png", + "shared/box*.png", + "shared/fruits*.png", + "shared/airplane.png", + "shared/graffiti.png", + "shared/1_itseez-0001*.png", + "shared/pic*.png", + "shared/templ.png" + ) +); diff --git a/modules/ts/misc/run_long.py b/modules/ts/misc/run_long.py index d820f9780a5b8b6362711d52b7fce97f03beda4e..5640ea3a582ebb2163a86b2dcdf6b9a2d51829ff 100644 --- a/modules/ts/misc/run_long.py +++ b/modules/ts/misc/run_long.py @@ -8,7 +8,11 @@ from pprint import PrettyPrinter as PP LONG_TESTS_DEBUG_VALGRIND = [ ('calib3d', 'Calib3d_InitUndistortRectifyMap.accuracy', 2017.22), ('dnn', 'Reproducibility*', 1000), # large DNN models - ('features2d', 'Features2d_Feature2d.no_crash', 1235.68), + ('features2d', 'Features2d/DescriptorImage.no_crash/3', 1000), + ('features2d', 'Features2d/DescriptorImage.no_crash/4', 1000), + ('features2d', 'Features2d/DescriptorImage.no_crash/5', 1000), + ('features2d', 'Features2d/DescriptorImage.no_crash/6', 1000), + ('features2d', 'Features2d/DescriptorImage.no_crash/7', 1000), ('imgcodecs', 'Imgcodecs_Png.write_big', 1000), # memory limit ('imgcodecs', 'Imgcodecs_Tiff.decode_tile16384x16384', 1000), # memory limit ('ml', 'ML_RTrees.regression', 1423.47),