diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp index f4f53fc0dabcc7d78fd1529c4efb1fe450d670ad..86b74e4fe79ab601a7c852a83b28eecb6b010af2 100644 --- a/modules/gpu/src/brute_force_matcher.cpp +++ b/modules/gpu/src/brute_force_matcher.cpp @@ -446,10 +446,17 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con const int nQuery = queryDescs.rows; const int nTrain = trainDescs.rows; - ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx); - ensureSizeIsEnough(nQuery, k, CV_32F, distance); - if (k != 2) + if (k == 2) + { + ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx); + ensureSizeIsEnough(1, nQuery, CV_32FC2, distance); + } + else + { + ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx); + ensureSizeIsEnough(nQuery, k, CV_32F, distance); ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist); + } if (stream) { @@ -491,14 +498,19 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c if (trainIdx.empty() || distance.empty()) return; - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size()); + CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1); + CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1); + CV_Assert(distance.size() == trainIdx.size()); + CV_Assert(trainIdx.isContinuous() && distance.isContinuous()); - const int nQuery = distance.rows; - const int k = trainIdx.cols; + const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows; + const int k = trainIdx.type() == CV_32SC2 ? 2 :trainIdx.cols; matches.clear(); matches.reserve(nQuery); + + const int* trainIdx_ptr = trainIdx.ptr(); + const float* distance_ptr = distance.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) { @@ -506,8 +518,6 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c vector& curMatches = matches.back(); curMatches.reserve(k); - const int* trainIdx_ptr = trainIdx.ptr(queryIdx); - const float* distance_ptr = distance.ptr(queryIdx); for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr) { int trainIdx = *trainIdx_ptr; diff --git a/modules/gpu/src/cuda/brute_force_matcher.cu b/modules/gpu/src/cuda/brute_force_matcher.cu index c2c7317b1a1b3164f311711185ead382a93530a1..efe3510149de771cddf3f1d328549c302bd96d7e 100644 --- a/modules/gpu/src/cuda/brute_force_matcher.cu +++ b/modules/gpu/src/cuda/brute_force_matcher.cu @@ -452,7 +452,7 @@ namespace cv { namespace gpu { namespace bfmatcher } template - __global__ void knnMatch2(const PtrStep_ query, const DevMem2D_ train, const Mask m, PtrStep_ trainIdx, PtrStep_ distance) + __global__ void knnMatch2(const PtrStep_ query, const DevMem2D_ train, const Mask m, int2* trainIdx, float2* distance) { typedef typename Dist::result_type result_type; typedef typename Dist::value_type value_type; @@ -520,8 +520,8 @@ namespace cv { namespace gpu { namespace bfmatcher } } - trainIdx.ptr(queryIdx)[0] = make_int2(bestTrainIdx1, bestTrainIdx2); - distance.ptr(queryIdx)[0] = make_float2(distMin1, distMin2); + trainIdx[queryIdx] = make_int2(bestTrainIdx1, bestTrainIdx2); + distance[queryIdx] = make_float2(distMin1, distMin2); } } @@ -556,7 +556,7 @@ namespace cv { namespace gpu { namespace bfmatcher const dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1); knnMatch2, Dist, T> - <<>>(query, train, mask, trainIdx, distance); + <<>>(query, train, mask, trainIdx.data, distance.data); cudaSafeCall( cudaGetLastError() ); if (stream == 0)