brute_force_matcher.cpp 48.9 KB
Newer Older
Y
yao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Nathan, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
Y
yao 已提交
47

Y
yao 已提交
48
#include <functional>
Y
yao 已提交
49 50 51 52 53 54
#include <iterator>
#include <vector>
using namespace cv;
using namespace cv::ocl;
using namespace std;

55
namespace cv
Y
yao 已提交
56
{
57 58
    namespace ocl
    {
Y
yao 已提交
59 60
        ////////////////////////////////////OpenCL kernel strings//////////////////////////
        extern const char *brute_force_match;
61
    }
Y
yao 已提交
62 63
}

Y
yao 已提交
64
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
65
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
66
                         const oclMat &trainIdx, const oclMat &distance, int distType)
Y
yao 已提交
67
{
Y
yao 已提交
68
    assert(query.type() == CV_32F);
69 70 71 72 73 74 75 76 77 78 79 80
    cv::ocl::Context *ctx = query.clCxt;
    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    int m_size = MAX_DESC_LEN;
    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
81
        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
82 83 84 85 86 87 88 89 90 91 92 93 94 95
        args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
        args.push_back( make_pair( smemSize, (void *)NULL));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));

        std::string kernelName = "BruteForceMatch_UnrollMatch";

Y
yao 已提交
96
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
97 98 99
    }
}

Y
yao 已提交
100
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
N
Niko 已提交
101 102
void matchUnrolledCached(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const oclMat /*mask*/,
                         const oclMat &/*bestTrainIdx*/, const oclMat & /*bestImgIdx*/, const oclMat & /*bestDistance*/, int /*distType*/)
Y
yao 已提交
103 104 105
{
}

Y
yao 已提交
106
template < int BLOCK_SIZE/*, typename Mask*/ >
107
void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
108
           const oclMat &trainIdx, const oclMat &distance, int distType)
Y
yao 已提交
109
{
Y
yao 已提交
110
    assert(query.type() == CV_32F);
111 112 113 114 115 116
    cv::ocl::Context *ctx = query.clCxt;
    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    vector< pair<size_t, const void *> > args;
Y
yao 已提交
117

118 119 120 121
    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
122
        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
123 124 125 126 127 128 129 130 131 132
        args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
        args.push_back( make_pair( smemSize, (void *)NULL));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
Y
yao 已提交
133

134
        std::string kernelName = "BruteForceMatch_Match";
Y
yao 已提交
135

Y
yao 已提交
136
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
137
    }
Y
yao 已提交
138 139
}

Y
yao 已提交
140
template < int BLOCK_SIZE/*, typename Mask*/ >
N
Niko 已提交
141 142
void match(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const oclMat /*mask*/,
           const oclMat &/*bestTrainIdx*/, const oclMat & /*bestImgIdx*/, const oclMat & /*bestDistance*/, int /*distType*/)
Y
yao 已提交
143 144 145 146
{
}

//radius_matchUnrolledCached
Y
yao 已提交
147
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
148
void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
149 150
                         const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
{
Y
yao 已提交
151
    assert(query.type() == CV_32F);
152 153 154 155 156 157 158 159 160 161 162 163 164
    cv::ocl::Context *ctx = query.clCxt;
    size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    int m_size = MAX_DESC_LEN;
    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
        args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance ));
165
        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
166 167 168 169 170 171 172 173 174 175
        args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
        args.push_back( make_pair( smemSize, (void *)NULL));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
N
NikoKJ 已提交
176
        args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.cols ));
177 178 179 180 181 182
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));

        std::string kernelName = "BruteForceMatch_RadiusUnrollMatch";

Y
yao 已提交
183
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
184
    }
Y
yao 已提交
185 186 187
}

//radius_match
Y
yao 已提交
188
template < int BLOCK_SIZE/*, typename Mask*/ >
189
void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
190 191
                  const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
{
Y
yao 已提交
192
    assert(query.type() == CV_32F);
193 194 195 196 197 198 199 200 201 202 203 204
    cv::ocl::Context *ctx = query.clCxt;
    size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
        args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance ));
205
        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
206 207 208 209 210 211 212 213 214
        args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
        args.push_back( make_pair( smemSize, (void *)NULL));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
N
NikoKJ 已提交
215
        args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.cols ));
216 217 218 219 220 221
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));

        std::string kernelName = "BruteForceMatch_RadiusMatch";

Y
yao 已提交
222
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
223
    }
Y
yao 已提交
224 225
}

Y
yao 已提交
226
static void matchDispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
227
                     const oclMat &trainIdx, const oclMat &distance, int distType)
Y
yao 已提交
228
{
Y
yao 已提交
229 230
    const oclMat zeroMask;
    const oclMat &tempMask = mask.data ? mask : zeroMask;
Y
yao 已提交
231 232
    if (query.cols <= 64)
    {
Y
yao 已提交
233
        matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
Y
yao 已提交
234 235 236
    }
    else if (query.cols <= 128)
    {
Y
yao 已提交
237
        matchUnrolledCached<16, 128>(query, train, tempMask, trainIdx,  distance, distType);
Y
yao 已提交
238 239 240
    }
    else
    {
Y
yao 已提交
241
        match<16>(query, train, tempMask, trainIdx, distance, distType);
Y
yao 已提交
242 243 244
    }
}

Y
yao 已提交
245
static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, const oclMat &mask,
246
                     const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, int distType)
Y
yao 已提交
247
{
Y
yao 已提交
248 249
    const oclMat zeroMask;
    const oclMat &tempMask = mask.data ? mask : zeroMask;
Y
yao 已提交
250 251
    if (query.cols <= 64)
    {
Y
yao 已提交
252
        matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
Y
yao 已提交
253 254 255
    }
    else if (query.cols <= 128)
    {
Y
yao 已提交
256
        matchUnrolledCached<16, 128>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
Y
yao 已提交
257 258 259
    }
    else
    {
Y
yao 已提交
260
        match<16>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
Y
yao 已提交
261 262 263 264
    }
}

//radius matchDispatcher
Y
yao 已提交
265
static void matchDispatcher(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
266
                     const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
Y
yao 已提交
267
{
Y
yao 已提交
268 269
    const oclMat zeroMask;
    const oclMat &tempMask = mask.data ? mask : zeroMask;
Y
yao 已提交
270 271
    if (query.cols <= 64)
    {
Y
yao 已提交
272
        matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
Y
yao 已提交
273 274 275
    }
    else if (query.cols <= 128)
    {
Y
yao 已提交
276
        matchUnrolledCached<16, 128>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
Y
yao 已提交
277 278 279
    }
    else
    {
Y
yao 已提交
280
        radius_match<16>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
Y
yao 已提交
281 282 283 284
    }
}

//knn match Dispatcher
Y
yao 已提交
285
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
286
void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
287 288 289 290 291 292 293 294 295 296 297 298 299 300
                             const oclMat &trainIdx, const oclMat &distance, int distType)
{
    cv::ocl::Context *ctx = query.clCxt;
    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    int m_size = MAX_DESC_LEN;
    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
301
        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
302 303 304 305 306 307 308 309 310 311 312 313 314 315
        args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
        args.push_back( make_pair( smemSize, (void *)NULL));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));

        std::string kernelName = "BruteForceMatch_knnUnrollMatch";

Y
yao 已提交
316
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
317 318 319
    }
}

Y
yao 已提交
320
template < int BLOCK_SIZE/*, typename Mask*/ >
321
void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
322 323 324 325 326 327 328 329 330 331 332 333 334
               const oclMat &trainIdx, const oclMat &distance, int distType)
{
    cv::ocl::Context *ctx = query.clCxt;
    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
335
        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
336 337 338 339 340 341 342 343 344 345 346 347 348
        args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
        args.push_back( make_pair( smemSize, (void *)NULL));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));

        std::string kernelName = "BruteForceMatch_knnMatch";

Y
yao 已提交
349
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
350 351 352
    }
}

Y
yao 已提交
353
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
354
void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
355 356 357 358 359 360 361 362 363 364 365 366 367
{
    cv::ocl::Context *ctx = query.clCxt;
    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    int m_size = MAX_DESC_LEN;
    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
368
        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
369 370 371 372 373 374 375 376 377 378 379 380 381
        args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
        args.push_back( make_pair( smemSize, (void *)NULL));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));

        std::string kernelName = "BruteForceMatch_calcDistanceUnrolled";

Y
yao 已提交
382
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
383 384 385
    }
}

Y
yao 已提交
386
template < int BLOCK_SIZE/*, typename Mask*/ >
387
void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
Y
yao 已提交
388 389
{
    cv::ocl::Context *ctx = query.clCxt;
390 391 392 393 394 395 396 397 398 399
    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
400
        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
401 402 403 404 405 406 407 408 409 410 411 412
        args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
        args.push_back( make_pair( smemSize, (void *)NULL));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));

        std::string kernelName = "BruteForceMatch_calcDistance";

Y
yao 已提交
413
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
414
    }
Y
yao 已提交
415 416 417 418
}

///////////////////////////////////////////////////////////////////////////////
// Calc Distance dispatcher
Y
yao 已提交
419
static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
420
                            const oclMat &allDist, int distType)
Y
yao 已提交
421 422 423
{
    if (query.cols <= 64)
    {
Y
yao 已提交
424
        calcDistanceUnrolled<16, 64>(query, train, mask, allDist, distType);
Y
yao 已提交
425 426 427
    }
    else if (query.cols <= 128)
    {
Y
yao 已提交
428
        calcDistanceUnrolled<16, 128>(query, train, mask, allDist, distType);
Y
yao 已提交
429 430 431
    }
    else
    {
Y
yao 已提交
432
        calcDistance<16>(query, train, mask, allDist, distType);
Y
yao 已提交
433 434 435
    }
}

Y
yao 已提交
436
static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
437
                      const oclMat &trainIdx, const oclMat &distance, int distType)
Y
yao 已提交
438 439 440
{
    if (query.cols <= 64)
    {
Y
yao 已提交
441
        knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
Y
yao 已提交
442 443 444
    }
    else if (query.cols <= 128)
    {
Y
yao 已提交
445
        knn_matchUnrolledCached<16, 128>(query, train, mask, trainIdx, distance, distType);
Y
yao 已提交
446 447 448
    }
    else
    {
Y
yao 已提交
449
        knn_match<16>(query, train, mask, trainIdx, distance, distType);
Y
yao 已提交
450 451 452 453
    }
}

template <int BLOCK_SIZE>
N
Niko 已提交
454
void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int /*distType*/)
Y
yao 已提交
455
{
456 457 458 459 460
    cv::ocl::Context *ctx = trainIdx.clCxt;
    size_t globalSize[] = {trainIdx.rows * BLOCK_SIZE, 1, 1};
    size_t localSize[] = {BLOCK_SIZE, 1, 1};
    int block_size = BLOCK_SIZE;
    std::string kernelName = "BruteForceMatch_findBestMatch";
Y
yao 已提交
461 462

    for (int i = 0; i < k; ++i)
463 464
    {
        vector< pair<size_t, const void *> > args;
Y
yao 已提交
465

466 467 468 469 470 471 472 473
        args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&i));
        args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
        //args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
        //args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
        //args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
Y
yao 已提交
474

Y
yao 已提交
475
        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, trainIdx.depth(), -1);
Y
yao 已提交
476 477 478
    }
}

Y
yao 已提交
479
static void findKnnMatchDispatcher(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
Y
yao 已提交
480 481 482 483
{
    findKnnMatch<256>(k, trainIdx, distance, allDist, distType);
}

Y
yao 已提交
484
static void kmatchDispatcher(const oclMat &query, const oclMat &train, int k, const oclMat &mask,
485
                      const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
Y
yao 已提交
486
{
Y
yao 已提交
487 488
    const oclMat zeroMask;
    const oclMat &tempMask = mask.data ? mask : zeroMask;
Y
yao 已提交
489 490
    if (k == 2)
    {
Y
yao 已提交
491
        match2Dispatcher(query, train, tempMask, trainIdx, distance, distType);
Y
yao 已提交
492 493 494
    }
    else
    {
Y
yao 已提交
495
        calcDistanceDispatcher(query, train, tempMask, allDist, distType);
Y
yao 已提交
496 497 498 499 500 501 502 503
        findKnnMatchDispatcher(k, trainIdx, distance, allDist, distType);
    }
}

// Stores the requested distance type; the train descriptor collection starts out empty.
cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType distType_)
    : distType(distType_)
{
}

504
void cv::ocl::BruteForceMatcher_OCL_base::add(const vector<oclMat> &descCollection)
Y
yao 已提交
505
{
506
    trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
Y
yao 已提交
507 508
}

509 510 511
// Read-only access to the stored train descriptor collection.
const vector<oclMat> &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const
{
    const vector<oclMat> &stored = trainDescCollection;
    return stored;
}

514
void cv::ocl::BruteForceMatcher_OCL_base::clear()
Y
yao 已提交
515
{
516
    trainDescCollection.clear();
Y
yao 已提交
517 518
}

519 520 521
// True when no train descriptors have been added.
bool cv::ocl::BruteForceMatcher_OCL_base::empty() const
{
    const bool hasNoDescriptors = trainDescCollection.empty();
    return hasNoDescriptors;
}

524 525 526
// Always reports that masks are supported.
// NOTE(review): match() in this file asserts that the mask is empty ("mask is not supported
// at the moment") and the kernel launchers never pass the mask on - confirm this return value.
bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const
{
    return true;
}

529 530 531 532
void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const oclMat &train,
        oclMat &trainIdx, oclMat &distance, const oclMat &mask)
{
    if (query.empty() || train.empty())
Y
yao 已提交
533
        return;
Y
yao 已提交
534 535 536 537 538 539
    
    // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
    int callType = query.depth();
    char cvFuncName[] = "singleMatch";
    if (callType != 5)
        CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");
Y
yao 已提交
540

Y
yao 已提交
541 542
    if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
        || callType != 2 || callType != 4)))
Y
yao 已提交
543
    {
Y
yao 已提交
544 545
        CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
    }
Y
yao 已提交
546 547 548 549

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.cols == query.cols && train.type() == query.type());

Y
yao 已提交
550 551
    trainIdx.create(1, query.rows, CV_32S);
    distance.create(1, query.rows, CV_32F);
Y
yao 已提交
552

Y
yao 已提交
553 554 555
    matchDispatcher(query, train, mask, trainIdx, distance, distType);
exit:
    return;
Y
yao 已提交
556 557
}

558 559 560
void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &distance, vector<DMatch> &matches)
{
    if (trainIdx.empty() || distance.empty())
Y
yao 已提交
561
        return;
562

Y
yao 已提交
563 564 565 566 567 568
    Mat trainIdxCPU(trainIdx);
    Mat distanceCPU(distance);

    matchConvert(trainIdxCPU, distanceCPU, matches);
}

569 570 571
void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, const Mat &distance, vector<DMatch> &matches)
{
    if (trainIdx.empty() || distance.empty())
Y
yao 已提交
572 573 574 575 576 577 578 579 580 581
        return;

    CV_Assert(trainIdx.type() == CV_32SC1);
    CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols);

    const int nQuery = trainIdx.cols;

    matches.clear();
    matches.reserve(nQuery);

582 583
    const int *trainIdx_ptr = trainIdx.ptr<int>();
    const float *distance_ptr =  distance.ptr<float>();
Y
yao 已提交
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr)
    {
        int trainIdx = *trainIdx_ptr;

        if (trainIdx == -1)
            continue;

        float distance = *distance_ptr;

        DMatch m(queryIdx, trainIdx, 0, distance);

        matches.push_back(m);
    }
}

599
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, vector<DMatch> &matches, const oclMat &mask)
Y
yao 已提交
600
{
601
	assert(mask.empty()); // mask is not supported at the moment
602
    oclMat trainIdx, distance;
Y
yao 已提交
603 604 605 606
    matchSingle(query, train, trainIdx, distance, mask);
    matchDownload(trainIdx, distance, matches);
}

607 608
// Packs the stored train descriptors (and, if given, their masks) into 1 x N host matrices
// whose elements are oclMat-sized byte blobs, then uploads them into trainCollection and
// maskCollection. Without masks, maskCollection is released instead.
// Does nothing when no train descriptors are stored.
//
// NOTE(review): the oclMat objects are copy-assigned into raw Mat storage that was never
// constructed as oclMat - presumably this relies on oclMat internals; verify it is safe.
void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const vector<oclMat> &masks)
{
    if (empty())
        return;

    if (!masks.empty())
    {
        // One mask per train matrix is required.
        CV_Assert(masks.size() == trainDescCollection.size());

        Mat trainHeaders(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
        Mat maskHeaders(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));

        oclMat *trainSlots = trainHeaders.ptr<oclMat>();
        oclMat *maskSlots = maskHeaders.ptr<oclMat>();

        const size_t count = trainDescCollection.size();
        for (size_t idx = 0; idx < count; ++idx)
        {
            const oclMat &train = trainDescCollection[idx];
            const oclMat &mask = masks[idx];

            CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows));

            trainSlots[idx] = train;
            maskSlots[idx] = mask;
        }

        trainCollection.upload(trainHeaders);
        maskCollection.upload(maskHeaders);
        return;
    }

    // No masks supplied: only the train matrices are packed.
    Mat trainHeaders(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
    oclMat *trainSlots = trainHeaders.ptr<oclMat>();

    const size_t count = trainDescCollection.size();
    for (size_t idx = 0; idx < count; ++idx)
        trainSlots[idx] = trainDescCollection[idx];

    trainCollection.upload(trainHeaders);
    maskCollection.release();
}

651 652 653 654
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx,
        oclMat &imgIdx, oclMat &distance, const oclMat &masks)
{
    if (query.empty() || trainCollection.empty())
Y
yao 已提交
655 656
        return;

Y
yao 已提交
657 658 659 660 661
    // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
    int callType = query.depth();
    char cvFuncName[] = "matchCollection";
    if (callType != 5)
        CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");
Y
yao 已提交
662

Y
yao 已提交
663 664
    if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
        || callType != 2 || callType != 4)))
Y
yao 已提交
665
    {
Y
yao 已提交
666 667
        CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
    }
Y
yao 已提交
668 669 670

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);

Y
yao 已提交
671 672 673
    trainIdx.create(1, query.rows, CV_32S);
    imgIdx.create(1, query.rows, CV_32S);
    distance.create(1, query.rows, CV_32F);
Y
yao 已提交
674

Y
yao 已提交
675 676 677
    matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
exit:
    return;
Y
yao 已提交
678 679
}

680
void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, vector<DMatch> &matches)
Y
yao 已提交
681
{
682
    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
Y
yao 已提交
683 684 685 686 687 688 689 690 691
        return;

    Mat trainIdxCPU(trainIdx);
    Mat imgIdxCPU(imgIdx);
    Mat distanceCPU(distance);

    matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
}

692 693 694
void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, vector<DMatch> &matches)
{
    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
Y
yao 已提交
695 696 697 698 699 700 701 702 703 704 705
        return;

    CV_Assert(trainIdx.type() == CV_32SC1);
    CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.cols == trainIdx.cols);
    CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols);

    const int nQuery = trainIdx.cols;

    matches.clear();
    matches.reserve(nQuery);

706 707 708
    const int *trainIdx_ptr = trainIdx.ptr<int>();
    const int *imgIdx_ptr = imgIdx.ptr<int>();
    const float *distance_ptr =  distance.ptr<float>();
Y
yao 已提交
709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725
    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
    {
        int trainIdx = *trainIdx_ptr;

        if (trainIdx == -1)
            continue;

        int imgIdx = *imgIdx_ptr;

        float distance = *distance_ptr;

        DMatch m(queryIdx, trainIdx, imgIdx, distance);

        matches.push_back(m);
    }
}

726 727 728
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, vector<DMatch> &matches, const vector<oclMat> &masks)
{
    oclMat trainCollection;
Y
yao 已提交
729 730 731 732 733 734 735 736 737 738 739
    oclMat maskCollection;

    makeGpuCollection(trainCollection, maskCollection, masks);

    oclMat trainIdx, imgIdx, distance;

    matchCollection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
    matchDownload(trainIdx, imgIdx, distance, matches);
}

// k-nearest-neighbour (knn) matching
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx,
        oclMat &distance, oclMat &allDist, int k, const oclMat &mask)
{
    if (query.empty() || train.empty())
Y
yao 已提交
744 745
        return;

Y
yao 已提交
746 747
    // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
    int callType = query.depth();
Y
yao 已提交
748

Y
yao 已提交
749 750 751 752 753 754
    char cvFuncName[] = "knnMatchSingle";
    if (callType != 5)
        CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");

    if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
        || callType != 2 || callType != 4)))
Y
yao 已提交
755
    {
Y
yao 已提交
756 757
        CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
    }
Y
yao 已提交
758 759 760 761 762 763

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.type() == query.type() && train.cols == query.cols);

    if (k == 2)
    {
Y
yao 已提交
764 765
        trainIdx.create(1, query.rows, CV_32SC2);
        distance.create(1, query.rows, CV_32FC2);
Y
yao 已提交
766 767 768
    }
    else
    {
Y
yao 已提交
769 770 771
        trainIdx.create(query.rows, k, CV_32S);
        distance.create(query.rows, k, CV_32F);
        allDist.create(query.rows, train.rows, CV_32FC1);
Y
yao 已提交
772 773 774 775
    }

    trainIdx.setTo(Scalar::all(-1));

Y
yao 已提交
776 777 778
    kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType);
exit:
    return;
Y
yao 已提交
779 780
}

781
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &trainIdx, const oclMat &distance, vector< vector<DMatch> > &matches, bool compactResult)
Y
yao 已提交
782
{
783
    if (trainIdx.empty() || distance.empty())
Y
yao 已提交
784 785 786 787 788 789 790 791
        return;

    Mat trainIdxCPU(trainIdx);
    Mat distanceCPU(distance);

    knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
}

792 793 794
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &trainIdx, const Mat &distance, vector< vector<DMatch> > &matches, bool compactResult)
{
    if (trainIdx.empty() || distance.empty())
Y
yao 已提交
795 796 797 798 799 800 801 802
        return;

    CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1);
    CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1);
    CV_Assert(distance.size() == trainIdx.size());
    CV_Assert(trainIdx.isContinuous() && distance.isContinuous());

    const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows;
803
    const int k = trainIdx.type() == CV_32SC2 ? 2 : trainIdx.cols;
Y
yao 已提交
804 805 806 807

    matches.clear();
    matches.reserve(nQuery);

808 809
    const int *trainIdx_ptr = trainIdx.ptr<int>();
    const float *distance_ptr = distance.ptr<float>();
Y
yao 已提交
810 811 812 813

    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
        matches.push_back(vector<DMatch>());
814
        vector<DMatch> &curMatches = matches.back();
Y
yao 已提交
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
        curMatches.reserve(k);

        for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
        {
            int trainIdx = *trainIdx_ptr;

            if (trainIdx != -1)
            {
                float distance = *distance_ptr;

                DMatch m(queryIdx, trainIdx, 0, distance);

                curMatches.push_back(m);
            }
        }

        if (compactResult && curMatches.empty())
            matches.pop_back();
    }
}

836 837
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, const oclMat &train, vector< vector<DMatch> > &matches
        , int k, const oclMat &mask, bool compactResult)
Y
yao 已提交
838
{
839
    oclMat trainIdx, distance, allDist;
Y
yao 已提交
840 841 842 843
    knnMatchSingle(query, train, trainIdx, distance, allDist, k, mask);
    knnMatchDownload(trainIdx, distance, matches, compactResult);
}

844
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
N
Niko 已提交
845
        oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &/*maskCollection*/)
Y
yao 已提交
846
{
847
    if (query.empty() || trainCollection.empty())
Y
yao 已提交
848 849
        return;

850 851
    typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks,
                             const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance);
Y
yao 已提交
852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875
#if 0
    static const caller_t callers[3][6] =
    {
        {
            ocl_match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
            ocl_match2L1_gpu<unsigned short>, ocl_match2L1_gpu<short>,
            ocl_match2L1_gpu<int>, ocl_match2L1_gpu<float>
        },
        {
            0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
            0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
            0/*match2L2_gpu<int>*/, ocl_match2L2_gpu<float>
        },
        {
            ocl_match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
            ocl_match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
            ocl_match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
        }
    };
#endif
    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);

    const int nQuery = query.rows;

876 877 878
    trainIdx.create(1, nQuery, CV_32SC2);
    imgIdx.create(1, nQuery, CV_32SC2);
    distance.create(1, nQuery, CV_32SC2);
Y
yao 已提交
879 880 881 882 883 884 885 886 887

    trainIdx.setTo(Scalar::all(-1));

    //caller_t func = callers[distType][query.depth()];
    //CV_Assert(func != 0);

    //func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream));
}

888 889
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx,
        const oclMat &distance, vector< vector<DMatch> > &matches, bool compactResult)
Y
yao 已提交
890
{
891
    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
Y
yao 已提交
892 893 894 895 896 897 898 899 900
        return;

    Mat trainIdxCPU(trainIdx);
    Mat imgIdxCPU(imgIdx);
    Mat distanceCPU(distance);

    knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
}

901 902
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
        vector< vector<DMatch> > &matches, bool compactResult)
Y
yao 已提交
903
{
904
    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
Y
yao 已提交
905 906 907 908 909 910 911 912 913 914 915
        return;

    CV_Assert(trainIdx.type() == CV_32SC2);
    CV_Assert(imgIdx.type() == CV_32SC2 && imgIdx.cols == trainIdx.cols);
    CV_Assert(distance.type() == CV_32FC2 && distance.cols == trainIdx.cols);

    const int nQuery = trainIdx.cols;

    matches.clear();
    matches.reserve(nQuery);

916 917 918
    const int *trainIdx_ptr = trainIdx.ptr<int>();
    const int *imgIdx_ptr = imgIdx.ptr<int>();
    const float *distance_ptr = distance.ptr<float>();
Y
yao 已提交
919 920 921 922

    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
        matches.push_back(vector<DMatch>());
923
        vector<DMatch> &curMatches = matches.back();
Y
yao 已提交
924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951
        curMatches.reserve(2);

        for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
        {
            int trainIdx = *trainIdx_ptr;

            if (trainIdx != -1)
            {
                int imgIdx = *imgIdx_ptr;

                float distance = *distance_ptr;

                DMatch m(queryIdx, trainIdx, imgIdx, distance);

                curMatches.push_back(m);
            }
        }

        if (compactResult && curMatches.empty())
            matches.pop_back();
    }
}

namespace
{
    struct ImgIdxSetter
    {
        explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}
952 953 954 955
        inline void operator()(DMatch &m) const
        {
            m.imgIdx = imgIdx;
        }
Y
yao 已提交
956 957 958 959
        int imgIdx;
    };
}

960 961
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector< vector<DMatch> > &matches, int k,
        const vector<oclMat> &masks, bool compactResult)
Y
yao 已提交
962
{
963
    if (k == 2)
Y
yao 已提交
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984
    {
        oclMat trainCollection;
        oclMat maskCollection;

        makeGpuCollection(trainCollection, maskCollection, masks);

        oclMat trainIdx, imgIdx, distance;

        knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
        knnMatch2Download(trainIdx, imgIdx, distance, matches);
    }
    else
    {
        if (query.empty() || empty())
            return;

        vector< vector<DMatch> > curMatches;
        vector<DMatch> temp;
        temp.reserve(2 * k);

        matches.resize(query.rows);
985
        for_each(matches.begin(), matches.end(), bind2nd(mem_fun_ref(&vector<DMatch>::reserve), k));
Y
yao 已提交
986 987 988 989 990 991 992

        for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx)
        {
            knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? oclMat() : masks[imgIdx]);

            for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
            {
993 994
                vector<DMatch> &localMatch = curMatches[queryIdx];
                vector<DMatch> &globalMatch = matches[queryIdx];
Y
yao 已提交
995 996 997 998

                for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));

                temp.clear();
999
                merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));
Y
yao 已提交
1000 1001 1002 1003 1004 1005 1006 1007 1008

                globalMatch.clear();
                const size_t count = std::min((size_t)k, temp.size());
                copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
            }
        }

        if (compactResult)
        {
1009 1010
            vector< vector<DMatch> >::iterator new_end = remove_if(matches.begin(), matches.end(), mem_fun_ref(&vector<DMatch>::empty));
            matches.erase(new_end, matches.end());
Y
yao 已提交
1011 1012 1013 1014 1015
        }
    }
}

// Radius matching against a single train set
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
        oclMat &trainIdx,	oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
{
    if (query.empty() || train.empty())
Y
yao 已提交
1020 1021
        return;

Y
yao 已提交
1022 1023 1024 1025 1026
    // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
    int callType = query.depth();
    char cvFuncName[] = "radiusMatchSingle";
    if (callType != 5)
        CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");
Y
yao 已提交
1027

Y
yao 已提交
1028 1029
    if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
        || callType != 2 || callType != 4)))
Y
yao 已提交
1030
    {
Y
yao 已提交
1031 1032
        CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
    }
Y
yao 已提交
1033 1034 1035

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.type() == query.type() && train.cols == query.cols);
Y
yao 已提交
1036
    CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size()));
Y
yao 已提交
1037

Y
yao 已提交
1038
    nMatches.create(1, query.rows, CV_32SC1);
Y
yao 已提交
1039 1040
    if (trainIdx.empty())
    {
Y
yao 已提交
1041 1042
        trainIdx.create(query.rows, std::max((train.rows/ 100), 10), CV_32SC1);
        distance.create(query.rows, std::max((train.rows/ 100), 10), CV_32FC1);
Y
yao 已提交
1043 1044 1045 1046
    }

    nMatches.setTo(Scalar::all(0));

Y
yao 已提交
1047 1048 1049
    matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
exit:
    return;
Y
yao 已提交
1050 1051
}

1052 1053 1054 1055
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
        vector< vector<DMatch> > &matches, bool compactResult)
{
    if (trainIdx.empty() || distance.empty() || nMatches.empty())
Y
yao 已提交
1056 1057 1058 1059 1060 1061 1062 1063 1064
        return;

    Mat trainIdxCPU(trainIdx);
    Mat distanceCPU(distance);
    Mat nMatchesCPU(nMatches);

    radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
}

1065 1066 1067 1068
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
        vector< vector<DMatch> > &matches, bool compactResult)
{
    if (trainIdx.empty() || distance.empty() || nMatches.empty())
Y
yao 已提交
1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079
        return;

    CV_Assert(trainIdx.type() == CV_32SC1);
    CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
    CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows);

    const int nQuery = trainIdx.rows;

    matches.clear();
    matches.reserve(nQuery);

1080
    const int *nMatches_ptr = nMatches.ptr<int>();
Y
yao 已提交
1081 1082 1083

    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
1084 1085
        const int *trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
        const float *distance_ptr = distance.ptr<float>(queryIdx);
Y
yao 已提交
1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096

        const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);

        if (nMatches == 0)
        {
            if (!compactResult)
                matches.push_back(vector<DMatch>());
            continue;
        }

        matches.push_back(vector<DMatch>(nMatches));
1097
        vector<DMatch> &curMatches = matches.back();
Y
yao 已提交
1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113

        for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr)
        {
            int trainIdx = *trainIdx_ptr;

            float distance = *distance_ptr;

            DMatch m(queryIdx, trainIdx, 0, distance);

            curMatches[i] = m;
        }

        sort(curMatches.begin(), curMatches.end());
    }
}

1114 1115 1116 1117
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, const oclMat &train, vector< vector<DMatch> > &matches,
        float maxDistance, const oclMat &mask, bool compactResult)
{
    oclMat trainIdx, distance, nMatches;
Y
yao 已提交
1118 1119 1120 1121
    radiusMatchSingle(query, train, trainIdx, distance, nMatches, maxDistance, mask);
    radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);
}

1122
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
N
Niko 已提交
1123
        oclMat &nMatches, float /*maxDistance*/, const vector<oclMat> &masks)
1124 1125
{
    if (query.empty() || empty())
Y
yao 已提交
1126 1127
        return;

1128 1129
    typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks,
                             const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches);
Y
yao 已提交
1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154
#if 0
    static const caller_t callers[3][6] =
    {
        {
            ocl_matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            ocl_matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            ocl_matchL1_gpu<int>, matchL1_gpu<float>
        },
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, ocl_matchL2_gpu<float>
        },
        {
            ocl_matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            ocl_matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            ocl_matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        }
    };
#endif
    const int nQuery = query.rows;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));

1155
    nMatches.create(1, nQuery, CV_32SC1);
Y
yao 已提交
1156 1157
    if (trainIdx.empty())
    {
1158 1159 1160
        trainIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
        imgIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
        distance.create(nQuery, std::max((nQuery / 100), 10), CV_32FC1);
Y
yao 已提交
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
    }

    nMatches.setTo(Scalar::all(0));

    //caller_t func = callers[distType][query.depth()];
    //CV_Assert(func != 0);

    vector<oclMat> trains_(trainDescCollection.begin(), trainDescCollection.end());
    vector<oclMat> masks_(masks.begin(), masks.end());

1171 1172
    /*  func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
          trainIdx, imgIdx, distance, nMatches));*/
Y
yao 已提交
1173 1174
}

1175 1176
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
        const oclMat &nMatches, vector< vector<DMatch> > &matches, bool compactResult)
Y
yao 已提交
1177
{
1178
    if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
Y
yao 已提交
1179 1180 1181 1182 1183 1184 1185 1186 1187 1188
        return;

    Mat trainIdxCPU(trainIdx);
    Mat imgIdxCPU(imgIdx);
    Mat distanceCPU(distance);
    Mat nMatchesCPU(nMatches);

    radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
}

1189 1190 1191 1192
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
        vector< vector<DMatch> > &matches, bool compactResult)
{
    if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
Y
yao 已提交
1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
        return;

    CV_Assert(trainIdx.type() == CV_32SC1);
    CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.size() == trainIdx.size());
    CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
    CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows);

    const int nQuery = trainIdx.rows;

    matches.clear();
    matches.reserve(nQuery);

1205
    const int *nMatches_ptr = nMatches.ptr<int>();
Y
yao 已提交
1206 1207 1208

    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
1209 1210 1211
        const int *trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
        const int *imgIdx_ptr = imgIdx.ptr<int>(queryIdx);
        const float *distance_ptr = distance.ptr<float>(queryIdx);
Y
yao 已提交
1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222

        const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);

        if (nMatches == 0)
        {
            if (!compactResult)
                matches.push_back(vector<DMatch>());
            continue;
        }

        matches.push_back(vector<DMatch>());
1223
        vector<DMatch> &curMatches = matches.back();
Y
yao 已提交
1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240
        curMatches.reserve(nMatches);

        for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
        {
            int trainIdx = *trainIdx_ptr;
            int imgIdx = *imgIdx_ptr;
            float distance = *distance_ptr;

            DMatch m(queryIdx, trainIdx, imgIdx, distance);

            curMatches.push_back(m);
        }

        sort(curMatches.begin(), curMatches.end());
    }
}

1241 1242
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, vector< vector<DMatch> > &matches, float maxDistance,
        const vector<oclMat> &masks, bool compactResult)
Y
yao 已提交
1243
{
1244
    oclMat trainIdx, imgIdx, distance, nMatches;
Y
yao 已提交
1245 1246
    radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
    radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
1247
}