stereobm.cpp 48.5 KB
Newer Older
1 2 3 4 5 6 7 8 9
//M*//////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
V
Vadim Pisarevsky 已提交
10
//                          License Agreement
11 12 13
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
V
Vadim Pisarevsky 已提交
14
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
15 16 17 18 19 20 21 22 23 24 25 26
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
V
Vadim Pisarevsky 已提交
27
//   * The name of the copyright holders may not be used to endorse or promote products
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/****************************************************************************************\
*    Very fast SAD-based (Sum-of-Absolute-Diffrences) stereo correspondence algorithm.   *
*    Contributed by Kurt Konolige                                                        *
\****************************************************************************************/

#include "precomp.hpp"
S
Stefano Fabri 已提交
49
#include <stdio.h>
50
#include <limits>
A
Alexander Alekhin 已提交
51
#include "opencl_kernels_calib3d.hpp"
52
#include "opencv2/core/hal/intrin.hpp"
53

V
Vadim Pisarevsky 已提交
54
namespace cv
55 56
{

V
Vadim Pisarevsky 已提交
57
struct StereoBMParams
58
{
59
    StereoBMParams(int _numDisparities=64, int _SADWindowSize=21)
V
Vadim Pisarevsky 已提交
60
    {
61
        preFilterType = StereoBM::PREFILTER_XSOBEL;
V
Vadim Pisarevsky 已提交
62 63 64 65 66 67 68 69 70 71 72 73
        preFilterSize = 9;
        preFilterCap = 31;
        SADWindowSize = _SADWindowSize;
        minDisparity = 0;
        numDisparities = _numDisparities > 0 ? _numDisparities : 64;
        textureThreshold = 10;
        uniquenessRatio = 15;
        speckleRange = speckleWindowSize = 0;
        roi1 = roi2 = Rect(0,0,0,0);
        disp12MaxDiff = -1;
        dispType = CV_16S;
    }
74

V
Vadim Pisarevsky 已提交
75 76 77 78 79 80 81 82 83 84 85 86 87 88
    int preFilterType;
    int preFilterSize;
    int preFilterCap;
    int SADWindowSize;
    int minDisparity;
    int numDisparities;
    int textureThreshold;
    int uniquenessRatio;
    int speckleRange;
    int speckleWindowSize;
    Rect roi1, roi2;
    int disp12MaxDiff;
    int dispType;
};
89

90
#ifdef HAVE_OPENCL
K
Konstantin Matskevich 已提交
91 92
static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsize, int prefilterCap)
{
93
    ocl::Kernel k("prefilter_norm", ocl::calib3d::stereobm_oclsrc, cv::format("-D WSZ=%d", winsize));
K
Konstantin Matskevich 已提交
94 95 96 97 98 99 100 101 102 103
    if(k.empty())
        return false;

    int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2);
    scale_g *= scale_s;

    UMat input = _input.getUMat(), output;
    _output.create(input.size(), input.type());
    output = _output.getUMat();

104
    size_t globalThreads[3] = { (size_t)input.cols, (size_t)input.rows, 1 };
K
Konstantin Matskevich 已提交
105 106

    k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols,
107
        prefilterCap, scale_g, scale_s);
K
Konstantin Matskevich 已提交
108 109 110

    return k.run(2, globalThreads, NULL, false);
}
111
#endif
112 113 114 115 116 117 118 119

static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf )
{
    int x, y, wsz2 = winsize/2;
    int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32);
    int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2);
    const int OFS = 256*5, TABSZ = OFS*2 + 256;
    uchar tab[TABSZ];
120
    const uchar* sptr = src.ptr();
121
    int srcstep = (int)src.step;
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
    Size size = src.size();

    scale_g *= scale_s;

    for( x = 0; x < TABSZ; x++ )
        tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero*2 : x - OFS + ftzero);

    for( x = 0; x < size.width; x++ )
        vsum[x] = (ushort)(sptr[x]*(wsz2 + 2));

    for( y = 1; y < wsz2; y++ )
    {
        for( x = 0; x < size.width; x++ )
            vsum[x] = (ushort)(vsum[x] + sptr[srcstep*y + x]);
    }

    for( y = 0; y < size.height; y++ )
    {
        const uchar* top = sptr + srcstep*MAX(y-wsz2-1,0);
        const uchar* bottom = sptr + srcstep*MIN(y+wsz2,size.height-1);
        const uchar* prev = sptr + srcstep*MAX(y-1,0);
        const uchar* curr = sptr + srcstep*y;
        const uchar* next = sptr + srcstep*MIN(y+1,size.height-1);
        uchar* dptr = dst.ptr<uchar>(y);
Z
cleanup  
ziyangli 已提交
146

Z
Ziyang LI 已提交
147
        for( x = 0; x < size.width; x++ )
148
            vsum[x] = (ushort)(vsum[x] + bottom[x] - top[x]);
Z
cleanup  
ziyangli 已提交
149

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
        for( x = 0; x <= wsz2; x++ )
        {
            vsum[-x-1] = vsum[0];
            vsum[size.width+x] = vsum[size.width-1];
        }

        int sum = vsum[0]*(wsz2 + 1);
        for( x = 1; x <= wsz2; x++ )
            sum += vsum[x];

        int val = ((curr[0]*5 + curr[1] + prev[0] + next[0])*scale_g - sum*scale_s) >> 10;
        dptr[0] = tab[val + OFS];

        for( x = 1; x < size.width-1; x++ )
        {
            sum += vsum[x+wsz2] - vsum[x-wsz2-1];
            val = ((curr[x]*4 + curr[x-1] + curr[x+1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10;
            dptr[x] = tab[val + OFS];
        }
A
Andrey Kamaev 已提交
169

170 171 172 173 174 175
        sum += vsum[x+wsz2] - vsum[x-wsz2-1];
        val = ((curr[x]*5 + curr[x-1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10;
        dptr[x] = tab[val + OFS];
    }
}

176
#ifdef HAVE_OPENCL
K
Konstantin Matskevich 已提交
177 178 179 180 181 182 183 184 185 186
static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int prefilterCap)
{
    ocl::Kernel k("prefilter_xsobel", ocl::calib3d::stereobm_oclsrc);
    if(k.empty())
        return false;

    UMat input = _input.getUMat(), output;
    _output.create(input.size(), input.type());
    output = _output.getUMat();

187
    size_t globalThreads[3] = { (size_t)input.cols, (size_t)input.rows, 1 };
K
Konstantin Matskevich 已提交
188 189 190

    k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, prefilterCap);

K
Konstantin Matskevich 已提交
191
    return k.run(2, globalThreads, NULL, false);
K
Konstantin Matskevich 已提交
192
}
193
#endif
194 195 196 197 198 199 200 201

static void
prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
{
    int x, y;
    const int OFS = 256*4, TABSZ = OFS*2 + 256;
    uchar tab[TABSZ];
    Size size = src.size();
A
Andrey Kamaev 已提交
202

203 204 205
    for( x = 0; x < TABSZ; x++ )
        tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero*2 : x - OFS + ftzero);
    uchar val0 = tab[0 + OFS];
A
Andrey Kamaev 已提交
206

207 208
#if CV_SIMD128
    bool useSIMD = hasSIMD128();
209
#endif
A
Andrey Kamaev 已提交
210

211 212 213 214 215 216 217 218
    for( y = 0; y < size.height-1; y += 2 )
    {
        const uchar* srow1 = src.ptr<uchar>(y);
        const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1;
        const uchar* srow2 = y < size.height-1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1;
        const uchar* srow3 = y < size.height-2 ? srow1 + src.step*2 : srow1;
        uchar* dptr0 = dst.ptr<uchar>(y);
        uchar* dptr1 = dptr0 + dst.step;
A
Andrey Kamaev 已提交
219

220 221
        dptr0[0] = dptr0[size.width-1] = dptr1[0] = dptr1[size.width-1] = val0;
        x = 1;
A
Andrey Kamaev 已提交
222

223
#if CV_SIMD128
224 225
        if( useSIMD )
        {
226 227 228
            v_int16x8 ftz = v_setall_s16((short) ftzero);
            v_int16x8 ftz2 = v_setall_s16((short)(ftzero*2));
            v_int16x8 z = v_setzero_s16();
A
Andrey Kamaev 已提交
229

230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
            for(; x <= size.width-8; x += 8 )
            {
                v_int16x8 s00 = v_reinterpret_as_s16(v_load_expand(srow0 + x + 1));
                v_int16x8 s01 = v_reinterpret_as_s16(v_load_expand(srow0 + x - 1));
                v_int16x8 s10 = v_reinterpret_as_s16(v_load_expand(srow1 + x + 1));
                v_int16x8 s11 = v_reinterpret_as_s16(v_load_expand(srow1 + x - 1));
                v_int16x8 s20 = v_reinterpret_as_s16(v_load_expand(srow2 + x + 1));
                v_int16x8 s21 = v_reinterpret_as_s16(v_load_expand(srow2 + x - 1));
                v_int16x8 s30 = v_reinterpret_as_s16(v_load_expand(srow3 + x + 1));
                v_int16x8 s31 = v_reinterpret_as_s16(v_load_expand(srow3 + x - 1));

                v_int16x8 d0 = s00 - s01;
                v_int16x8 d1 = s10 - s11;
                v_int16x8 d2 = s20 - s21;
                v_int16x8 d3 = s30 - s31;

                v_uint16x8 v0 = v_reinterpret_as_u16(v_max(v_min(d0 + d1 + d1 + d2 + ftz, ftz2), z));
                v_uint16x8 v1 = v_reinterpret_as_u16(v_max(v_min(d1 + d2 + d2 + d3 + ftz, ftz2), z));

                v_pack_store(dptr0 + x, v0);
                v_pack_store(dptr1 + x, v1);
251 252
            }
        }
V
Vadim Pisarevsky 已提交
253
#endif
A
Andrey Kamaev 已提交
254

255 256 257
        for( ; x < size.width-1; x++ )
        {
            int d0 = srow0[x+1] - srow0[x-1], d1 = srow1[x+1] - srow1[x-1],
V
Vadim Pisarevsky 已提交
258
            d2 = srow2[x+1] - srow2[x-1], d3 = srow3[x+1] - srow3[x-1];
259 260 261 262 263 264
            int v0 = tab[d0 + d1*2 + d2 + OFS];
            int v1 = tab[d1 + d2*2 + d3 + OFS];
            dptr0[x] = (uchar)v0;
            dptr1[x] = (uchar)v1;
        }
    }
A
Andrey Kamaev 已提交
265

266 267 268
    for( ; y < size.height; y++ )
    {
        uchar* dptr = dst.ptr<uchar>(y);
269
        x = 0;
270 271 272 273 274 275 276 277
#if CV_SIMD128
        if( useSIMD )
        {
            v_uint8x16 val0_16 = v_setall_u8(val0);
            for(; x <= size.width-16; x+=16 )
                v_store(dptr + x, val0_16);
        }
#endif
278
        for(; x < size.width; x++ )
279 280 281 282 283
            dptr[x] = val0;
    }
}


L
Leonardo Brás 已提交
284 285
static const int DISPARITY_SHIFT_16S = 4;
static const int DISPARITY_SHIFT_32S = 8;
286

287 288
#if CV_SIMD128
static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
V
Vadim Pisarevsky 已提交
289 290
                                            Mat& disp, Mat& cost, StereoBMParams& state,
                                            uchar* buf, int _dy0, int _dy1 )
291 292 293 294 295
{
    const int ALIGN = 16;
    int x, y, d;
    int wsz = state.SADWindowSize, wsz2 = wsz/2;
    int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
V
Vadim Pisarevsky 已提交
296
    int ndisp = state.numDisparities;
297 298 299 300 301 302 303
    int mindisp = state.minDisparity;
    int lofs = MAX(ndisp - 1 + mindisp, 0);
    int rofs = -MIN(ndisp - 1 + mindisp, 0);
    int width = left.cols, height = left.rows;
    int width1 = width - rofs - ndisp + 1;
    int ftzero = state.preFilterCap;
    int textureThreshold = state.textureThreshold;
304
    int uniquenessRatio = state.uniquenessRatio;
305
    short FILTERED = (short)((mindisp - 1) << DISPARITY_SHIFT_16S);
306 307 308 309

    ushort *sad, *hsad0, *hsad, *hsad_sub;
    int *htext;
    uchar *cbuf0, *cbuf;
310 311
    const uchar* lptr0 = left.ptr() + lofs;
    const uchar* rptr0 = right.ptr() + rofs;
312
    const uchar *lptr, *lptr_sub, *rptr;
313
    short* dptr = disp.ptr<short>();
314 315
    int sstep = (int)left.step;
    int dstep = (int)(disp.step/sizeof(dptr[0]));
316 317
    int cstep = (height + dy0 + dy1)*ndisp;
    short costbuf = 0;
318
    int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
319 320
    const int TABSZ = 256;
    uchar tab[TABSZ];
321
    const v_int16x8 d0_8 = v_int16x8(0,1,2,3,4,5,6,7), dd_8 = v_setall_s16(8);
322 323 324 325

    sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN);
    hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
    htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
326
    cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
327 328 329 330 331 332 333 334 335 336 337 338

    for( x = 0; x < TABSZ; x++ )
        tab[x] = (uchar)std::abs(x - ftzero);

    // initialize buffers
    memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
    memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );

    for( x = -wsz2-1; x < wsz2; x++ )
    {
        hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
        lptr = lptr0 + MIN(MAX(x, -lofs), width-lofs-1) - dy0*sstep;
339
        rptr = rptr0 + MIN(MAX(x, -rofs), width-rofs-ndisp) - dy0*sstep;
340 341 342 343

        for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
        {
            int lval = lptr[0];
344
            v_uint8x16 lv = v_setall_u8((uchar)lval);
345 346
            for( d = 0; d < ndisp; d += 16 )
            {
347 348 349 350 351 352 353 354 355 356 357
                v_uint8x16 rv = v_load(rptr + d);
                v_uint16x8 hsad_l = v_load(hsad + d);
                v_uint16x8 hsad_h = v_load(hsad + d + 8);
                v_uint8x16 diff = v_absdiff(lv, rv);
                v_store(cbuf + d, diff);
                v_uint16x8 diff0, diff1;
                v_expand(diff, diff0, diff1);
                hsad_l += diff0;
                hsad_h += diff1;
                v_store(hsad + d, hsad_l);
                v_store(hsad + d + 8, hsad_h);
358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
            }
            htext[y] += tab[lval];
        }
    }

    // initialize the left and right borders of the disparity map
    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < lofs; x++ )
            dptr[y*dstep + x] = FILTERED;
        for( x = lofs + width1; x < width; x++ )
            dptr[y*dstep + x] = FILTERED;
    }
    dptr += lofs;

    for( x = 0; x < width1; x++, dptr++ )
    {
375
        short* costptr = cost.data ? cost.ptr<short>() + lofs + x : &costbuf;
376 377
        int x0 = x - wsz2 - 1, x1 = x + wsz2;
        const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
A
Andrey Kamaev 已提交
378
        cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
379 380 381
        hsad = hsad0 - dy0*ndisp;
        lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
        lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
382
        rptr = rptr0 + MIN(MAX(x1, -rofs), width-ndisp-rofs) - dy0*sstep;
383 384

        for( y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
V
Vadim Pisarevsky 已提交
385
            hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
386 387
        {
            int lval = lptr[0];
388
            v_uint8x16 lv = v_setall_u8((uchar)lval);
389 390
            for( d = 0; d < ndisp; d += 16 )
            {
391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
                v_uint8x16 rv = v_load(rptr + d);
                v_uint16x8 hsad_l = v_load(hsad + d);
                v_uint16x8 hsad_h = v_load(hsad + d + 8);
                v_uint8x16 cbs = v_load(cbuf_sub + d);
                v_uint8x16 diff = v_absdiff(lv, rv);
                v_int16x8 diff_l, diff_h, cbs_l, cbs_h;
                v_store(cbuf + d, diff);
                v_expand(v_reinterpret_as_s8(diff), diff_l, diff_h);
                v_expand(v_reinterpret_as_s8(cbs), cbs_l, cbs_h);
                diff_l -= cbs_l;
                diff_h -= cbs_h;
                hsad_h = v_reinterpret_as_u16(v_reinterpret_as_s16(hsad_h) + diff_h);
                hsad_l = v_reinterpret_as_u16(v_reinterpret_as_s16(hsad_l) + diff_l);
                v_store(hsad + d, hsad_l);
                v_store(hsad + d + 8, hsad_h);
406 407 408 409 410 411 412 413 414 415 416 417 418
            }
            htext[y] += tab[lval] - tab[lptr_sub[0]];
        }

        // fill borders
        for( y = dy1; y <= wsz2; y++ )
            htext[height+y] = htext[height+dy1-1];
        for( y = -wsz2-1; y < -dy0; y++ )
            htext[y] = htext[-dy0];

        // initialize sums
        for( d = 0; d < ndisp; d++ )
            sad[d] = (ushort)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));
A
Andrey Kamaev 已提交
419

420 421
        hsad = hsad0 + (1 - dy0)*ndisp;
        for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
422
            for( d = 0; d <= ndisp-16; d += 16 )
423
            {
424 425 426 427 428 429 430 431
                v_uint16x8 s0 = v_load(sad + d);
                v_uint16x8 s1 = v_load(sad + d + 8);
                v_uint16x8 t0 = v_load(hsad + d);
                v_uint16x8 t1 = v_load(hsad + d + 8);
                s0 = s0 + t0;
                s1 = s1 + t1;
                v_store(sad + d, s0);
                v_store(sad + d + 8, s1);
432 433 434 435 436 437 438 439 440 441 442
            }
        int tsum = 0;
        for( y = -wsz2-1; y < wsz2; y++ )
            tsum += htext[y];

        // finally, start the real processing
        for( y = 0; y < height; y++ )
        {
            int minsad = INT_MAX, mind = -1;
            hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
            hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
443 444
            v_int16x8 minsad8 = v_setall_s16(SHRT_MAX);
            v_int16x8 mind8 = v_setall_s16(0), d8 = d0_8;
445 446 447

            for( d = 0; d < ndisp; d += 16 )
            {
448 449
                v_int16x8 u0 = v_reinterpret_as_s16(v_load(hsad_sub + d));
                v_int16x8 u1 = v_reinterpret_as_s16(v_load(hsad + d));
A
Andrey Kamaev 已提交
450

451 452
                v_int16x8 v0 = v_reinterpret_as_s16(v_load(hsad_sub + d + 8));
                v_int16x8 v1 = v_reinterpret_as_s16(v_load(hsad + d + 8));
A
Andrey Kamaev 已提交
453

454 455
                v_int16x8 usad8 = v_reinterpret_as_s16(v_load(sad + d));
                v_int16x8 vsad8 = v_reinterpret_as_s16(v_load(sad + d + 8));
A
Andrey Kamaev 已提交
456

457 458 459 460
                u1 -= u0;
                v1 -= v0;
                usad8 += u1;
                vsad8 += v1;
A
Andrey Kamaev 已提交
461

462 463 464
                v_int16x8 mask = minsad8 > usad8;
                minsad8 = v_min(minsad8, usad8);
                mind8 = v_max(mind8, (mask& d8));
A
Andrey Kamaev 已提交
465

466 467
                v_store(sad + d, v_reinterpret_as_u16(usad8));
                v_store(sad + d + 8, v_reinterpret_as_u16(vsad8));
A
Andrey Kamaev 已提交
468

469 470
                mask = minsad8 > vsad8;
                minsad8 = v_min(minsad8, vsad8);
A
Andrey Kamaev 已提交
471

472 473 474
                d8 = d8 + dd_8;
                mind8 = v_max(mind8, (mask & d8));
                d8 = d8 + dd_8;
475 476 477 478 479 480 481 482
            }

            tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
            if( tsum < textureThreshold )
            {
                dptr[y*dstep] = FILTERED;
                continue;
            }
A
Andrey Kamaev 已提交
483

484
            ushort CV_DECL_ALIGNED(16) minsad_buf[8], mind_buf[8];
485 486
            v_store(minsad_buf, v_reinterpret_as_u16(minsad8));
            v_store(mind_buf, v_reinterpret_as_u16(mind8));
487 488 489 490 491 492
            for( d = 0; d < 8; d++ )
                if(minsad > (int)minsad_buf[d] || (minsad == (int)minsad_buf[d] && mind > mind_buf[d]))
                {
                    minsad = minsad_buf[d];
                    mind = mind_buf[d];
                }
A
Andrey Kamaev 已提交
493

494 495
            if( uniquenessRatio > 0 )
            {
496
                int thresh = minsad + (minsad * uniquenessRatio/100);
497 498 499 500 501
                v_int32x4 thresh4 = v_setall_s32(thresh + 1);
                v_int32x4 d1 = v_setall_s32(mind-1), d2 = v_setall_s32(mind+1);
                v_int32x4 dd_4 = v_setall_s32(4);
                v_int32x4 d4 = v_int32x4(0,1,2,3);
                v_int32x4 mask4;
502

503
                for( d = 0; d < ndisp; d += 8 )
504
                {
505 506 507 508 509 510
                    v_int16x8 sad8 = v_reinterpret_as_s16(v_load(sad + d));
                    v_int32x4 sad4_l, sad4_h;
                    v_expand(sad8, sad4_l, sad4_h);
                    mask4 = thresh4 > sad4_l;
                    mask4 = mask4 & ((d1 > d4) | (d4 > d2));
                    if( v_signmask(mask4) )
511
                        break;
512 513 514 515
                    d4 += dd_4;
                    mask4 = thresh4 > sad4_h;
                    mask4 = mask4 & ((d1 > d4) | (d4 > d2));
                    if( v_signmask(mask4) )
516
                        break;
517
                    d4 += dd_4;
518 519 520 521 522 523 524 525 526 527
                }
                if( d < ndisp )
                {
                    dptr[y*dstep] = FILTERED;
                    continue;
                }
            }

            if( 0 < mind && mind < ndisp - 1 )
            {
A
Andrey Kamaev 已提交
528 529
                int p = sad[mind+1], n = sad[mind-1];
                d = p + n - 2*sad[mind] + std::abs(p - n);
530
                dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15) >> 4);
531 532
            }
            else
533
                dptr[y*dstep] = (short)((ndisp - mind - 1 + mindisp)*16);
534 535 536 537 538 539
            costptr[y*coststep] = sad[mind];
        }
    }
}
#endif

L
Leonardo Brás 已提交
540
template <typename mType>
541
static void
542
findStereoCorrespondenceBM( const Mat& left, const Mat& right,
V
Vadim Pisarevsky 已提交
543
                           Mat& disp, Mat& cost, const StereoBMParams& state,
544
                           uchar* buf, int _dy0, int _dy1, const int disp_shift )
545
{
546

547 548 549 550
    const int ALIGN = 16;
    int x, y, d;
    int wsz = state.SADWindowSize, wsz2 = wsz/2;
    int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
V
Vadim Pisarevsky 已提交
551
    int ndisp = state.numDisparities;
552 553 554 555 556 557 558 559
    int mindisp = state.minDisparity;
    int lofs = MAX(ndisp - 1 + mindisp, 0);
    int rofs = -MIN(ndisp - 1 + mindisp, 0);
    int width = left.cols, height = left.rows;
    int width1 = width - rofs - ndisp + 1;
    int ftzero = state.preFilterCap;
    int textureThreshold = state.textureThreshold;
    int uniquenessRatio = state.uniquenessRatio;
560
    mType FILTERED = (mType)((mindisp - 1) << disp_shift);
561

562 563 564 565 566 567
#if CV_SIMD128
    bool useSIMD = hasSIMD128();
    if( useSIMD )
    {
        CV_Assert (ndisp % 8 == 0);
    }
568 569
#endif

570 571
    int *sad, *hsad0, *hsad, *hsad_sub, *htext;
    uchar *cbuf0, *cbuf;
572 573
    const uchar* lptr0 = left.ptr() + lofs;
    const uchar* rptr0 = right.ptr() + rofs;
574
    const uchar *lptr, *lptr_sub, *rptr;
L
Leonardo Brás 已提交
575
    mType* dptr = disp.ptr<mType>();
576 577
    int sstep = (int)left.step;
    int dstep = (int)(disp.step/sizeof(dptr[0]));
578 579
    int cstep = (height+dy0+dy1)*ndisp;
    int costbuf = 0;
580
    int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
581 582 583 584 585 586
    const int TABSZ = 256;
    uchar tab[TABSZ];

    sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
    hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
    htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
587
    cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
588 589 590 591 592 593 594 595 596 597 598

    for( x = 0; x < TABSZ; x++ )
        tab[x] = (uchar)std::abs(x - ftzero);

    // initialize buffers
    memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
    memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );

    for( x = -wsz2-1; x < wsz2; x++ )
    {
        hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
599
        lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
600
        rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-ndisp) - dy0*sstep;
601 602 603
        for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
        {
            int lval = lptr[0];
604 605 606
            d = 0;
#if CV_SIMD128
            if( useSIMD )
607
            {
608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635
                v_uint8x16 lv = v_setall_u8((uchar)lval);

                for( ; d <= ndisp - 16; d += 16 )
                {
                    v_uint8x16 rv = v_load(rptr + d);
                    v_int32x4 hsad_0 = v_load(hsad + d);
                    v_int32x4 hsad_1 = v_load(hsad + d + 4);
                    v_int32x4 hsad_2 = v_load(hsad + d + 8);
                    v_int32x4 hsad_3 = v_load(hsad + d + 12);
                    v_uint8x16 diff = v_absdiff(lv, rv);
                    v_store(cbuf + d, diff);

                    v_uint16x8 diff0, diff1;
                    v_uint32x4 diff00, diff01, diff10, diff11;
                    v_expand(diff, diff0, diff1);
                    v_expand(diff0, diff00, diff01);
                    v_expand(diff1, diff10, diff11);

                    hsad_0 += v_reinterpret_as_s32(diff00);
                    hsad_1 += v_reinterpret_as_s32(diff01);
                    hsad_2 += v_reinterpret_as_s32(diff10);
                    hsad_3 += v_reinterpret_as_s32(diff11);

                    v_store(hsad + d, hsad_0);
                    v_store(hsad + d + 4, hsad_1);
                    v_store(hsad + d + 8, hsad_2);
                    v_store(hsad + d + 12, hsad_3);
                }
636
            }
637 638
#endif
            for( ; d < ndisp; d++ )
639 640 641 642 643 644 645 646
            {
                int diff = std::abs(lval - rptr[d]);
                cbuf[d] = (uchar)diff;
                hsad[d] = (int)(hsad[d] + diff);
            }
            htext[y] += tab[lval];
        }
    }
A
Andrey Kamaev 已提交
647

648 649 650 651 652 653 654 655 656 657 658 659
    // initialize the left and right borders of the disparity map
    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < lofs; x++ )
            dptr[y*dstep + x] = FILTERED;
        for( x = lofs + width1; x < width; x++ )
            dptr[y*dstep + x] = FILTERED;
    }
    dptr += lofs;

    for( x = 0; x < width1; x++, dptr++ )
    {
660
        int* costptr = cost.data ? cost.ptr<int>() + lofs + x : &costbuf;
661 662
        int x0 = x - wsz2 - 1, x1 = x + wsz2;
        const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
A
Andrey Kamaev 已提交
663
        cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
664 665 666
        hsad = hsad0 - dy0*ndisp;
        lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
        lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
667
        rptr = rptr0 + MIN(MAX(x1, -rofs), width-ndisp-rofs) - dy0*sstep;
668 669

        for( y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
V
Vadim Pisarevsky 已提交
670
            hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
671 672
        {
            int lval = lptr[0];
673 674 675
            d = 0;
#if CV_SIMD128
            if( useSIMD )
676
            {
677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711
                v_uint8x16 lv = v_setall_u8((uchar)lval);
                for( ; d <= ndisp - 16; d += 16 )
                {
                    v_uint8x16 rv = v_load(rptr + d);
                    v_int32x4 hsad_0 = v_load(hsad + d);
                    v_int32x4 hsad_1 = v_load(hsad + d + 4);
                    v_int32x4 hsad_2 = v_load(hsad + d + 8);
                    v_int32x4 hsad_3 = v_load(hsad + d + 12);
                    v_uint8x16 cbs = v_load(cbuf_sub + d);
                    v_uint8x16 diff = v_absdiff(lv, rv);
                    v_store(cbuf + d, diff);

                    v_uint16x8 diff0, diff1, cbs0, cbs1;
                    v_int32x4 diff00, diff01, diff10, diff11, cbs00, cbs01, cbs10, cbs11;
                    v_expand(diff, diff0, diff1);
                    v_expand(cbs, cbs0, cbs1);
                    v_expand(v_reinterpret_as_s16(diff0), diff00, diff01);
                    v_expand(v_reinterpret_as_s16(diff1), diff10, diff11);
                    v_expand(v_reinterpret_as_s16(cbs0), cbs00, cbs01);
                    v_expand(v_reinterpret_as_s16(cbs1), cbs10, cbs11);

                    v_int32x4 diff_0 = diff00 - cbs00;
                    v_int32x4 diff_1 = diff01 - cbs01;
                    v_int32x4 diff_2 = diff10 - cbs10;
                    v_int32x4 diff_3 = diff11 - cbs11;
                    hsad_0 += diff_0;
                    hsad_1 += diff_1;
                    hsad_2 += diff_2;
                    hsad_3 += diff_3;

                    v_store(hsad + d, hsad_0);
                    v_store(hsad + d + 4, hsad_1);
                    v_store(hsad + d + 8, hsad_2);
                    v_store(hsad + d + 12, hsad_3);
                }
712
            }
713 714
#endif
            for( ; d < ndisp; d++ )
715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731
            {
                int diff = std::abs(lval - rptr[d]);
                cbuf[d] = (uchar)diff;
                hsad[d] = hsad[d] + diff - cbuf_sub[d];
            }
            htext[y] += tab[lval] - tab[lptr_sub[0]];
        }

        // fill borders
        for( y = dy1; y <= wsz2; y++ )
            htext[height+y] = htext[height+dy1-1];
        for( y = -wsz2-1; y < -dy0; y++ )
            htext[y] = htext[-dy0];

        // initialize sums
        for( d = 0; d < ndisp; d++ )
            sad[d] = (int)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));
A
Andrey Kamaev 已提交
732

733 734
        hsad = hsad0 + (1 - dy0)*ndisp;
        for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
735
        {
736 737 738
            d = 0;
#if CV_SIMD128
            if( useSIMD )
739
            {
740 741 742 743 744 745 746 747 748 749 750
                for( d = 0; d <= ndisp-8; d += 8 )
                {
                    v_int32x4 s0 = v_load(sad + d);
                    v_int32x4 s1 = v_load(sad + d + 4);
                    v_int32x4 t0 = v_load(hsad + d);
                    v_int32x4 t1 = v_load(hsad + d + 4);
                    s0 += t0;
                    s1 += t1;
                    v_store(sad + d, s0);
                    v_store(sad + d + 4, s1);
                }
751
            }
752 753
#endif
            for( ; d < ndisp; d++ )
754
                sad[d] = (int)(sad[d] + hsad[d]);
755
        }
756 757 758 759 760 761 762 763 764 765
        int tsum = 0;
        for( y = -wsz2-1; y < wsz2; y++ )
            tsum += htext[y];

        // finally, start the real processing
        for( y = 0; y < height; y++ )
        {
            int minsad = INT_MAX, mind = -1;
            hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
            hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
766 767 768
            d = 0;
#if CV_SIMD128
            if( useSIMD )
769
            {
770 771 772 773
                v_int32x4 d0_4 = v_int32x4(0, 1, 2, 3);
                v_int32x4 dd_4 = v_setall_s32(4);
                v_int32x4 minsad4 = v_setall_s32(INT_MAX);
                v_int32x4 mind4 = v_setall_s32(0), d4 = d0_4;
774

775 776 777 778
                for( ; d <= ndisp - 8; d += 8 )
                {
                    v_int32x4 u0 = v_load(hsad_sub + d);
                    v_int32x4 u1 = v_load(hsad + d);
779

780 781
                    v_int32x4 v0 = v_load(hsad_sub + d + 4);
                    v_int32x4 v1 = v_load(hsad + d + 4);
782

783 784
                    v_int32x4 usad4 = v_load(sad + d);
                    v_int32x4 vsad4 = v_load(sad + d + 4);
785

786 787 788 789
                    u1 -= u0;
                    v1 -= v0;
                    usad4 += u1;
                    vsad4 += v1;
790

791 792
                    v_store(sad + d, usad4);
                    v_store(sad + d + 4, vsad4);
793

794 795 796 797
                    v_int32x4 mask = minsad4 > usad4;
                    minsad4 = v_min(minsad4, usad4);
                    mind4 = v_select(mask, d4, mind4);
                    d4 += dd_4;
798

799 800 801 802 803
                    mask = minsad4 > vsad4;
                    minsad4 = v_min(minsad4, vsad4);
                    mind4 = v_select(mask, d4, mind4);
                    d4 += dd_4;
                }
804

805 806 807 808 809 810 811
                int CV_DECL_ALIGNED(16) minsad_buf[4], mind_buf[4];
                v_store(minsad_buf, minsad4);
                v_store(mind_buf, mind4);
                if(minsad_buf[0] < minsad || (minsad == minsad_buf[0] && mind_buf[0] < mind)) { minsad = minsad_buf[0]; mind = mind_buf[0]; }
                if(minsad_buf[1] < minsad || (minsad == minsad_buf[1] && mind_buf[1] < mind)) { minsad = minsad_buf[1]; mind = mind_buf[1]; }
                if(minsad_buf[2] < minsad || (minsad == minsad_buf[2] && mind_buf[2] < mind)) { minsad = minsad_buf[2]; mind = mind_buf[2]; }
                if(minsad_buf[3] < minsad || (minsad == minsad_buf[3] && mind_buf[3] < mind)) { minsad = minsad_buf[3]; mind = mind_buf[3]; }
812
            }
813 814
#endif
            for( ; d < ndisp; d++ )
815 816 817 818 819 820 821 822 823
            {
                int currsad = sad[d] + hsad[d] - hsad_sub[d];
                sad[d] = currsad;
                if( currsad < minsad )
                {
                    minsad = currsad;
                    mind = d;
                }
            }
K
Konstantin Matskevich 已提交
824

825 826 827 828 829 830 831 832 833 834 835 836
            tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
            if( tsum < textureThreshold )
            {
                dptr[y*dstep] = FILTERED;
                continue;
            }

            if( uniquenessRatio > 0 )
            {
                int thresh = minsad + (minsad * uniquenessRatio/100);
                for( d = 0; d < ndisp; d++ )
                {
J
Julien Nabet 已提交
837
                    if( (d < mind-1 || d > mind+1) && sad[d] <= thresh)
838 839 840 841 842 843 844 845 846 847
                        break;
                }
                if( d < ndisp )
                {
                    dptr[y*dstep] = FILTERED;
                    continue;
                }
            }

            {
V
Vadim Pisarevsky 已提交
848 849 850 851
                sad[-1] = sad[1];
                sad[ndisp] = sad[ndisp-2];
                int p = sad[mind+1], n = sad[mind-1];
                d = p + n - 2*sad[mind] + std::abs(p - n);
L
Leonardo Brás 已提交
852
                dptr[y*dstep] = (mType)(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15)
853
                                 >> (DISPARITY_SHIFT_32S - disp_shift));
V
Vadim Pisarevsky 已提交
854
                costptr[y*coststep] = sad[mind];
855 856 857 858 859
            }
        }
    }
}

860
#ifdef HAVE_OPENCL
K
Konstantin Matskevich 已提交
861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878
static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state)
{
    if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
    {
        if(!ocl_prefilter_norm( left0, left, state->preFilterSize, state->preFilterCap))
            return false;
        if(!ocl_prefilter_norm( right0, right, state->preFilterSize, state->preFilterCap))
            return false;
    }
    else
    {
        if(!ocl_prefilter_xsobel( left0, left, state->preFilterCap ))
            return false;
        if(!ocl_prefilter_xsobel( right0, right, state->preFilterCap))
            return false;
    }
    return true;
}
879
#endif
K
Konstantin Matskevich 已提交
880

V
Vadim Pisarevsky 已提交
881
struct PrefilterInvoker : public ParallelLoopBody
882 883
{
    PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
V
Vadim Pisarevsky 已提交
884
                     uchar* buf0, uchar* buf1, StereoBMParams* _state)
885 886 887 888 889 890 891
    {
        imgs0[0] = &left0; imgs0[1] = &right0;
        imgs[0] = &left; imgs[1] = &right;
        buf[0] = buf0; buf[1] = buf1;
        state = _state;
    }

V
Vadim Pisarevsky 已提交
892
    void operator()( const Range& range ) const
893
    {
V
Vadim Pisarevsky 已提交
894 895
        for( int i = range.start; i < range.end; i++ )
        {
896
            if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
V
Vadim Pisarevsky 已提交
897 898 899 900
                prefilterNorm( *imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i] );
            else
                prefilterXSobel( *imgs0[i], *imgs[i], state->preFilterCap );
        }
901
    }
A
Andrey Kamaev 已提交
902

903
    const Mat* imgs0[2];
A
Andrey Kamaev 已提交
904
    Mat* imgs[2];
905
    uchar* buf[2];
V
Vadim Pisarevsky 已提交
906
    StereoBMParams* state;
907 908
};

909
#ifdef HAVE_OPENCL
K
Konstantin Matskevich 已提交
910
static bool ocl_stereobm( InputArray _left, InputArray _right,
K
Konstantin Matskevich 已提交
911
                       OutputArray _disp, StereoBMParams* state)
K
Konstantin Matskevich 已提交
912
{
K
Konstantin Matskevich 已提交
913
    int ndisp = state->numDisparities;
K
Konstantin Matskevich 已提交
914
    int mindisp = state->minDisparity;
K
Konstantin Matskevich 已提交
915
    int wsz = state->SADWindowSize;
K
Konstantin Matskevich 已提交
916 917
    int wsz2 = wsz/2;

918 919 920 921
    ocl::Device devDef = ocl::Device::getDefault();
    int sizeX = devDef.isIntel() ? 32 : std::max(11, 27 - devDef.maxComputeUnits()),
        sizeY = sizeX - 1,
        N = ndisp * 2;
K
Konstantin Matskevich 已提交
922

923 924 925 926 927
    cv::String opt = cv::format("-D DEFINE_KERNEL_STEREOBM -D MIN_DISP=%d -D NUM_DISP=%d"
                                " -D BLOCK_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D WSZ=%d",
                                mindisp, ndisp,
                                sizeX, sizeY, wsz);
    ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, opt);
K
Konstantin Matskevich 已提交
928 929 930 931
    if(k.empty())
        return false;

    UMat left = _left.getUMat(), right = _right.getUMat();
K
Konstantin Matskevich 已提交
932 933
    int cols = left.cols, rows = left.rows;

K
Konstantin Matskevich 已提交
934
    _disp.create(_left.size(), CV_16S);
935
    _disp.setTo((mindisp - 1) << 4);
K
temp  
Konstantin Matskevich 已提交
936
    Rect roi = Rect(Point(wsz2 + mindisp + ndisp - 1, wsz2), Point(cols-wsz2-mindisp, rows-wsz2) );
K
Konstantin Matskevich 已提交
937
    UMat disp = (_disp.getUMat())(roi);
K
Konstantin Matskevich 已提交
938

939 940
    int globalX = (disp.cols + sizeX - 1) / sizeX,
        globalY = (disp.rows + sizeY - 1) / sizeY;
941 942
    size_t globalThreads[3] = {(size_t)N, (size_t)globalX, (size_t)globalY};
    size_t localThreads[3]  = {(size_t)N, 1, 1};
K
Konstantin Matskevich 已提交
943 944 945 946

    int idx = 0;
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(right));
K
Konstantin Matskevich 已提交
947 948 949
    idx = k.set(idx, ocl::KernelArg::WriteOnlyNoSize(disp));
    idx = k.set(idx, rows);
    idx = k.set(idx, cols);
K
Konstantin Matskevich 已提交
950 951
    idx = k.set(idx, state->textureThreshold);
    idx = k.set(idx, state->uniquenessRatio);
K
Konstantin Matskevich 已提交
952
    return k.run(3, globalThreads, localThreads, false);
K
Konstantin Matskevich 已提交
953
}
954
#endif
K
Konstantin Matskevich 已提交
955

V
Vadim Pisarevsky 已提交
956
struct FindStereoCorrespInvoker : public ParallelLoopBody
957 958
{
    FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
V
Vadim Pisarevsky 已提交
959 960 961 962
                             Mat& _disp, StereoBMParams* _state,
                             int _nstripes, size_t _stripeBufSize,
                             bool _useShorts, Rect _validDisparityRect,
                             Mat& _slidingSumBuf, Mat& _cost )
963
    {
964
        CV_Assert( _disp.type() == CV_16S || _disp.type() == CV_32S );
965 966 967 968 969
        left = &_left; right = &_right;
        disp = &_disp; state = _state;
        nstripes = _nstripes; stripeBufSize = _stripeBufSize;
        useShorts = _useShorts;
        validDisparityRect = _validDisparityRect;
V
Vadim Pisarevsky 已提交
970 971
        slidingSumBuf = &_slidingSumBuf;
        cost = &_cost;
972 973 974
#if CV_SIMD128
        useSIMD = hasSIMD128();
#endif
975
    }
A
Andrey Kamaev 已提交
976

V
Vadim Pisarevsky 已提交
977
    void operator()( const Range& range ) const
978 979
    {
        int cols = left->cols, rows = left->rows;
V
Vadim Pisarevsky 已提交
980 981
        int _row0 = std::min(cvRound(range.start * rows / nstripes), rows);
        int _row1 = std::min(cvRound(range.end * rows / nstripes), rows);
982
        uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize;
983
        int FILTERED = (state->minDisparity - 1)*16;
A
Andrey Kamaev 已提交
984

985
        Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
986 987 988 989
        if( roi.height == 0 )
            return;
        int row0 = roi.y;
        int row1 = roi.y + roi.height;
A
Andrey Kamaev 已提交
990

991 992 993 994 995 996 997 998 999 1000 1001
        Mat part;
        if( row0 > _row0 )
        {
            part = disp->rowRange(_row0, row0);
            part = Scalar::all(FILTERED);
        }
        if( _row1 > row1 )
        {
            part = disp->rowRange(row1, _row1);
            part = Scalar::all(FILTERED);
        }
A
Andrey Kamaev 已提交
1002

1003 1004 1005
        Mat left_i = left->rowRange(row0, row1);
        Mat right_i = right->rowRange(row0, row1);
        Mat disp_i = disp->rowRange(row0, row1);
V
Vadim Pisarevsky 已提交
1006
        Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
A
Andrey Kamaev 已提交
1007

1008 1009 1010 1011 1012
#if CV_SIMD128
        if( useSIMD && useShorts )
        {
            findStereoCorrespondenceBM_SIMD( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
        }
1013
        else
A
Andrey Kamaev 已提交
1014
#endif
1015 1016 1017 1018 1019 1020
        {
            if( disp_i.type() == CV_16S )
                findStereoCorrespondenceBM<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1, DISPARITY_SHIFT_16S );
            else
                findStereoCorrespondenceBM<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1, DISPARITY_SHIFT_32S );
        }
A
Andrey Kamaev 已提交
1021

1022
        if( state->disp12MaxDiff >= 0 )
V
Vadim Pisarevsky 已提交
1023
            validateDisparity( disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff );
A
Andrey Kamaev 已提交
1024

1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038
        if( roi.x > 0 )
        {
            part = disp_i.colRange(0, roi.x);
            part = Scalar::all(FILTERED);
        }
        if( roi.x + roi.width < cols )
        {
            part = disp_i.colRange(roi.x + roi.width, cols);
            part = Scalar::all(FILTERED);
        }
    }

protected:
    const Mat *left, *right;
V
Vadim Pisarevsky 已提交
1039 1040
    Mat* disp, *slidingSumBuf, *cost;
    StereoBMParams *state;
A
Andrey Kamaev 已提交
1041

1042
    int nstripes;
V
Vadim Pisarevsky 已提交
1043
    size_t stripeBufSize;
1044 1045
    bool useShorts;
    Rect validDisparityRect;
1046
    bool useSIMD;
1047 1048
};

1049
class StereoBMImpl : public StereoBM
A
Andrey Kamaev 已提交
1050
{
V
Vadim Pisarevsky 已提交
1051 1052 1053 1054 1055
public:
    StereoBMImpl()
    {
        params = StereoBMParams();
    }
1056

1057
    StereoBMImpl( int _numDisparities, int _SADWindowSize )
V
Vadim Pisarevsky 已提交
1058
    {
1059
        params = StereoBMParams(_numDisparities, _SADWindowSize);
V
Vadim Pisarevsky 已提交
1060
    }
1061

V
Vadim Pisarevsky 已提交
1062 1063
    void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
    {
1064 1065
        CV_INSTRUMENT_REGION()

V
Vadim Pisarevsky 已提交
1066
        int dtype = disparr.fixedType() ? disparr.type() : params.dispType;
K
Konstantin Matskevich 已提交
1067
        Size leftsize = leftarr.size();
1068

K
Konstantin Matskevich 已提交
1069
        if (leftarr.size() != rightarr.size())
1070
            CV_Error( Error::StsUnmatchedSizes, "All the images must have the same size" );
1071

K
Konstantin Matskevich 已提交
1072
        if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1)
1073
            CV_Error( Error::StsUnsupportedFormat, "Both input images must have CV_8UC1" );
1074

V
Vadim Pisarevsky 已提交
1075
        if (dtype != CV_16SC1 && dtype != CV_32FC1)
1076
            CV_Error( Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format" );
1077

1078 1079
        if( params.preFilterType != PREFILTER_NORMALIZED_RESPONSE &&
            params.preFilterType != PREFILTER_XSOBEL )
1080
            CV_Error( Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE" );
1081

V
Vadim Pisarevsky 已提交
1082
        if( params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0 )
1083
            CV_Error( Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255" );
1084

V
Vadim Pisarevsky 已提交
1085
        if( params.preFilterCap < 1 || params.preFilterCap > 63 )
1086
            CV_Error( Error::StsOutOfRange, "preFilterCap must be within 1..63" );
1087

V
Vadim Pisarevsky 已提交
1088
        if( params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 ||
K
Konstantin Matskevich 已提交
1089
            params.SADWindowSize >= std::min(leftsize.width, leftsize.height) )
1090
            CV_Error( Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height" );
1091

V
Vadim Pisarevsky 已提交
1092
        if( params.numDisparities <= 0 || params.numDisparities % 16 != 0 )
1093
            CV_Error( Error::StsOutOfRange, "numDisparities must be positive and divisble by 16" );
A
Andrey Kamaev 已提交
1094

V
Vadim Pisarevsky 已提交
1095
        if( params.textureThreshold < 0 )
1096
            CV_Error( Error::StsOutOfRange, "texture threshold must be non-negative" );
1097

V
Vadim Pisarevsky 已提交
1098
        if( params.uniquenessRatio < 0 )
1099
            CV_Error( Error::StsOutOfRange, "uniqueness ratio must be non-negative" );
A
Andrey Kamaev 已提交
1100

Y
Your Name 已提交
1101 1102 1103 1104 1105
        int disp_shift;
        if (dtype == CV_16SC1)
            disp_shift = DISPARITY_SHIFT_16S;
        else
            disp_shift = DISPARITY_SHIFT_32S;
1106 1107


Y
Your Name 已提交
1108
        int FILTERED = (params.minDisparity - 1) << disp_shift;
K
Konstantin Matskevich 已提交
1109

1110
#ifdef HAVE_OPENCL
K
Konstantin Matskevich 已提交
1111
        if(ocl::useOpenCL() && disparr.isUMat() && params.textureThreshold == 0)
K
Konstantin Matskevich 已提交
1112 1113
        {
            UMat left, right;
K
Konstantin Matskevich 已提交
1114 1115 1116 1117 1118 1119 1120
            if(ocl_prefiltering(leftarr, rightarr, left, right, &params))
            {
                if(ocl_stereobm(left, right, disparr, &params))
                {
                    if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
                        filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
                    if (dtype == CV_32F)
Y
Your Name 已提交
1121
                        disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << disp_shift), 0);
1122
                    CV_IMPL_ADD(CV_IMPL_OCL);
K
Konstantin Matskevich 已提交
1123 1124 1125
                    return;
                }
            }
K
Konstantin Matskevich 已提交
1126
        }
1127
#endif
K
Konstantin Matskevich 已提交
1128 1129 1130 1131 1132

        Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
        disparr.create(left0.size(), dtype);
        Mat disp0 = disparr.getMat();

V
Vadim Pisarevsky 已提交
1133 1134 1135
        preFilteredImg0.create( left0.size(), CV_8U );
        preFilteredImg1.create( left0.size(), CV_8U );
        cost.create( left0.size(), CV_16S );
1136

V
Vadim Pisarevsky 已提交
1137
        Mat left = preFilteredImg0, right = preFilteredImg1;
A
Andrey Kamaev 已提交
1138

V
Vadim Pisarevsky 已提交
1139 1140
        int mindisp = params.minDisparity;
        int ndisp = params.numDisparities;
1141

V
Vadim Pisarevsky 已提交
1142 1143 1144 1145 1146
        int width = left0.cols;
        int height = left0.rows;
        int lofs = std::max(ndisp - 1 + mindisp, 0);
        int rofs = -std::min(ndisp - 1 + mindisp, 0);
        int width1 = width - rofs - ndisp + 1;
A
Andrey Kamaev 已提交
1147

V
Vadim Pisarevsky 已提交
1148
        if( lofs >= width || rofs >= width || width1 < 1 )
1149
        {
Y
Your Name 已提交
1150
            disp0 = Scalar::all( FILTERED * ( disp0.type() < CV_32F ? 1 : 1./(1 << disp_shift) ) );
V
Vadim Pisarevsky 已提交
1151
            return;
1152
        }
A
Andrey Kamaev 已提交
1153

V
Vadim Pisarevsky 已提交
1154 1155 1156
        Mat disp = disp0;
        if( dtype == CV_32F )
        {
L
Leonardo Brás 已提交
1157
            dispbuf.create(disp0.size(), CV_32S);
V
Vadim Pisarevsky 已提交
1158 1159
            disp = dispbuf;
        }
1160

V
Vadim Pisarevsky 已提交
1161 1162 1163 1164 1165
        int wsz = params.SADWindowSize;
        int bufSize0 = (int)((ndisp + 2)*sizeof(int));
        bufSize0 += (int)((height+wsz+2)*ndisp*sizeof(int));
        bufSize0 += (int)((height + wsz + 2)*sizeof(int));
        bufSize0 += (int)((height+wsz+2)*ndisp*(wsz+2)*sizeof(uchar) + 256);
A
Andrey Kamaev 已提交
1166

V
Vadim Pisarevsky 已提交
1167 1168 1169 1170
        int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256);
        int bufSize2 = 0;
        if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
            bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar));
A
Andrey Kamaev 已提交
1171

1172
        bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21;
V
Vadim Pisarevsky 已提交
1173 1174 1175 1176 1177 1178 1179 1180 1181
        const double SAD_overhead_coeff = 10.0;
        double N0 = 8000000 / (useShorts ? 1 : 4);  // approx tbb's min number instructions reasonable for one thread
        double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height);
        int nstripes = cvCeil(height / maxStripeSize);
        int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2));

        if( slidingSumBuf.cols < bufSize )
            slidingSumBuf.create( 1, bufSize, CV_8U );

1182
        uchar *_buf = slidingSumBuf.ptr();
K
Konstantin Matskevich 已提交
1183

V
Vadim Pisarevsky 已提交
1184 1185 1186
        parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);

        Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
P
Paolo Perkovic 已提交
1187 1188
        validDisparityRect = getValidDisparityROI(R1.area() > 0 ? R1 : validDisparityRect,
                                                  R2.area() > 0 ? R2 : validDisparityRect,
V
Vadim Pisarevsky 已提交
1189 1190
                                                  params.minDisparity, params.numDisparities,
                                                  params.SADWindowSize);
1191

V
Vadim Pisarevsky 已提交
1192 1193 1194 1195
        parallel_for_(Range(0, nstripes),
                      FindStereoCorrespInvoker(left, right, disp, &params, nstripes,
                                               bufSize0, useShorts, validDisparityRect,
                                               slidingSumBuf, cost));
1196

V
Vadim Pisarevsky 已提交
1197 1198
        if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
            filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
1199

V
Vadim Pisarevsky 已提交
1200
        if (disp0.data != disp.data)
Y
Your Name 已提交
1201
            disp.convertTo(disp0, disp0.type(), 1./(1 << disp_shift), 0);
1202
    }
A
Andrey Kamaev 已提交
1203

1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247
    int getMinDisparity() const { return params.minDisparity; }
    void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }

    int getNumDisparities() const { return params.numDisparities; }
    void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }

    int getBlockSize() const { return params.SADWindowSize; }
    void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }

    int getSpeckleWindowSize() const { return params.speckleWindowSize; }
    void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }

    int getSpeckleRange() const { return params.speckleRange; }
    void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }

    int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
    void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }

    int getPreFilterType() const { return params.preFilterType; }
    void setPreFilterType(int preFilterType) { params.preFilterType = preFilterType; }

    int getPreFilterSize() const { return params.preFilterSize; }
    void setPreFilterSize(int preFilterSize) { params.preFilterSize = preFilterSize; }

    int getPreFilterCap() const { return params.preFilterCap; }
    void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }

    int getTextureThreshold() const { return params.textureThreshold; }
    void setTextureThreshold(int textureThreshold) { params.textureThreshold = textureThreshold; }

    int getUniquenessRatio() const { return params.uniquenessRatio; }
    void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }

    int getSmallerBlockSize() const { return 0; }
    void setSmallerBlockSize(int) {}

    Rect getROI1() const { return params.roi1; }
    void setROI1(Rect roi1) { params.roi1 = roi1; }

    Rect getROI2() const { return params.roi2; }
    void setROI2(Rect roi2) { params.roi2 = roi2; }

    void write(FileStorage& fs) const
    {
1248
        writeFormat(fs);
1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264
        fs << "name" << name_
        << "minDisparity" << params.minDisparity
        << "numDisparities" << params.numDisparities
        << "blockSize" << params.SADWindowSize
        << "speckleWindowSize" << params.speckleWindowSize
        << "speckleRange" << params.speckleRange
        << "disp12MaxDiff" << params.disp12MaxDiff
        << "preFilterType" << params.preFilterType
        << "preFilterSize" << params.preFilterSize
        << "preFilterCap" << params.preFilterCap
        << "textureThreshold" << params.textureThreshold
        << "uniquenessRatio" << params.uniquenessRatio;
    }

    void read(const FileNode& fn)
    {
1265
        FileNode n = fn["name"];
1266
        CV_Assert( n.isString() && String(n) == name_ );
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279
        params.minDisparity = (int)fn["minDisparity"];
        params.numDisparities = (int)fn["numDisparities"];
        params.SADWindowSize = (int)fn["blockSize"];
        params.speckleWindowSize = (int)fn["speckleWindowSize"];
        params.speckleRange = (int)fn["speckleRange"];
        params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
        params.preFilterType = (int)fn["preFilterType"];
        params.preFilterSize = (int)fn["preFilterSize"];
        params.preFilterCap = (int)fn["preFilterCap"];
        params.textureThreshold = (int)fn["textureThreshold"];
        params.uniquenessRatio = (int)fn["uniquenessRatio"];
        params.roi1 = params.roi2 = Rect();
    }
A
Andrey Kamaev 已提交
1280

V
Vadim Pisarevsky 已提交
1281 1282 1283
    StereoBMParams params;
    Mat preFilteredImg0, preFilteredImg1, cost, dispbuf;
    Mat slidingSumBuf;
1284 1285

    static const char* name_;
V
Vadim Pisarevsky 已提交
1286
};
1287

1288
const char* StereoBMImpl::name_ = "StereoMatcher.BM";
A
Andrey Kamaev 已提交
1289

1290
Ptr<StereoBM> StereoBM::create(int _numDisparities, int _SADWindowSize)
1291
{
R
Roman Donchenko 已提交
1292
    return makePtr<StereoBMImpl>(_numDisparities, _SADWindowSize);
1293 1294
}

1295 1296
}

1297
/* End of file. */