arithm.cl 15.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Jia Haipeng, jiahaipeng95@gmail.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/*
  Usage:
     after compiling this program user gets a single kernel called KF.
     the following flags should be passed:
     1) one of "-D BINARY_OP", "-D UNARY_OP", "-D MASK_BINARY_OP" or "-D MASK_UNARY_OP"
     2) the actual operation performed, one of "-D OP_...", see below the list of operations.
     2a) "-D dstDepth=<destination depth> [-D cn=<num channels]"
         for some operations, like min/max/and/or/xor it's enough
     2b) "-D srcDepth1=<source1 depth> -D srcDepth2=<source2 depth> -D dstDepth=<destination depth>
          -D workDepth=<work depth> [-D cn=<num channels>]" - for mixed-type operations
*/

I
Ilya Lavrenov 已提交
60
#ifdef DOUBLE_SUPPORT
I
Ilya Lavrenov 已提交
61
#ifdef cl_amd_fp64
62
#pragma OPENCL EXTENSION cl_amd_fp64:enable
I
Ilya Lavrenov 已提交
63 64
#elif defined cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
65
#endif
I
Ilya Lavrenov 已提交
66 67 68
#endif

#if depth <= 5
I
Ilya Lavrenov 已提交
69
#define CV_PI M_PI_F
I
Ilya Lavrenov 已提交
70 71
#else
#define CV_PI M_PI
72 73
#endif

74 75 76 77 78 79 80 81 82 83 84 85 86 87
#ifndef cn
#define cn 1
#endif

#if cn == 1
#undef srcT1_C1
#undef srcT2_C1
#undef dstT_C1
#define srcT1_C1 srcT1
#define srcT2_C1 srcT2
#define dstT_C1 dstT
#endif

#if cn != 3
I
Ilya Lavrenov 已提交
88 89
    #define storedst(val) *(__global dstT *)(dstptr + dst_index) = val
    #define storedst2(val) *(__global dstT *)(dstptr2 + dst_index2) = val
90
#else
I
Ilya Lavrenov 已提交
91 92
    #define storedst(val) vstore3(val, 0, (__global dstT_C1 *)(dstptr + dst_index))
    #define storedst2(val) vstore3(val, 0, (__global dstT_C1 *)(dstptr2 + dst_index2))
93 94
#endif

I
Ilya Lavrenov 已提交
95
#define noconvert
96 97 98

#ifndef workT

I
Ilya Lavrenov 已提交
99
    #ifndef srcT1
100
    #define srcT1 dstT
I
Ilya Lavrenov 已提交
101 102 103
    #endif

    #ifndef srcT1_C1
104
    #define srcT1_C1 dstT_C1
I
Ilya Lavrenov 已提交
105
    #endif
I
Ilya Lavrenov 已提交
106

I
Ilya Lavrenov 已提交
107
    #ifndef srcT2
108
    #define srcT2 dstT
I
Ilya Lavrenov 已提交
109 110 111
    #endif

    #ifndef srcT2_C1
112
    #define srcT2_C1 dstT_C1
I
Ilya Lavrenov 已提交
113
    #endif
I
Ilya Lavrenov 已提交
114

115
    #define workT dstT
116
    #if cn != 3
I
Ilya Lavrenov 已提交
117 118
        #define srcelem1 *(__global srcT1 *)(srcptr1 + src1_index)
        #define srcelem2 *(__global srcT2 *)(srcptr2 + src2_index)
119
    #else
I
Ilya Lavrenov 已提交
120 121
        #define srcelem1 vload3(0, (__global srcT1_C1 *)(srcptr1 + src1_index))
        #define srcelem2 vload3(0, (__global srcT2_C1 *)(srcptr2 + src2_index))
122
    #endif
I
Ilya Lavrenov 已提交
123
    #ifndef convertToDT
124
    #define convertToDT noconvert
I
Ilya Lavrenov 已提交
125
    #endif
126 127 128

#else

129 130 131
    #ifndef convertToWT2
    #define convertToWT2 convertToWT1
    #endif
132
    #if cn != 3
I
Ilya Lavrenov 已提交
133 134
        #define srcelem1 convertToWT1(*(__global srcT1 *)(srcptr1 + src1_index))
        #define srcelem2 convertToWT2(*(__global srcT2 *)(srcptr2 + src2_index))
135
    #else
I
Ilya Lavrenov 已提交
136 137
        #define srcelem1 convertToWT1(vload3(0, (__global srcT1_C1 *)(srcptr1 + src1_index)))
        #define srcelem2 convertToWT2(vload3(0, (__global srcT2_C1 *)(srcptr2 + src2_index)))
138 139 140
    #endif

#endif
141

142 143
#ifndef workST
#define workST workT
144 145 146
#endif

#define EXTRA_PARAMS
I
Ilya Lavrenov 已提交
147
#define EXTRA_INDEX
I
Ilya Lavrenov 已提交
148
#define EXTRA_INDEX_ADD
149

150
#if defined OP_ADD
151
#define PROCESS_ELEM storedst(convertToDT(srcelem1 + srcelem2))
152 153

#elif defined OP_SUB
154
#define PROCESS_ELEM storedst(convertToDT(srcelem1 - srcelem2))
155 156

#elif defined OP_RSUB
157
#define PROCESS_ELEM storedst(convertToDT(srcelem2 - srcelem1))
158 159

#elif defined OP_ABSDIFF
I
Ilya Lavrenov 已提交
160 161
#define PROCESS_ELEM \
    workT v = srcelem1 - srcelem2; \
162
    storedst(convertToDT(v >= (workT)(0) ? v : -v))
163 164

#elif defined OP_AND
165
#define PROCESS_ELEM storedst(srcelem1 & srcelem2)
166 167

#elif defined OP_OR
168
#define PROCESS_ELEM storedst(srcelem1 | srcelem2)
169 170

#elif defined OP_XOR
171
#define PROCESS_ELEM storedst(srcelem1 ^ srcelem2)
172 173

#elif defined OP_NOT
174
#define PROCESS_ELEM storedst(~srcelem1)
175 176

#elif defined OP_MIN
177
#define PROCESS_ELEM storedst(min(srcelem1, srcelem2))
178 179

#elif defined OP_MAX
180
#define PROCESS_ELEM storedst(max(srcelem1, srcelem2))
181 182

#elif defined OP_MUL
183
#define PROCESS_ELEM storedst(convertToDT(srcelem1 * srcelem2))
184 185 186

#elif defined OP_MUL_SCALE
#undef EXTRA_PARAMS
I
Ilya Lavrenov 已提交
187
#ifdef UNARY_OP
188 189 190
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
I
Ilya Lavrenov 已提交
191 192 193
#else
#define EXTRA_PARAMS , scaleT scale
#endif
194
#define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2))
195 196 197 198

#elif defined OP_DIV
#define PROCESS_ELEM \
        workT e2 = srcelem2, zero = (workT)(0); \
199
        storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero))
200 201 202

#elif defined OP_DIV_SCALE
#undef EXTRA_PARAMS
I
Ilya Lavrenov 已提交
203
#ifdef UNARY_OP
204 205 206
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
I
Ilya Lavrenov 已提交
207 208 209
#else
#define EXTRA_PARAMS , scaleT scale
#endif
210 211
#define PROCESS_ELEM \
        workT e2 = srcelem2, zero = (workT)(0); \
212
        storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)))
I
Ilya Lavrenov 已提交
213 214 215 216

#elif defined OP_RDIV_SCALE
#undef EXTRA_PARAMS
#ifdef UNARY_OP
217 218 219
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
I
Ilya Lavrenov 已提交
220 221 222 223 224
#else
#define EXTRA_PARAMS , scaleT scale
#endif
#define PROCESS_ELEM \
        workT e1 = srcelem1, zero = (workT)(0); \
225
        storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)))
226 227 228

#elif defined OP_RECIP_SCALE
#undef EXTRA_PARAMS
I
Ilya Lavrenov 已提交
229
#define EXTRA_PARAMS , scaleT scale
230 231
#define PROCESS_ELEM \
        workT e1 = srcelem1, zero = (workT)(0); \
232
        storedst(convertToDT(e1 != zero ? scale / e1 : zero))
233 234 235

#elif defined OP_ADDW
#undef EXTRA_PARAMS
I
Ilya Lavrenov 已提交
236
#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma
I
Ilya Lavrenov 已提交
237 238 239 240 241
#if wdepth <= 4
#define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, alpha, mad24(srcelem2, beta, gamma))))
#else
#define PROCESS_ELEM storedst(convertToDT(mad(srcelem1, alpha, mad(srcelem2, beta, gamma))))
#endif
242 243

#elif defined OP_MAG
244
#define PROCESS_ELEM storedst(hypot(srcelem1, srcelem2))
245

I
Ilya Lavrenov 已提交
246 247 248
#elif defined OP_ABS_NOSAT
#define PROCESS_ELEM \
    dstT v = convertToDT(srcelem1); \
249
    storedst(v >= 0 ? v : -v)
I
Ilya Lavrenov 已提交
250

251 252 253
#elif defined OP_PHASE_RADIANS
#define PROCESS_ELEM \
        workT tmp = atan2(srcelem2, srcelem1); \
254
        if(tmp < 0) tmp += 6.283185307179586232f; \
255
        storedst(tmp)
256 257 258

#elif defined OP_PHASE_DEGREES
    #define PROCESS_ELEM \
259
    workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465f; \
260
    if(tmp < 0) tmp += 360; \
261
    storedst(tmp)
262 263

#elif defined OP_EXP
264
#define PROCESS_ELEM storedst(exp(srcelem1))
265

I
Ilya Lavrenov 已提交
266
#elif defined OP_POW
267
#define PROCESS_ELEM storedst(pow(srcelem1, srcelem2))
I
Ilya Lavrenov 已提交
268

I
Ilya Lavrenov 已提交
269 270 271 272 273 274 275 276 277 278
#elif defined OP_ROOTN
#define PROCESS_ELEM storedst(rootn(srcelem1, srcelem2))

#elif defined OP_POWR
#if depth == 5
#define PROCESS_ELEM storedst(native_powr(srcelem1, srcelem2))
#else
#define PROCESS_ELEM storedst(powr(srcelem1, srcelem2))
#endif

279 280 281
#elif defined OP_POWN
#undef workT
#define workT int
282
#define PROCESS_ELEM storedst(pown(srcelem1, srcelem2))
283

284
#elif defined OP_SQRT
285
#define PROCESS_ELEM storedst(sqrt(srcelem1))
286 287

#elif defined OP_LOG
288
#define PROCESS_ELEM \
289 290
    dstT v = (dstT)(srcelem1);\
    storedst(v > (dstT)(0) ? log(v) : log(-v))
291 292

#elif defined OP_CMP
I
Ilya Lavrenov 已提交
293
#define srcT2 srcT1
A
Alexander Alekhin 已提交
294
#ifndef convertToWT1
I
Ilya Lavrenov 已提交
295
#define convertToWT1
A
Alexander Alekhin 已提交
296 297 298 299 300
#endif
#define PROCESS_ELEM \
    workT __s1 = srcelem1; \
    workT __s2 = srcelem2; \
    storedst(((__s1 CMP_OPERATOR __s2) ? (dstT)(255) : (dstT)(0)))
301

302
#elif defined OP_CONVERT_SCALE_ABS
303
#undef EXTRA_PARAMS
I
Ilya Lavrenov 已提交
304
#define EXTRA_PARAMS , workT1 alpha, workT1 beta
I
Ilya Lavrenov 已提交
305
#if wdepth <= 4
306
#define PROCESS_ELEM \
I
Ilya Lavrenov 已提交
307
    workT value = mad24(srcelem1, (workT)(alpha), (workT)(beta)); \
308
    storedst(convertToDT(value >= 0 ? value : -value))
I
Ilya Lavrenov 已提交
309 310
#else
#define PROCESS_ELEM \
I
Ilya Lavrenov 已提交
311
    workT value = mad(srcelem1, (workT)(alpha), (workT)(beta)); \
I
Ilya Lavrenov 已提交
312 313
    storedst(convertToDT(value >= 0 ? value : -value))
#endif
314

315 316
#elif defined OP_SCALE_ADD
#undef EXTRA_PARAMS
I
Ilya Lavrenov 已提交
317
#define EXTRA_PARAMS , workT1 alpha
I
Ilya Lavrenov 已提交
318
#if wdepth <= 4
I
Ilya Lavrenov 已提交
319
#define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, (workT)(alpha), srcelem2)))
I
Ilya Lavrenov 已提交
320
#else
I
Ilya Lavrenov 已提交
321
#define PROCESS_ELEM storedst(convertToDT(mad(srcelem1, (workT)(alpha), srcelem2)))
I
Ilya Lavrenov 已提交
322
#endif
323

I
Ilya Lavrenov 已提交
324
#elif defined OP_CTP_AD || defined OP_CTP_AR
I
Ilya Lavrenov 已提交
325 326 327 328 329
#if depth <= 5
#define CV_EPSILON FLT_EPSILON
#else
#define CV_EPSILON DBL_EPSILON
#endif
I
Ilya Lavrenov 已提交
330 331 332 333 334 335 336 337 338 339 340 341
#ifdef OP_CTP_AD
#define TO_DEGREE cartToPolar *= (180 / CV_PI);
#elif defined OP_CTP_AR
#define TO_DEGREE
#endif
#define PROCESS_ELEM \
    dstT x = srcelem1, y = srcelem2; \
    dstT x2 = x * x, y2 = y * y; \
    dstT magnitude = sqrt(x2 + y2); \
    dstT tmp = y >= 0 ? 0 : CV_PI * 2; \
    tmp = x < 0 ? CV_PI : tmp; \
    dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \
I
Ilya Lavrenov 已提交
342
    dstT cartToPolar = y2 <= x2 ? x * y / mad((dstT)(0.28f), y2, x2 + CV_EPSILON) + tmp : (tmp1 - x * y / mad((dstT)(0.28f), x2, y2 + CV_EPSILON)); \
I
Ilya Lavrenov 已提交
343
    TO_DEGREE \
344 345
    storedst(magnitude); \
    storedst2(cartToPolar)
I
Ilya Lavrenov 已提交
346

I
Ilya Lavrenov 已提交
347 348 349 350 351 352 353 354 355 356 357 358
#elif defined OP_PTC_AD || defined OP_PTC_AR
#ifdef OP_PTC_AD
#define FROM_DEGREE \
    dstT ascale = CV_PI/180.0f; \
    dstT alpha = y * ascale
#else
#define FROM_DEGREE \
    dstT alpha = y
#endif
#define PROCESS_ELEM \
    dstT x = srcelem1, y = srcelem2; \
    FROM_DEGREE; \
359 360
    storedst(cos(alpha) * x); \
    storedst2(sin(alpha) * x)
I
Ilya Lavrenov 已提交
361

362 363 364 365 366
#elif defined OP_PATCH_NANS
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , int val
#define PROCESS_ELEM \
    if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \
367
        storedst(val)
368

369 370 371 372
#else
#error "unknown op type"
#endif

I
Ilya Lavrenov 已提交
373
#if defined OP_CTP_AD || defined OP_CTP_AR || defined OP_PTC_AD || defined OP_PTC_AR
I
Ilya Lavrenov 已提交
374 375 376
    #undef EXTRA_PARAMS
    #define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
    #undef EXTRA_INDEX
I
Ilya Lavrenov 已提交
377 378 379
    #define EXTRA_INDEX int dst_index2 = mad24(y0, dststep2, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset2))
    #undef EXTRA_INDEX_ADD
    #define EXTRA_INDEX_ADD dst_index2 += dststep2
I
Ilya Lavrenov 已提交
380 381
#endif

382
#if defined UNARY_OP || defined MASK_UNARY_OP
383

384 385
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
    defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
I
Ilya Lavrenov 已提交
386
    defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
I
Ilya Lavrenov 已提交
387
    defined OP_MUL || defined OP_DIV || defined OP_POWN || defined OP_POWR || defined OP_ROOTN
388
    #undef EXTRA_PARAMS
389 390 391
    #define EXTRA_PARAMS , workST srcelem2_
    #undef srcelem2
    #define srcelem2 srcelem2_
392
#endif
393 394 395 396 397 398

#if cn == 3
#undef srcelem2
#define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z)
#endif

399 400 401 402
#endif

#if defined BINARY_OP

I
Ilya Lavrenov 已提交
403 404 405
__kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
                 __global const uchar * srcptr2, int srcstep2, int srcoffset2,
                 __global uchar * dstptr, int dststep, int dstoffset,
406 407 408
                 int rows, int cols EXTRA_PARAMS )
{
    int x = get_global_id(0);
I
Ilya Lavrenov 已提交
409
    int y0 = get_global_id(1) * rowsPerWI;
410

I
Ilya Lavrenov 已提交
411
    if (x < cols)
412
    {
I
Ilya Lavrenov 已提交
413
        int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
414
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
I
Ilya Lavrenov 已提交
415
        int src2_index = mad24(y0, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
416
#endif
I
Ilya Lavrenov 已提交
417
        int dst_index  = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
I
Ilya Lavrenov 已提交
418
        EXTRA_INDEX;
419

I
Ilya Lavrenov 已提交
420 421 422 423 424 425 426 427
        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, dst_index += dststep)
        {
            PROCESS_ELEM;
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
            src2_index += srcstep2;
#endif
            EXTRA_INDEX_ADD;
        }
428 429 430 431 432
    }
}

#elif defined MASK_BINARY_OP

I
Ilya Lavrenov 已提交
433 434 435 436
__kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
                 __global const uchar * srcptr2, int srcstep2, int srcoffset2,
                 __global const uchar * mask, int maskstep, int maskoffset,
                 __global uchar * dstptr, int dststep, int dstoffset,
437 438 439
                 int rows, int cols EXTRA_PARAMS )
{
    int x = get_global_id(0);
I
Ilya Lavrenov 已提交
440
    int y0 = get_global_id(1) * rowsPerWI;
441

I
Ilya Lavrenov 已提交
442
    if (x < cols)
443
    {
I
Ilya Lavrenov 已提交
444 445 446 447 448 449 450 451 452 453 454
        int mask_index = mad24(y0, maskstep, x + maskoffset);
        int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
        int src2_index = mad24(y0, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
        int dst_index  = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));

        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, src2_index += srcstep2,
                                                                mask_index += maskstep, dst_index += dststep)
            if (mask[mask_index])
            {
                PROCESS_ELEM;
            }
455 456 457 458 459
    }
}

#elif defined UNARY_OP

I
Ilya Lavrenov 已提交
460 461
__kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
                 __global uchar * dstptr, int dststep, int dstoffset,
462 463 464
                 int rows, int cols EXTRA_PARAMS )
{
    int x = get_global_id(0);
I
Ilya Lavrenov 已提交
465
    int y0 = get_global_id(1) * rowsPerWI;
466

I
Ilya Lavrenov 已提交
467
    if (x < cols)
468
    {
I
Ilya Lavrenov 已提交
469 470
        int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
        int dst_index  = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
471

I
Ilya Lavrenov 已提交
472 473 474 475
        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, dst_index += dststep)
        {
            PROCESS_ELEM;
        }
476 477 478 479 480
    }
}

#elif defined MASK_UNARY_OP

I
Ilya Lavrenov 已提交
481 482 483
__kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
                 __global const uchar * mask, int maskstep, int maskoffset,
                 __global uchar * dstptr, int dststep, int dstoffset,
484 485 486
                 int rows, int cols EXTRA_PARAMS )
{
    int x = get_global_id(0);
I
fix  
Ilya Lavrenov 已提交
487
    int y0 = get_global_id(1) * rowsPerWI;
488

I
Ilya Lavrenov 已提交
489
    if (x < cols)
490
    {
I
Ilya Lavrenov 已提交
491 492 493 494 495 496 497 498 499
        int mask_index = mad24(y0, maskstep, x + maskoffset);
        int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
        int dst_index  = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));

        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, mask_index += maskstep, dst_index += dststep)
            if (mask[mask_index])
            {
                PROCESS_ELEM;
            }
500 501 502 503 504 505 506 507
    }
}

#else

#error "Unknown operation type"

#endif