提交 bcc086ba 编写于 作者: Y yao

fix all redefine build errors on some Intel OCL

上级 656594ad
......@@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
......@@ -63,6 +67,9 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -111,7 +118,10 @@ __kernel void arithm_absdiff_D2 (__global ushort *src1, int src1_step, int src1_
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -145,7 +155,10 @@ __kernel void arithm_absdiff_D3 (__global short *src1, int src1_step, int src1_o
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -249,7 +262,10 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -288,7 +304,10 @@ __kernel void arithm_s_absdiff_C1_D2 (__global ushort *src1, int src1_step, in
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -319,7 +338,10 @@ __kernel void arithm_s_absdiff_C1_D3 (__global short *src1, int src1_step, int
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -422,7 +444,10 @@ __kernel void arithm_s_absdiff_C2_D0 (__global uchar *src1, int src1_step, int
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -564,7 +589,10 @@ __kernel void arithm_s_absdiff_C3_D0 (__global uchar *src1, int src1_step, int
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -618,7 +646,10 @@ __kernel void arithm_s_absdiff_C3_D2 (__global ushort *src1, int src1_step, in
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -668,7 +699,10 @@ __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......
......@@ -45,7 +45,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
......@@ -64,7 +68,10 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -112,7 +119,10 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -147,7 +157,10 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -252,7 +265,10 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -311,7 +327,10 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -348,7 +367,10 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -477,7 +499,10 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -664,7 +689,10 @@ __kernel void arithm_add_with_mask_C3_D0 (__global uchar *src1, int src1_step, i
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -724,7 +752,10 @@ __kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -780,7 +811,10 @@ __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, i
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......
......@@ -42,8 +42,12 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined DOUBLE_SUPPORT
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
typedef double F;
#else
typedef float F;
......@@ -65,7 +69,10 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -122,7 +129,10 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
......@@ -182,7 +192,10 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
......@@ -241,9 +254,12 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
x = x << 2;
#define bitOfInt (sizeof(int)== 4 ? 2: 3)
#define bitOfInt (sizeof(int)== 4 ? 2: 3)
#define dst_align ((dst_offset >> bitOfInt) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> bitOfInt) & 3)
int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt));
int src2_index = mad24(y, src2_step, (x << bitOfInt) + src2_offset - (dst_align << bitOfInt));
......@@ -304,7 +320,10 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -366,7 +385,10 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
x = x << 2;
#define dst_align ((dst_offset >> 3) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
......
......@@ -44,9 +44,13 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
/**************************************add with scalar without mask**************************************/
__kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
......@@ -59,7 +63,10 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -99,7 +106,10 @@ __kernel void arithm_s_add_C1_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -131,7 +141,10 @@ __kernel void arithm_s_add_C1_D3 (__global short *src1, int src1_step, int src
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -233,7 +246,10 @@ __kernel void arithm_s_add_C2_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -378,7 +394,10 @@ __kernel void arithm_s_add_C3_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -432,7 +451,10 @@ __kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -482,7 +504,10 @@ __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......
......@@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
/**************************************add with scalar with mask**************************************/
......@@ -61,7 +65,10 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -111,7 +118,10 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -146,7 +156,10 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -267,7 +280,10 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -443,7 +459,10 @@ __kernel void arithm_s_add_with_mask_C3_D0 (__global uchar *src1, int src1_ste
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -501,7 +520,10 @@ __kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_st
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......@@ -555,7 +577,10 @@ __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_ste
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
......
......@@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
......@@ -62,7 +66,10 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -112,7 +119,10 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -163,7 +173,10 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -215,7 +228,10 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......
......@@ -42,17 +42,20 @@
// the use of this software, even if advised of the possibility of such damage.
//
//
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************and with scalar without mask**************************************/
__kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -63,7 +66,10 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -86,7 +92,8 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -97,7 +104,10 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -119,7 +129,8 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -131,7 +142,10 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -150,7 +164,8 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -162,7 +177,10 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -181,7 +199,8 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -202,7 +221,8 @@ __kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -232,9 +252,9 @@ __kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step,
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
......@@ -256,7 +276,8 @@ __kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -268,7 +289,10 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -290,7 +314,8 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -302,7 +327,10 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -322,7 +350,8 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -343,7 +372,8 @@ __kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -364,7 +394,8 @@ __kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -384,7 +415,8 @@ __kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, i
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -406,7 +438,8 @@ __kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
......@@ -428,7 +461,8 @@ __kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -440,7 +474,10 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -484,7 +521,8 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -496,7 +534,10 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -539,7 +580,8 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -551,7 +593,10 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -589,7 +634,8 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -601,7 +647,10 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -639,7 +688,8 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -673,7 +723,8 @@ __kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -708,7 +759,8 @@ __kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
......@@ -743,7 +795,8 @@ __kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -765,7 +818,8 @@ __kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -786,7 +840,8 @@ __kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -806,7 +861,8 @@ __kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -826,7 +882,8 @@ __kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -846,7 +903,8 @@ __kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, i
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -869,7 +927,8 @@ __kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
......
......@@ -43,9 +43,12 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
......@@ -61,7 +64,10 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -101,7 +107,10 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -135,7 +144,10 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -170,7 +182,10 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......
......@@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
......@@ -62,7 +66,10 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -110,7 +117,10 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -147,7 +157,10 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -185,7 +198,10 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......
......@@ -43,14 +43,19 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************and with scalar without mask**************************************/
__kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -61,7 +66,10 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -84,7 +92,8 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -95,7 +104,10 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -117,7 +129,8 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -129,7 +142,10 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -148,7 +164,8 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -160,7 +177,10 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -179,7 +199,8 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -200,7 +221,8 @@ __kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, in
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -222,7 +244,8 @@ __kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
......@@ -245,8 +268,8 @@ __kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
......@@ -259,7 +282,10 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -280,7 +306,8 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
......@@ -293,7 +320,10 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -313,7 +343,8 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
......@@ -335,7 +366,8 @@ __kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
......@@ -378,7 +410,8 @@ __kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, in
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
......@@ -400,7 +433,8 @@ __kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
......@@ -423,7 +457,8 @@ __kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
......@@ -436,7 +471,10 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -480,7 +518,8 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
......@@ -493,7 +532,10 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -536,7 +578,8 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
......@@ -549,7 +592,10 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -587,7 +633,8 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
......@@ -600,7 +647,10 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -638,7 +688,8 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
......@@ -673,7 +724,8 @@ __kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, in
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
......@@ -706,7 +758,8 @@ __kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
......@@ -742,7 +795,8 @@ __kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
......@@ -765,7 +819,8 @@ __kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
......@@ -787,7 +842,8 @@ __kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
......@@ -808,7 +864,8 @@ __kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step,
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
......@@ -829,7 +886,8 @@ __kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
......@@ -850,7 +908,8 @@ __kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, in
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
......@@ -874,7 +933,8 @@ __kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
......
......@@ -43,9 +43,12 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
......@@ -62,7 +65,10 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -112,7 +118,10 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -163,7 +172,10 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -215,7 +227,10 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -301,7 +316,6 @@ __kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src
*((__global char4 *)((__global char *)dst + dst_index)) = tmp;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
......
......@@ -42,17 +42,19 @@
// the use of this software, even if advised of the possibility of such damage.
//
//
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************xor with scalar without mask**************************************/
__kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -63,7 +65,10 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -86,7 +91,8 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -97,7 +103,10 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -119,7 +128,8 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -131,7 +141,10 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -150,7 +163,8 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -162,7 +176,10 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -181,7 +198,8 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -202,7 +220,8 @@ __kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -234,7 +253,8 @@ __kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step,
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
......@@ -256,7 +276,8 @@ __kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -268,7 +289,10 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -290,7 +314,8 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -302,7 +327,10 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -322,7 +350,8 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -343,7 +372,8 @@ __kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -364,7 +394,8 @@ __kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -384,7 +415,8 @@ __kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, i
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -406,7 +438,8 @@ __kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
......@@ -428,7 +461,8 @@ __kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -440,7 +474,10 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -484,7 +521,8 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -496,7 +534,10 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -539,7 +580,8 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -551,7 +593,10 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -589,7 +634,8 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -601,7 +647,10 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -639,7 +688,8 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -673,7 +723,8 @@ __kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -708,7 +759,8 @@ __kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
......@@ -743,7 +795,8 @@ __kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
......@@ -765,7 +818,8 @@ __kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
......@@ -786,7 +840,8 @@ __kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
......@@ -806,7 +861,8 @@ __kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
......@@ -826,7 +882,8 @@ __kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
......@@ -846,7 +903,8 @@ __kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, i
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
......@@ -869,7 +927,8 @@ __kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
......
......@@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
......@@ -62,7 +66,10 @@ __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -114,7 +121,10 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -165,7 +175,10 @@ __kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -217,7 +230,10 @@ __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -265,7 +281,10 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -275,7 +294,8 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src2_index < 0)
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
if(src2_index < 0)
{
float4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
......@@ -307,7 +327,10 @@ __kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 3) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
......@@ -358,7 +381,10 @@ __kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -409,7 +435,10 @@ __kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -462,7 +491,10 @@ __kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -511,7 +543,10 @@ __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -560,7 +595,10 @@ __kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -609,7 +647,10 @@ __kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 3) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
......@@ -660,7 +701,10 @@ __kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -714,7 +758,10 @@ __kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -769,7 +816,10 @@ __kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -820,7 +870,10 @@ __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_
{
x = x << 2;
#define dst_align ((dst_offset >> 2)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -869,7 +922,10 @@ __kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 2)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -920,7 +976,10 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 3)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3)& 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
......@@ -942,7 +1001,8 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
double4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
} uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
......@@ -954,3 +1014,4 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
}
}
#endif
......@@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
/***********************************Compare NE*******************************/
__kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src1_offset,
......@@ -58,7 +62,10 @@ __kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -110,7 +117,10 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -162,7 +172,10 @@ __kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -210,7 +223,10 @@ __kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -259,7 +275,10 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -269,7 +288,8 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src1_index < 0)
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
float4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
......@@ -306,7 +326,10 @@ __kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 3) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
......@@ -358,7 +381,10 @@ __kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -410,7 +436,10 @@ __kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -463,7 +492,10 @@ __kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -512,7 +544,10 @@ __kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -564,7 +599,10 @@ __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -613,7 +651,10 @@ __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 3) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
......@@ -664,7 +705,10 @@ __kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -717,7 +761,10 @@ __kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -770,7 +817,10 @@ __kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -819,7 +869,10 @@ __kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -867,7 +920,10 @@ __kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 2)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
......@@ -915,7 +971,10 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#define dst_align ((dst_offset >> 3)& 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3)& 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
......@@ -952,3 +1011,5 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
}
}
#endif
......@@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
typedef double F ;
typedef double4 F4;
#define convert_F4 convert_double4
......@@ -56,34 +60,24 @@ typedef float4 F4;
#define convert_F float
#endif
uchar round2_uchar(F v){
uchar v1 = convert_uchar_sat(round(v));
//uchar v2 = convert_uchar_sat(v+(v>=0 ? 0.5 : -0.5));
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline uchar round2_uchar(F v)
{
return convert_uchar_sat(round(v));
}
ushort round2_ushort(F v){
ushort v1 = convert_ushort_sat(round(v));
//ushort v2 = convert_ushort_sat(v+(v>=0 ? 0.5 : -0.5));
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline ushort round2_ushort(F v)
{
return convert_ushort_sat(round(v));
}
short round2_short(F v){
short v1 = convert_short_sat(round(v));
//short v2 = convert_short_sat(v+(v>=0 ? 0.5 : -0.5));
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline short round2_short(F v)
{
return convert_short_sat(round(v));
}
int round2_int(F v){
int v1 = convert_int_sat(round(v));
//int v2 = convert_int_sat(v+(v>=0 ? 0.5 : -0.5));
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline int round2_int(F v)
{
return convert_int_sat(round(v));
}
///////////////////////////////////////////////////////////////////////////////////////
////////////////////////////divide///////////////////////////////////////////////////
......@@ -94,39 +88,41 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1, F scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
int2 coor = (int2)(get_global_id(0), get_global_id(1));
if (x < cols && y < rows)
if (coor.x < cols && coor.y < rows)
{
x = x << 2;
coor.x = coor.x << 2;
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int2 src_index = (int2)(mad24(coor.y, src1_step, coor.x + src1_offset - dst_align),
mad24(coor.y, src2_step, coor.x + src2_offset - dst_align));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int4 dst_args = (int4)(mad24(coor.y, dst_step, dst_offset),
mad24(coor.y, dst_step, dst_offset + dst_step1),
mad24(coor.y, dst_step, dst_offset + coor.x & (int)0xfffffffc),
0);
uchar4 src1_data = vload4(0, src1 + src1_index);
uchar4 src2_data = vload4(0, src2 + src2_index);
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 src1_data = vload4(0, src1 + src_index.x);
uchar4 src2_data = vload4(0, src2 + src_index.y);
uchar4 dst_data = *((__global uchar4 *)(dst + dst_args.z));
F4 tmp = convert_F4(src1_data) * scalar;
uchar4 tmp_data;
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (F)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (F)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (F)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (F)src2_data.w);
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / src2_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
dst_data.x = ((dst_args.z + 0 >= dst_args.x) && (dst_args.z + 0 < dst_args.y)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_args.z + 1 >= dst_args.x) && (dst_args.z + 1 < dst_args.y)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_args.z + 2 >= dst_args.x) && (dst_args.z + 2 < dst_args.y)) ? tmp_data.z : dst_data.z;
dst_data.w = ((dst_args.z + 3 >= dst_args.x) && (dst_args.z + 3 < dst_args.y)) ? tmp_data.w : dst_data.w;
*((__global uchar4 *)(dst + dst_index)) = dst_data;
*((__global uchar4 *)(dst + dst_args.z)) = dst_data;
}
}
......@@ -142,7 +138,10 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -182,7 +181,10 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -297,7 +299,10 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index = mad24(y, src_step, x + src_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -333,7 +338,10 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -368,7 +376,10 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
......@@ -455,3 +466,5 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
}
}
#endif
......@@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
......@@ -61,7 +65,10 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
......@@ -116,7 +123,10 @@ __kernel void arithm_flip_rows_D1 (__global char *src, int src_step, int src_off
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
......@@ -158,7 +168,10 @@ __kernel void arithm_flip_rows_D2 (__global ushort *src, int src_step, int src_o
{
x = x << 2;
#define dst_align (((dst_offset >> 1) & 3) << 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset >> 1) & 3) << 1)
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
......@@ -200,7 +213,10 @@ __kernel void arithm_flip_rows_D3 (__global short *src, int src_step, int src_of
{
x = x << 2;
#define dst_align (((dst_offset >> 1) & 3) << 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset >> 1) & 3) << 1)
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
......
......@@ -16,7 +16,6 @@
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Dachuan Zhao, dachuan@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -44,11 +43,16 @@
//
//M*/
#if defined DOUBLE_SUPPORT
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
int4 round_int4(float4 v){
int4 round_int4(float4 v)
{
v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5);
v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5);
v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5);
......@@ -56,7 +60,8 @@ int4 round_int4(float4 v){
return convert_int4_sat(v);
}
uint4 round_uint4(float4 v){
uint4 round_uint4(float4 v)
{
v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5);
v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5);
v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5);
......@@ -64,7 +69,8 @@ uint4 round_uint4(float4 v){
return convert_uint4_sat(v);
}
long round_int(float v){
long round_int(float v)
{
v = v + (v > 0 ? 0.5 : -0.5);
return convert_int_sat(v);
......@@ -85,7 +91,10 @@ __kernel void arithm_mul_D0 (__global uchar *src1, int src1_step, int src1_offse
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
......@@ -130,7 +139,10 @@ __kernel void arithm_mul_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......@@ -166,7 +178,10 @@ __kernel void arithm_mul_D3 (__global short *src1, int src1_step, int src1_offse
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册