提交 df3997b1 编写于 作者: P peng xiao

Fix ocl::pyrUp

Use predefined OpenCL function to convert integers to floating points.
This is more accurate than before as it enables:
1. saturate cast
2. customized rounding
上级 2aa5f1bf
......@@ -18,6 +18,7 @@
// Zhang Chunpeng chunpeng@multicorewareinc.com
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -47,7 +48,7 @@
//#pragma OPENCL EXTENSION cl_amd_printf : enable
uchar get_valid_uchar(uchar data)
uchar get_valid_uchar(float data)
{
return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0);
}
......@@ -142,7 +143,7 @@ __kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][tidx];
if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = (float)(4.0f * sum);
dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum);
}
......@@ -244,7 +245,7 @@ __kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = (float)(4.0f * sum);
dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum);
}
......@@ -351,31 +352,6 @@ __kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_8UC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
float4 covert_uchar4_to_float4(uchar4 data)
{
float4 f4Data = {0,0,0,0};
f4Data.x = (float)data.x;
f4Data.y = (float)data.y;
f4Data.z = (float)data.z;
f4Data.w = (float)data.w;
return f4Data;
}
uchar4 convert_float4_to_uchar4(float4 data)
{
uchar4 u4Data;
u4Data.x = get_valid_uchar(data.x);
u4Data.y = get_valid_uchar(data.y);
u4Data.z = get_valid_uchar(data.z);
u4Data.w = get_valid_uchar(data.w);
return u4Data;
}
__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
......@@ -406,7 +382,7 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[tidy][tidx] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]);
s_srcPatch[tidy][tidx] = convert_float4(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -476,38 +452,12 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
if ((x < dstCols) && (y < dstRows))
{
dst[x + y * dstStep] = convert_float4_to_uchar4(4.0f * sum);
dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum);
}
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
float4 covert_ushort4_to_float4(ushort4 data)
{
float4 f4Data = {0,0,0,0};
f4Data.x = (float)data.x;
f4Data.y = (float)data.y;
f4Data.z = (float)data.z;
f4Data.w = (float)data.w;
return f4Data;
}
ushort4 convert_float4_to_ushort4(float4 data)
{
ushort4 u4Data;
u4Data.x = (float)data.x;
u4Data.y = (float)data.y;
u4Data.z = (float)data.z;
u4Data.w = (float)data.w;
return u4Data;
}
__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
......@@ -535,7 +485,7 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]);
s_srcPatch[get_local_id(1)][get_local_id(0)] = convert_float4(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -570,11 +520,11 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
if (eveny)
{
sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1 ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
......@@ -610,7 +560,7 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
if ((x < dstCols) && (y < dstRows))
{
dst[x + y * dstStep] = convert_float4_to_ushort4(4.0f * sum);
dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum);
}
}
......@@ -681,11 +631,11 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
if (eveny)
{
sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
sum = sum + (oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[tidy][tidx] = sum;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册