提交 504bc763 作者: vbystricky

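Remove pre_invalid parameter from the integral_sum_cols OpenCL kernels (sdepth == 4 and sdepth == 5) and from the host-side kernel arguments in ocl_integral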

上级 45f21e4f
......@@ -63,7 +63,7 @@
#if sdepth == 4
kernel void integral_sum_cols(__global uchar4 *src, __global int *sum,
int src_offset, int pre_invalid, int rows, int cols, int src_step, int dst_step)
int src_offset, int rows, int cols, int src_step, int dst_step)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
......@@ -122,19 +122,19 @@ kernel void integral_sum_cols(__global uchar4 *src, __global int *sum,
barrier(CLK_LOCAL_MEM_FENCE);
if(lid > 0 && (i+lid) <= rows)
{
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
int loc_s0 = gid * dst_step + i + lid - 1, loc_s1 = loc_s0 + dst_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
if(gid * 4 + k >= cols) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
if(gid * 4 + k + 4 >= cols) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
}
}
......@@ -238,7 +238,7 @@ kernel void integral_sum_rows(__global int4 *srcsum, __global int *sum,
#elif sdepth == 5
kernel void integral_sum_cols(__global uchar4 *src, __global float *sum,
int src_offset, int pre_invalid, int rows, int cols, int src_step, int dst_step)
int src_offset, int rows, int cols, int src_step, int dst_step)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
......@@ -297,19 +297,19 @@ kernel void integral_sum_cols(__global uchar4 *src, __global float *sum,
barrier(CLK_LOCAL_MEM_FENCE);
if(lid > 0 && (i+lid) <= rows)
{
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
int loc_s0 = gid * dst_step + i + lid - 1, loc_s1 = loc_s0 + dst_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
if(gid * 4 + k >= cols) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
}
sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
if(gid * 4 + k + 4 >= cols) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
}
}
......
......@@ -254,12 +254,12 @@ static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
UMat src = _src.getUMat(), t_sum(t_size, sdepth), sum = _sum.getUMat();
t_sum = t_sum(Range::all(), Range(0, size.height));
int offset = (int)src.offset / vlen, pre_invalid = (int)src.offset % vlen;
int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
int offset = (int)src.offset / vlen;
int vcols = (src.cols + vlen - 1) / vlen;
int sum_offset = (int)sum.offset / vlen;
k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
offset, pre_invalid, src.rows, src.cols, (int)src.step, (int)t_sum.step);
offset, src.rows, src.cols, (int)src.step, (int)t_sum.step);
size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
if (!k1.run(1, &gt, &lt, false))
return false;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册