Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
ce5bae1c
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ce5bae1c
编写于
6月 17, 2014
作者:
A
Alexander Alekhin
提交者:
OpenCV Buildbot
6月 17, 2014
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2871 from vbystricky:oclopt_integral
上级
1db9cc3f
606df046
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
94 addition
and
218 deletion
+94
-218
modules/imgproc/src/opencl/integral_sum.cl
modules/imgproc/src/opencl/integral_sum.cl
+90
-214
modules/imgproc/src/sumpixels.cpp
modules/imgproc/src/sumpixels.cpp
+4
-4
未找到文件。
modules/imgproc/src/opencl/integral_sum.cl
浏览文件 @
ce5bae1c
...
...
@@ -61,115 +61,64 @@
#
define
GET_CONFLICT_OFFSET
(
lid
)
((
lid
)
>>
LOG_NUM_BANKS
)
#
if
sdepth
==
4
#
define
sumT
int
#
define
vecSumT
int4
#
define
convertToSum4
convert_int4
#
elif
sdepth
==
5
#
define
sumT
float
#
define
vecSumT
float4
#
define
convertToSum4
convert_float4
#
endif
kernel
void
integral_sum_cols
(
__global
uchar4
*src,
__global
int
*sum
,
int
src_offset,
int
pre_invalid,
int
rows,
int
cols,
int
src_step,
int
dst_step
)
kernel
void
integral_sum_cols
(
__global
const
uchar4
*src,
__global
uchar
*sum_ptr
,
int
src_offset,
int
rows,
int
cols,
int
src_step,
int
dst_step
)
{
__global
sumT
*sum
=
(
__global
sumT
*
)
sum_ptr
;
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int4
src_t[2],
sum_t[2]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
int
*
sum_p
;
vecSumT
src_t[2],
sum_t[2]
;
__local
vecSumT
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
sumT
*
sum_p
;
src_step
=
src_step
>>
2
;
gid
=
gid
<<
1
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
int
lid_prim
=
((
lid
&
127
)
<<
1
)
+
1
;
for
(
int
i
=
0
; i < rows; i += LSIZE_1)
{
src_t[0]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
gid]
)
:
0
)
;
src_t[1]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
gid
+
1]
)
:
0
)
;
sum_t[0]
=
(
i
==
0
?
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
lm_sum[0][bf_loc]
=
src_t[0]
;
lm_sum[1][bf_loc]
=
src_t[1]
;
int
offset
=
1
;
for
(
int
d
=
LSIZE
>>
1
; d > 0; d>>=1)
if
(
i
+
lid
<
rows
)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
ai
=
offset
*
(((
lid
&
127
)
<<1
)
+1
)
-
1
,
bi
=
ai
+
offset
;
ai
+=
GET_CONFLICT_OFFSET
(
ai
)
;
bi
+=
GET_CONFLICT_OFFSET
(
bi
)
;
if
((
lid
&
127
)
<
d
)
{
lm_sum[lid
>>
7][bi]
+=
lm_sum[lid
>>
7][ai]
;
}
offset
<<=
1
;
int
src_index
=
mad24
((
lid+i
)
,
src_step,
gid
+
src_offset
)
;
src_t[0]
=
convertToSum4
(
src[src_index]
)
;
src_t[1]
=
convertToSum4
(
src[src_index
+
1]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
2
)
else
{
lm_sum[lid][LSIZE_2
+
LOG_LSIZE]
=
0
;
src_t[0]
=
(
vecSumT
)
0
;
src_t[1]
=
(
vecSumT
)
0
;
}
for
(
int
d
=
1
; d < LSIZE; d <<= 1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
offset
>>=
1
;
int
ai
=
offset
*
(((
lid
&
127
)
<<1
)
+1
)
-
1
,
bi
=
ai
+
offset
;
ai
+=
GET_CONFLICT_OFFSET
(
ai
)
;
bi
+=
GET_CONFLICT_OFFSET
(
bi
)
;
if
((
lid
&
127
)
<
d
)
{
lm_sum[lid
>>
7][bi]
+=
lm_sum[lid
>>
7][ai]
;
lm_sum[lid
>>
7][ai]
=
lm_sum[lid
>>
7][bi]
-
lm_sum[lid
>>
7][ai]
;
}
if
(
i
==
0
)
{
sum_t[0]
=
(
vecSumT
)
0
;
sum_t[1]
=
(
vecSumT
)
0
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
else
{
int
loc_s0
=
gid
*
dst_step
+
i
+
lid
-
1
-
pre_invalid
*
dst_step
/
4
,
loc_s1
=
loc_s0
+
dst_step
;
lm_sum[0][bf_loc]
+=
sum_t[0]
;
lm_sum[1][bf_loc]
+=
sum_t[1]
;
sum_p
=
(
__local
int*
)(
&
(
lm_sum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
4
+
k
>=
cols
+
pre_invalid
|
| gid * 4 + k < pre_invalid) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
}
sum_t[0]
=
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
;
sum_t[1]
=
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
kernel void integral_sum_rows(__global int4 *srcsum, __global int *sum,
int rows, int cols, int src_step, int sum_step, int sum_offset)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local int *sum_p;
src_step = src_step >> 4;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : 0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : 0;
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
lm_sum[0][bf_loc] = src_t[0];
lm_sum[0][bf_loc]
=
src_t[0]
;
lm_sum[1][bf_loc]
=
src_t[1]
;
int
offset
=
1
;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
for
(
int
d
=
LSIZE
>>
1
; d > 0; d>>=1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int ai = offset *
(((lid & 127)<<1) +1)
- 1,bi = ai + offset;
int
ai
=
offset
*
lid_prim
-
1
,
bi
=
ai
+
offset
;
ai
+=
GET_CONFLICT_OFFSET
(
ai
)
;
bi
+=
GET_CONFLICT_OFFSET
(
bi
)
;
...
...
@@ -180,15 +129,15 @@ kernel void integral_sum_rows(__global int4 *srcsum, __global int *sum,
offset
<<=
1
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if(lid < 2)
if
(
lid
<
2
)
{
lm_sum[lid][LSIZE_2
+
LOG_LSIZE]
=
0
;
}
for(int d = 1; d < LSIZE; d <<= 1)
for
(
int
d
=
1
; d < LSIZE; d <<= 1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
offset
>>=
1
;
int ai = offset *
(((lid & 127)<<1) +1)
- 1,bi = ai + offset;
int
ai
=
offset
*
lid_prim
-
1
,
bi
=
ai
+
offset
;
ai
+=
GET_CONFLICT_OFFSET
(
ai
)
;
bi
+=
GET_CONFLICT_OFFSET
(
bi
)
;
...
...
@@ -199,152 +148,78 @@ kernel void integral_sum_rows(__global int4 *srcsum, __global int *sum,
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if(gid == 0 && (i + lid) <= rows)
{
sum[sum_offset + i + lid] = 0;
}
if(i + lid == 0)
{
int loc0 = gid * 2 * sum_step;
for(int k = 1; k <= 8; k++)
{
if(gid * 8 + k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
}
}
if(lid > 0 && (i+lid) <= rows)
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
{
int loc_s0 =
sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step
;
int
loc_s0
=
mad24
(
gid,
dst_step,
i
+
lid
-
1
)
,
loc_s1
=
loc_s0
+
dst_step
;
lm_sum[0][bf_loc]
+=
sum_t[0]
;
lm_sum[1][bf_loc]
+=
sum_t[1]
;
sum_p = (__local
int
*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
sum_p
=
(
__local
sumT
*
)(
&
(
lm_sum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if(gid * 8 + k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
if
(
gid
*
4
+
k
>=
cols
)
break
;
sum[loc_s0
+
k
*
dst_step
/
4]
=
sum_p[k]
;
}
sum_p = (__local
int
*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
sum_p
=
(
__local
sumT
*
)(
&
(
lm_sum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if(gid * 8 + 4 + k >= cols) break;
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
if
(
gid
*
4
+
k
+
4
>=
cols
)
break
;
sum[loc_s1
+
k
*
dst_step
/
4]
=
sum_p[k]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
#elif sdepth == 5
kernel void integral_sum_
cols(__global uchar4 *src, __global float *sum
,
int
src_offset, int pre_invalid, int rows, int cols, int src_step, int dst_step
)
kernel
void
integral_sum_
rows
(
__global
const
uchar
*srcsum_ptr,
__global
uchar
*sum_ptr
,
int
rows,
int
cols,
int
src_step,
int
sum_step,
int
sum_offset
)
{
__global
const
vecSumT
*srcsum
=
(
__global
const
vecSumT
*
)
srcsum_ptr
;
__global
sumT
*sum
=
(
__global
sumT
*
)
sum_ptr
;
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
float4
src_t[2], sum_t[2];
__local
float4
lm_sum[2][LSIZE + LOG_LSIZE];
__local
float*
sum_p;
src_step = src_step >>
2
;
gid = gid <<
1;
for
(int i = 0; i < rows; i =i +
LSIZE_1)
vecSumT
src_t[2],
sum_t[2]
;
__local
vecSumT
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
sumT
*
sum_p
;
src_step
=
src_step
>>
4
;
int
lid_prim
=
((
lid
&
127
)
<<
1
)
+
1
;
for
(
int
i
=
0
; i < rows; i +=
LSIZE_1)
{
src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + gid]) : (float4)0);
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + gid + 1]) : (float4)0);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sum[1][bf_loc] = src_t[1];
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
if
(
i
+
lid
<
rows
)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
}
offset <<= 1;
int
sum_idx
=
mad24
(
lid
+
i,
src_step,
gid
*
2
)
;
src_t[0]
=
srcsum[sum_idx]
;
src_t[1]
=
srcsum[sum_idx
+
1]
;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
else
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
src_t[0]
=
0
;
src_t[1]
=
0
;
}
for(int d = 1; d < LSIZE; d <<= 1
)
if
(
i
==
0
)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
}
sum_t[0]
=
0
;
sum_t[1]
=
0
;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid > 0 && (i+lid) <= rows)
else
{
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid |
|
gid
*
4
+
k
<
pre_invalid
)
continue
;
sum[loc_s0
+
k
*
dst_step
/
4]
=
sum_p[k]
;
}
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
4
+
k
+
4
>=
cols
+
pre_invalid
)
break
;
sum[loc_s1
+
k
*
dst_step
/
4]
=
sum_p[k]
;
}
sum_t[0]
=
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
;
sum_t[1]
=
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
kernel
void
integral_sum_rows
(
__global
float4
*srcsum,
__global
float
*sum,
int
rows,
int
cols,
int
src_step,
int
sum_step,
int
sum_offset
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
float4
src_t[2],
sum_t[2]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
*sum_p
;
src_step
=
src_step
>>
4
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
{
src_t[0]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2]
:
(
float4
)
0
;
src_t[1]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2
+
1]
:
(
float4
)
0
;
sum_t[0]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
lm_sum[0][bf_loc]
=
src_t[0]
;
lm_sum[0][bf_loc]
=
src_t[0]
;
lm_sum[1][bf_loc]
=
src_t[1]
;
int
offset
=
1
;
for
(
int
d
=
LSIZE
>>
1
; d > 0; d>>=1)
for
(
int
d
=
LSIZE
>>
1
; d > 0; d>>=1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
ai
=
offset
*
(((
lid
&
127
)
<<1
)
+1
)
-
1
,
bi
=
ai
+
offset
;
int
ai
=
offset
*
lid_prim
-
1
,
bi
=
ai
+
offset
;
ai
+=
GET_CONFLICT_OFFSET
(
ai
)
;
bi
+=
GET_CONFLICT_OFFSET
(
bi
)
;
...
...
@@ -355,59 +230,60 @@ kernel void integral_sum_rows(__global float4 *srcsum, __global float *sum,
offset
<<=
1
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
2
)
if
(
lid
<
2
)
{
lm_sum[lid][LSIZE_2
+
LOG_LSIZE]
=
0
;
}
for
(
int
d
=
1
; d < LSIZE; d <<= 1)
for
(
int
d
=
1
; d < LSIZE; d <<= 1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
offset
>>=
1
;
int
ai
=
offset
*
(((
lid
&
127
)
<<1
)
+1
)
-
1
,
bi
=
ai
+
offset
;
int
ai
=
offset
*
lid_prim
-
1
,
bi
=
ai
+
offset
;
ai
+=
GET_CONFLICT_OFFSET
(
ai
)
;
bi
+=
GET_CONFLICT_OFFSET
(
bi
)
;
if
((
lid
&
127
)
<
d
)
if
((
lid
&
127
)
<
d
)
{
lm_sum[lid
>>
7][bi]
+=
lm_sum[lid
>>
7][ai]
;
lm_sum[lid
>>
7][ai]
=
lm_sum[lid
>>
7][bi]
-
lm_sum[lid
>>
7][ai]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
gid
==
0
&&
(
i
+
lid
)
<=
rows
)
if
(
gid
==
0
&&
(
i
+
lid
)
<=
rows
)
{
sum[sum_offset
+
i
+
lid]
=
0
;
}
if
(
i
+
lid
==
0
)
if
(
i
+
lid
==
0
)
{
int
loc0
=
gid
*
2
*
sum_step
;
for
(
int
k
=
1
; k <= 8; k++)
{
if
(
gid
*
8
+
k
>
cols
)
break
;
if
(
gid
*
8
+
k
>
cols
)
break
;
sum[sum_offset
+
loc0
+
k
*
sum_step
/
4]
=
0
;
}
}
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
{
int
loc_s0
=
sum_offset
+
gid
*
2
*
sum_step
+
sum_step
/
4
+
i
+
lid,
loc_s1
=
loc_s0
+
sum_step
;
lm_sum[0][bf_loc]
+=
sum_t[0]
;
lm_sum[1][bf_loc]
+=
sum_t[1]
;
sum_p
=
(
__local
float
*
)(
&
(
lm_sum[0][bf_loc]
))
;
sum_p
=
(
__local
sumT
*
)(
&
(
lm_sum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
8
+
k
>=
cols
)
break
;
if
(
gid
*
8
+
k
>=
cols
)
break
;
sum[loc_s0
+
k
*
sum_step
/
4]
=
sum_p[k]
;
}
sum_p
=
(
__local
float
*
)(
&
(
lm_sum[1][bf_loc]
))
;
sum_p
=
(
__local
sumT
*
)(
&
(
lm_sum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
{
if
(
gid
*
8
+
4
+
k
>=
cols
)
break
;
if
(
gid
*
8
+
4
+
k
>=
cols
)
break
;
sum[loc_s1
+
k
*
sum_step
/
4]
=
sum_p[k]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
#
endif
modules/imgproc/src/sumpixels.cpp
浏览文件 @
ce5bae1c
...
...
@@ -254,19 +254,19 @@ static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
UMat
src
=
_src
.
getUMat
(),
t_sum
(
t_size
,
sdepth
),
sum
=
_sum
.
getUMat
();
t_sum
=
t_sum
(
Range
::
all
(),
Range
(
0
,
size
.
height
));
int
offset
=
(
int
)
src
.
offset
/
vlen
,
pre_invalid
=
(
int
)
src
.
offset
%
vlen
;
int
vcols
=
(
pre_invalid
+
src
.
cols
+
vlen
-
1
)
/
vlen
;
int
offset
=
(
int
)
src
.
offset
/
vlen
;
int
vcols
=
(
src
.
cols
+
vlen
-
1
)
/
vlen
;
int
sum_offset
=
(
int
)
sum
.
offset
/
vlen
;
k1
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
ocl
::
KernelArg
::
PtrWriteOnly
(
t_sum
),
offset
,
pre_invalid
,
src
.
rows
,
src
.
cols
,
(
int
)
src
.
step
,
(
int
)
t_sum
.
step
);
offset
,
src
.
rows
,
src
.
cols
,
(
int
)
src
.
step
,
(
int
)
t_sum
.
step
);
size_t
gt
=
((
vcols
+
1
)
/
2
)
*
256
,
lt
=
256
;
if
(
!
k1
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
ocl
::
Kernel
k2
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
format
(
"-D sdepth=%d"
,
sdepth
));
k2
.
args
(
ocl
::
KernelArg
::
PtrRead
Write
(
t_sum
),
ocl
::
KernelArg
::
PtrWriteOnly
(
sum
),
k2
.
args
(
ocl
::
KernelArg
::
PtrRead
Only
(
t_sum
),
ocl
::
KernelArg
::
PtrWriteOnly
(
sum
),
t_sum
.
rows
,
t_sum
.
cols
,
(
int
)
t_sum
.
step
,
(
int
)
sum
.
step
,
sum_offset
);
size_t
gt2
=
t_sum
.
cols
*
32
,
lt2
=
256
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录