Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
56d62118
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
56d62118
编写于
3月 29, 2013
作者:
A
Andrey Kamaev
提交者:
OpenCV Buildbot
3月 29, 2013
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #707 from pengx17:2.4_surf
上级
b5dd26e4
f2ecf4f9
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
263 addition
and
61 deletion
+263
-61
modules/nonfree/src/opencl/surf.cl
modules/nonfree/src/opencl/surf.cl
+175
-51
modules/nonfree/src/surf.ocl.cpp
modules/nonfree/src/surf.ocl.cpp
+33
-10
modules/ocl/include/opencv2/ocl/private/util.hpp
modules/ocl/include/opencv2/ocl/private/util.hpp
+10
-0
modules/ocl/src/initialization.cpp
modules/ocl/src/initialization.cpp
+45
-0
未找到文件。
modules/nonfree/src/opencl/surf.cl
浏览文件 @
56d62118
...
...
@@ -747,21 +747,42 @@ void reduce_32_sum(volatile __local float * data, volatile float* partial_reduc
#
define
op
(
A,
B
)
(
*A
)
+
(
B
)
data[tid]
=
*partial_reduction
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
ifndef
WAVE_SIZE
#
define
WAVE_SIZE
1
#
endif
if
(
tid
<
16
)
{
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
16]
)
;
#
if
WAVE_SIZE
<
16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
8
]
)
;
#
if
WAVE_SIZE
<
8
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
#
endif
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
4
]
)
;
#
if
WAVE_SIZE
<
4
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
#
endif
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
2
]
)
;
#
if
WAVE_SIZE
<
2
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
#
endif
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
1
]
)
;
}
#
undef
WAVE_SIZE
#
undef
op
}
...
...
@@ -1087,44 +1108,67 @@ void reduce_sum25(
int tid
)
{
#ifndef WAVE_SIZE
#define WAVE_SIZE 1
#endif
// first step is to reduce from 25 to 16
if (tid < 9)
// use 9 threads
if (tid < 9)
{
sdata1[tid] += sdata1[tid + 16];
sdata2[tid] += sdata2[tid + 16];
sdata3[tid] += sdata3[tid + 16];
sdata4[tid] += sdata4[tid + 16];
#if WAVE_SIZE < 16
}
// sum (reduce) from 16 to 1 (unrolled - aligned to a half-warp)
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
sdata1[tid] += sdata1[tid + 8];
sdata1[tid] += sdata1[tid + 4];
sdata1[tid] += sdata1[tid + 2];
sdata1[tid] += sdata1[tid + 1];
sdata2[tid] += sdata2[tid + 8];
sdata2[tid] += sdata2[tid + 4];
sdata2[tid] += sdata2[tid + 2];
sdata2[tid] += sdata2[tid + 1];
sdata3[tid] += sdata3[tid + 8];
sdata3[tid] += sdata3[tid + 4];
sdata3[tid] += sdata3[tid + 2];
sdata3[tid] += sdata3[tid + 1];
sdata4[tid] += sdata4[tid + 8];
#if WAVE_SIZE < 8
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
#endif
sdata1[tid] += sdata1[tid + 4];
sdata2[tid] += sdata2[tid + 4];
sdata3[tid] += sdata3[tid + 4];
sdata4[tid] += sdata4[tid + 4];
#if WAVE_SIZE < 4
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
#endif
sdata1[tid] += sdata1[tid + 2];
sdata2[tid] += sdata2[tid + 2];
sdata3[tid] += sdata3[tid + 2];
sdata4[tid] += sdata4[tid + 2];
#if WAVE_SIZE < 2
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
#endif
sdata1[tid] += sdata1[tid + 1];
sdata2[tid] += sdata2[tid + 1];
sdata3[tid] += sdata3[tid + 1];
sdata4[tid] += sdata4[tid + 1];
}
#undef WAVE_SIZE
}
__kernel
void compute_descriptors64(
IMAGE_INT8 imgTex,
volatile
__global float * descriptors,
__global float * descriptors,
__global const float * keypoints,
int descriptors_step,
int keypoints_step,
...
...
@@ -1158,14 +1202,13 @@ __kernel
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
volatile
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
// write dx, dy, |dx|, |dy|
if (tid == 0)
...
...
@@ -1180,7 +1223,7 @@ __kernel
__kernel
void compute_descriptors128(
IMAGE_INT8 imgTex,
__global
volatile
float * descriptors,
__global float * descriptors,
__global float * keypoints,
int descriptors_step,
int keypoints_step,
...
...
@@ -1229,13 +1272,15 @@ __kernel
sd2[tid] = sdx[tid];
sdabs2[tid] = fabs(sdx[tid]);
}
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
//barrier(CLK_LOCAL_MEM_FENCE);
volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3);
barrier(CLK_LOCAL_MEM_FENCE);
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3);
if (tid < 25)
{
// write dx (dy >= 0), |dx| (dy >= 0), dx (dy < 0), |dx| (dy < 0)
if (tid == 0)
{
...
...
@@ -1259,11 +1304,14 @@ __kernel
sd2[tid] = sdy[tid];
sdabs2[tid] = fabs(sdy[tid]);
}
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
//
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
// write dy (dx >= 0), |dy| (dx >= 0), dy (dx < 0), |dy
|
(
dx
<
0
)
if
(
tid
==
0
)
{
...
...
@@ -1274,6 +1322,103 @@ __kernel
}
}
}
// In-place tree reduction over 128 floats held in local memory.
//
// Each step adds smem[tid + stride] into smem[tid] for the work-items whose
// tid is below the stride (64, 32, 16, 8, 4, 2, 1); the result is intended to
// end up in smem[0].
//
// WAVE_SIZE (a build option, defaulting to 1 when not defined) decides which
// barriers are compiled in: for every stride that is <= WAVE_SIZE the
// "close the guard / barrier / re-open a narrower guard" sequence is removed
// by the preprocessor, so that step runs inside the previous guard without a
// barrier.  Presumably this relies on the work-items of one wavefront
// executing in lock-step - confirm for the target device.
//
// The barriers sit outside the tid guards, so every work-item of the group
// has to call this function.  Note that WAVE_SIZE is left defined on exit.
void reduce_sum128(volatile __local float* smem, int tid)
{
#ifndef WAVE_SIZE
#define WAVE_SIZE 1
#endif

    // 128 -> 64
    if (tid < 64)
    {
        smem[tid] += smem[tid + 64];
#if WAVE_SIZE < 64
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 32)
    {
#endif
        // 64 -> 32
        smem[tid] += smem[tid + 32];
#if WAVE_SIZE < 32
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 16)
    {
#endif
        // 32 -> 16
        smem[tid] += smem[tid + 16];
#if WAVE_SIZE < 16
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 8)
    {
#endif
        // 16 -> 8
        smem[tid] += smem[tid + 8];
#if WAVE_SIZE < 8
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 4)
    {
#endif
        // 8 -> 4
        smem[tid] += smem[tid + 4];
#if WAVE_SIZE < 4
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 2)
    {
#endif
        // 4 -> 2
        smem[tid] += smem[tid + 2];
#if WAVE_SIZE < 2
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 1)
    {
#endif
        // 2 -> 1
        smem[tid] += smem[tid + 1];
    }
}
// In-place tree reduction over 64 floats held in local memory.
//
// Each step adds smem[tid + stride] into smem[tid] for the work-items whose
// tid is below the stride (32, 16, 8, 4, 2, 1); the result is intended to end
// up in smem[0].
//
// WAVE_SIZE (a build option, defaulting to 1 when not defined) decides which
// barriers are compiled in: for every stride that is <= WAVE_SIZE the
// "close the guard / barrier / re-open a narrower guard" sequence is removed
// by the preprocessor, so that step runs inside the previous guard without a
// barrier.  Presumably this relies on the work-items of one wavefront
// executing in lock-step - confirm for the target device.
//
// The barriers sit outside the tid guards, so every work-item of the group
// has to call this function.
void reduce_sum64(volatile __local float* smem, int tid)
{
#ifndef WAVE_SIZE
#define WAVE_SIZE 1
#endif

    // 64 -> 32
    if (tid < 32)
    {
        smem[tid] += smem[tid + 32];
#if WAVE_SIZE < 32
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 16)
    {
#endif
        // 32 -> 16
        smem[tid] += smem[tid + 16];
#if WAVE_SIZE < 16
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 8)
    {
#endif
        // 16 -> 8
        smem[tid] += smem[tid + 8];
#if WAVE_SIZE < 8
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 4)
    {
#endif
        // 8 -> 4
        smem[tid] += smem[tid + 4];
#if WAVE_SIZE < 4
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 2)
    {
#endif
        // 4 -> 2
        smem[tid] += smem[tid + 2];
#if WAVE_SIZE < 2
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 1)
    {
#endif
        // 2 -> 1
        smem[tid] += smem[tid + 1];
    }
}
__kernel
void
normalize_descriptors128
(
__global
float
*
descriptors,
int
descriptors_step
)
...
...
@@ -1288,22 +1433,10 @@ __kernel
sqDesc[get_local_id
(
0
)
]
=
lookup
*
lookup
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
64
)
sqDesc[get_local_id
(
0
)
]
+=
sqDesc[get_local_id
(
0
)
+
64]
;
reduce_sum128
(
sqDesc,
get_local_id
(
0
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
reduction
to
get
total
if
(
get_local_id
(
0
)
<
32
)
{
volatile
__local
float*
smem
=
sqDesc
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
32]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
16]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
8]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
4]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
2]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
1]
;
}
//
compute
length
(
square
root
)
volatile
__local
float
len
;
...
...
@@ -1329,18 +1462,9 @@ __kernel
sqDesc[get_local_id
(
0
)
]
=
lookup
*
lookup
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
reduction
to
get
total
if
(
get_local_id
(
0
)
<
32
)
{
volatile
__local
float*
smem
=
sqDesc
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
32]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
16]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
8]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
4]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
2]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
1]
;
}
reduce_sum64
(
sqDesc,
get_local_id
(
0
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
compute
length
(
square
root
)
volatile
__local
float
len
;
...
...
modules/nonfree/src/surf.ocl.cpp
浏览文件 @
56d62118
...
...
@@ -43,6 +43,7 @@
//
//M*/
#include "precomp.hpp"
#include <cstdio>
#ifdef HAVE_OPENCV_OCL
...
...
@@ -57,25 +58,35 @@ namespace cv
///////////////////////////OpenCL kernel strings///////////////////////////
// OpenCL kernel source string; only declared here, the definition lives in
// another translation unit.
extern const char *surf;

// Build option handed to the OpenCL compiler when image2d textures are not
// used.  Defined exactly once, as an array, so that its length is known at
// compile time (the earlier pointer definition of the same name is dropped;
// keeping both is a redefinition error).
const char noImage2dOption[] = "-D DISABLE_IMAGE2D";

// Scratch buffer holding the build-option string passed to
// openCLExecuteKernel() for the SURF kernels.
static char SURF_OPTIONS[1024] = "";

// True when the input and integral images were successfully bound as image2d
// textures; starts out false until a bind succeeds.
static bool USE_IMAGE2d = false;
// Launches one SURF kernel with the build options this file needs.
//
// The option string is assembled in SURF_OPTIONS as
//   ["-D DISABLE_IMAGE2D"] " -D WAVE_SIZE=<n>"
// where the first part is present only while USE_IMAGE2d is false and <n> is
// the value reported by queryDeviceInfo(WAVEFRONT_SIZE, ...).
//
// The string is rebuilt on every launch because USE_IMAGE2d is reassigned
// each time the images are (re)bound; building it only once would keep using
// the options chosen for the very first run.  The device query itself is
// performed once and its result reused.
//
// All remaining parameters are forwarded unchanged to openCLExecuteKernel().
static void openCLExecuteKernelSURF(Context *clCxt, const char **source, string kernelName,
                                    size_t globalThreads[3], size_t localThreads[3],
                                    vector< pair<size_t, const void *> > &args,
                                    int channels, int depth)
{
    // Query the wavefront/warp width only on the first launch.
    static bool waveSizeKnown = false;
    static size_t waveSize = 0;
    if (!waveSizeKnown)
    {
        queryDeviceInfo(WAVEFRONT_SIZE, &waveSize);
        waveSizeKnown = true;
    }

    // Start from an empty string so that repeated launches do not append to
    // the options left behind by the previous one.
    SURF_OPTIONS[0] = '\0';
    if (!USE_IMAGE2d)
    {
        strcat(SURF_OPTIONS, noImage2dOption);
    }
    std::sprintf(SURF_OPTIONS + strlen(SURF_OPTIONS), " -D WAVE_SIZE=%d", static_cast<int>(waveSize));

    openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS);
}
}
}
// Returns total / grain rounded up, i.e. the number of blocks of size `grain`
// needed to cover `total` items.
//
// The result is computed from the quotient and remainder instead of
// (total + grain - 1) / grain, so it stays correct when `total` is close to
// the largest size_t value (the addition would wrap around there).
//
// `grain` must be non-zero.
static inline size_t divUp(size_t total, size_t grain)
{
    return total / grain + (total % grain != 0 ? 1 : 0);
}
...
...
@@ -152,8 +163,20 @@ public:
integral
(
img
,
surf_
.
sum
);
if
(
support_image2d
())
{
bindImgTex
(
img
,
imgTex
);
bindImgTex
(
surf_
.
sum
,
sumTex
);
try
{
bindImgTex
(
img
,
imgTex
);
bindImgTex
(
surf_
.
sum
,
sumTex
);
USE_IMAGE2d
=
true
;
}
catch
(
const
cv
::
Exception
&
e
)
{
USE_IMAGE2d
=
false
;
if
(
e
.
code
!=
CL_IMAGE_FORMAT_NOT_SUPPORTED
&&
e
.
code
!=
-
217
)
{
throw
e
;
}
}
}
maskSumTex
=
0
;
...
...
modules/ocl/include/opencv2/ocl/private/util.hpp
浏览文件 @
56d62118
...
...
@@ -123,6 +123,16 @@ namespace cv
// returns whether the current context supports image2d_t format or not
bool
CV_EXPORTS
support_image2d
(
Context
*
clCxt
=
Context
::
getContext
());
// Selectors telling queryDeviceInfo() which piece of device information to
// return.  At present only the wavefront size can be queried.
enum DEVICE_INFO
{
    WAVEFRONT_SIZE,             // AMD terminology
    WARP_SIZE = WAVEFRONT_SIZE  // NVIDIA terminology, alias of WAVEFRONT_SIZE
};
// Stores the device information selected by info_type into the memory that
// info points to.  The caller must allocate that memory beforehand; for
// WAVEFRONT_SIZE the implementation requests sizeof(size_t) bytes, so info
// should point to a size_t.
void
CV_EXPORTS
queryDeviceInfo
(
DEVICE_INFO
info_type
,
void
*
info
);
}
//namespace ocl
}
//namespace cv
...
...
modules/ocl/src/initialization.cpp
浏览文件 @
56d62118
...
...
@@ -353,6 +353,51 @@ namespace cv
{
return
&
(
Context
::
getContext
()
->
impl
->
clCmdQueue
);
}
// Stores the device information selected by info_type into *info.
//
// WAVEFRONT_SIZE: writes a size_t.  When the OpenCL headers expose a vendor
// attribute (CL_DEVICE_WAVEFRONT_WIDTH_AMD, or else CL_DEVICE_WARP_SIZE_NV
// guarded by the cl_nv_device_attribute_query extension) that attribute is
// tried first; if it is unavailable the value is taken from
// CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE of a trivial probe kernel.
//
// Any other info_type raises CV_Error(-1, "Invalid device info type").
// Failing OpenCL calls wrapped in openCLSafeCall are reported through it.
void queryDeviceInfo(DEVICE_INFO info_type, void* info)
{
    // Looked up on every call (not cached in a static) so that the currently
    // active context is always the one being queried.
    Info::Impl* impl = Context::getContext()->impl;
    switch (info_type)
    {
    case WAVEFRONT_SIZE:
    {
        // Every query targets the selected device, not unconditionally
        // device 0.
        const cl_device_id device = impl->devices[impl->devnum];
        size_t* waveSize = static_cast<size_t*>(info);
        bool resolved = false;

#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
        {
            // The vendor attribute is a cl_uint: read it into a correctly
            // sized temporary and widen, rather than letting the driver fill
            // only part of the caller's size_t.  A failing query simply
            // falls through to the generic path below.
            cl_uint width = 0;
            if (clGetDeviceInfo(device, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(width), &width, 0) == CL_SUCCESS)
            {
                *waveSize = width;
                resolved = true;
            }
        }
#elif defined (CL_DEVICE_WARP_SIZE_NV)
        {
            const int EXT_LEN = 4096 + 1;
            char extends_set[EXT_LEN];
            size_t extends_size;
            openCLSafeCall(clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size));
            extends_set[EXT_LEN - 1] = 0;
            if (std::string(extends_set).find("cl_nv_device_attribute_query") != std::string::npos)
            {
                // Same cl_uint -> size_t widening as for the AMD attribute.
                cl_uint width = 0;
                openCLSafeCall(clGetDeviceInfo(device, CL_DEVICE_WARP_SIZE_NV, sizeof(width), &width, 0));
                *waveSize = width;
                resolved = true;
            }
        }
#endif

        if (!resolved)
        {
            // if no way left for us to query the warp size, we can get it from kernel group info
            static const char * _kernel_string = "__kernel void test_func() {}";
            cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &_kernel_string, "test_func");
            const cl_int status = clGetKernelWorkGroupInfo(kernel, device,
                                                           CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
                                                           sizeof(size_t), info, NULL);
            // Release the probe kernel before reporting the query status so
            // that it is not leaked, even when the query failed.
            openCLSafeCall(clReleaseKernel(kernel));
            openCLSafeCall(status);
        }
    }
    break;
    default:
        CV_Error(-1, "Invalid device info type");
        break;
    }
}
void
openCLReadBuffer
(
Context
*
clCxt
,
cl_mem
dst_buffer
,
void
*
host_buffer
,
size_t
size
)
{
cl_int
status
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录