Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
6952b90e
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6952b90e
编写于
6月 20, 2014
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2790 from akarsakov:ocl_pyrUp_unroll
上级
01a98fae
06fb5da7
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
138 addition
and
65 deletion
+138
-65
modules/imgproc/src/opencl/pyr_up.cl
modules/imgproc/src/opencl/pyr_up.cl
+124
-59
modules/imgproc/src/pyramids.cpp
modules/imgproc/src/pyramids.cpp
+14
-6
未找到文件。
modules/imgproc/src/opencl/pyr_up.cl
浏览文件 @
6952b90e
...
...
@@ -68,8 +68,9 @@
#
define
PIXSIZE
((
int
)
sizeof
(
T1
)
*3
)
#
endif
#
define
noconvert
#
define
EXTRAPOLATE
(
x,
maxV
)
min
(
maxV
-
1
,
(
int
)
abs
(
x
))
#
define
noconvert
__kernel
void
pyrUp
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
...
...
@@ -77,28 +78,19 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
const
int
lsizex
=
get_local_size
(
0
)
;
const
int
lsizey
=
get_local_size
(
1
)
;
const
int
tidx
=
get_local_id
(
0
)
;
const
int
tidy
=
get_local_id
(
1
)
;
__local
FT
s_srcPatch[
10][10
]
;
__local
FT
s_dstPatch[
20][16
]
;
__local
FT
s_srcPatch[
LOCAL_SIZE/2
+
2][LOCAL_SIZE/2
+
2
]
;
__local
FT
s_dstPatch[
LOCAL_SIZE/2
+
2][LOCAL_SIZE
]
;
__global
uchar
*
dstData
=
dst
+
dst_offset
;
__global
const
uchar
*
srcData
=
src
+
src_offset
;
if
(
tidx
<
10
&&
tidy
<
10
)
if
(
tidx
<
(
LOCAL_SIZE/2
+
2
)
&&
tidy
<
LOCAL_SIZE/2
+
2
)
{
int
srcx
=
mad24
((
int
)
get_group_id
(
0
)
,
lsizex>>1,
tidx
)
-
1
;
int
srcy
=
mad24
((
int
)
get_group_id
(
1
)
,
lsizey>>1,
tidy
)
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
src_cols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
src_rows
-
1
,
srcy
)
;
int
srcx
=
EXTRAPOLATE
(
mad24
((
int
)
get_group_id
(
0
)
,
LOCAL_SIZE/2,
tidx
)
-
1
,
src_cols
)
;
int
srcy
=
EXTRAPOLATE
(
mad24
((
int
)
get_group_id
(
1
)
,
LOCAL_SIZE/2,
tidy
)
-
1
,
src_rows
)
;
s_srcPatch[tidy][tidx]
=
convertToFT
(
loadpix
(
srcData
+
srcy
*
src_step
+
srcx
*
PIXSIZE
))
;
}
...
...
@@ -106,64 +98,137 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
FT
sum
=
0.f
;
const
FT
evenFlag
=
(
FT
)((
tidx
&
1
)
==
0
)
;
const
FT
oddFlag
=
(
FT
)((
tidx
&
1
)
!=
0
)
;
const
bool
eveny
=
((
tidy
&
1
)
==
0
)
;
const
FT
co1
=
0.
3
75f
;
const
FT
co2
=
0.
2
5f
;
const
FT
co3
=
0.
06
25f
;
const
FT
co1
=
0.75f
;
const
FT
co2
=
0.5f
;
const
FT
co3
=
0.
1
25f
;
if
(
eveny
)
const
FT
coef1
=
(
tidx
&
1
)
==
0
?
co1
:
(
FT
)
0
;
const
FT
coef2
=
(
tidx
&
1
)
==
0
?
co3
:
co2
;
const
FT
coefy1
=
(
tidy
&
1
)
==
0
?
co1
:
(
FT
)
0
;
const
FT
coefy2
=
(
tidy
&
1
)
==
0
?
co3
:
co2
;
if
(
tidy
<
LOCAL_SIZE/2
+
2
)
{
sum
=
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co1
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
s
um
=
sum
+
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
sum
=
coef2*
s_srcPatch[tidy][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
mad
(
coef1,
s_srcPatch[tidy][1
+
((
tidx
)
>>
1
)
],
sum
)
;
sum
=
mad
(
coef2,
s_srcPatch[tidy][1
+
((
tidx
+
2
)
>>
1
)
],
sum
)
;
s
_dstPatch[tidy][tidx]
=
sum
;
}
s_dstPatch[2
+
tidy][tidx]
=
sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
coefy2*
s_dstPatch[1
+
((
tidy
-
1
)
>>
1
)
][tidx]
;
sum
=
mad
(
coefy1,
s_dstPatch[1
+
((
tidy
)
>>
1
)
][tidx],
sum
)
;
sum
=
mad
(
coefy2,
s_dstPatch[1
+
((
tidy
+
2
)
>>
1
)
][tidx],
sum
)
;
if
(
tidy
<
2
)
{
sum
=
0
;
if
((
x
<
dst_cols
)
&&
(
y
<
dst_rows
))
storepix
(
convertToT
(
sum
)
,
dstData
+
y
*
dst_step
+
x
*
PIXSIZE
)
;
}
__kernel
void
pyrUp_unrolled
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
const
int
lx
=
2*get_local_id
(
0
)
;
const
int
ly
=
2*get_local_id
(
1
)
;
__local
FT
s_srcPatch[LOCAL_SIZE+2][LOCAL_SIZE+2]
;
__local
FT
s_dstPatch[LOCAL_SIZE+2][2*LOCAL_SIZE]
;
if
(
eveny
)
__global
uchar
*
dstData
=
dst
+
dst_offset
;
__global
const
uchar
*
srcData
=
src
+
src_offset
;
if
(
lx
<
(
LOCAL_SIZE+2
)
&&
ly
<
(
LOCAL_SIZE+2
)
)
{
sum
=
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
+
2
)
>>
1
)
]
;
int
srcx
=
mad24
((
int
)
get_group_id
(
0
)
,
LOCAL_SIZE,
lx
)
-
1
;
int
srcy
=
mad24
((
int
)
get_group_id
(
1
)
,
LOCAL_SIZE,
ly
)
-
1
;
int
srcx1
=
EXTRAPOLATE
(
srcx,
src_cols
)
;
int
srcx2
=
EXTRAPOLATE
(
srcx+1,
src_cols
)
;
int
srcy1
=
EXTRAPOLATE
(
srcy,
src_rows
)
;
int
srcy2
=
EXTRAPOLATE
(
srcy+1,
src_rows
)
;
s_srcPatch[ly][lx]
=
convertToFT
(
loadpix
(
srcData
+
srcy1
*
src_step
+
srcx1
*
PIXSIZE
))
;
s_srcPatch[ly+1][lx]
=
convertToFT
(
loadpix
(
srcData
+
srcy2
*
src_step
+
srcx1
*
PIXSIZE
))
;
s_srcPatch[ly][lx+1]
=
convertToFT
(
loadpix
(
srcData
+
srcy1
*
src_step
+
srcx2
*
PIXSIZE
))
;
s_srcPatch[ly+1][lx+1]
=
convertToFT
(
loadpix
(
srcData
+
srcy2
*
src_step
+
srcx2
*
PIXSIZE
))
;
}
s_dstPatch[tidy][tidx]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tidy
>
13
)
{
sum
=
0
;
FT
sum
;
const
FT
co1
=
0.75f
;
const
FT
co2
=
0.5f
;
const
FT
co3
=
0.125f
;
//
(
x,y
)
sum
=
co3
*
s_srcPatch[1
+
(
ly
>>
1
)
][1
+
((
lx
-
2
)
>>
1
)
]
;
sum
=
sum
+
co1
*
s_srcPatch[1
+
(
ly
>>
1
)
][1
+
((
lx
)
>>
1
)
]
;
sum
=
sum
+
co3
*
s_srcPatch[1
+
(
ly
>>
1
)
][1
+
((
lx
+
2
)
>>
1
)
]
;
if
(
eveny
)
s_dstPatch[1
+
get_local_id
(
1
)
][lx]
=
sum
;
//
(
x+1,y
)
sum
=
co2
*
s_srcPatch[1
+
(
ly
>>
1
)
][1
+
((
lx
+
1
-
1
)
>>
1
)
]
;
sum
=
sum
+
co2
*
s_srcPatch[1
+
(
ly
>>
1
)
][1
+
((
lx
+
1
+
1
)
>>
1
)
]
;
s_dstPatch[1
+
get_local_id
(
1
)
][lx+1]
=
sum
;
if
(
ly
<
1
)
{
sum
=
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
+
2
)
>>
1
)
]
;
//
(
x,y
)
sum
=
co3
*
s_srcPatch[0][1
+
((
lx
-
2
)
>>
1
)
]
;
sum
=
sum
+
co1
*
s_srcPatch[0][1
+
((
lx
)
>>
1
)
]
;
sum
=
sum
+
co3
*
s_srcPatch[0][1
+
((
lx
+
2
)
>>
1
)
]
;
s_dstPatch[0][lx]
=
sum
;
//
(
x+1,y
)
sum
=
co2
*
s_srcPatch[0][1
+
((
lx
+
1
-
1
)
>>
1
)
]
;
sum
=
sum
+
co2
*
s_srcPatch[0][1
+
((
lx
+
1
+
1
)
>>
1
)
]
;
s_dstPatch[0][lx+1]
=
sum
;
}
s_dstPatch[4
+
tidy][tidx]
=
sum
;
if
(
ly
>
2*LOCAL_SIZE-3
)
{
//
(
x,y
)
sum
=
co3
*
s_srcPatch[LOCAL_SIZE+1][1
+
((
lx
-
2
)
>>
1
)
]
;
sum
=
sum
+
co1
*
s_srcPatch[LOCAL_SIZE+1][1
+
((
lx
)
>>
1
)
]
;
sum
=
sum
+
co3
*
s_srcPatch[LOCAL_SIZE+1][1
+
((
lx
+
2
)
>>
1
)
]
;
s_dstPatch[LOCAL_SIZE+1][lx]
=
sum
;
//
(
x+1,y
)
sum
=
co2
*
s_srcPatch[LOCAL_SIZE+1][1
+
((
lx
+
1
-
1
)
>>
1
)
]
;
sum
=
sum
+
co2
*
s_srcPatch[LOCAL_SIZE+1][1
+
((
lx
+
1
+
1
)
>>
1
)
]
;
s_dstPatch[LOCAL_SIZE+1][lx+1]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
dst_x
=
2*get_global_id
(
0
)
;
int
dst_y
=
2*get_global_id
(
1
)
;
sum
=
co3
*
s_dstPatch[2
+
tidy
-
2][tidx]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
-
1][tidx]
;
sum
=
sum
+
co1
*
s_dstPatch[2
+
tidy
][tidx]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
+
1][tidx]
;
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][tidx]
;
if
((
x
<
dst_cols
)
&&
(
y
<
dst_rows
))
storepix
(
convertToT
(
4.0f
*
sum
)
,
dstData
+
y
*
dst_step
+
x
*
PIXSIZE
)
;
if
((
dst_x
<
dst_cols
)
&&
(
dst_y
<
dst_rows
))
{
//
(
x,y
)
sum
=
co3
*
s_dstPatch[1
+
get_local_id
(
1
)
-
1][lx]
;
sum
=
sum
+
co1
*
s_dstPatch[1
+
get_local_id
(
1
)
][lx]
;
sum
=
sum
+
co3
*
s_dstPatch[1
+
get_local_id
(
1
)
+
1][lx]
;
storepix
(
convertToT
(
sum
)
,
dstData
+
dst_y
*
dst_step
+
dst_x
*
PIXSIZE
)
;
//
(
x+1,y
)
sum
=
co3
*
s_dstPatch[1
+
get_local_id
(
1
)
-
1][lx+1]
;
sum
=
sum
+
co1
*
s_dstPatch[1
+
get_local_id
(
1
)
][lx+1]
;
sum
=
sum
+
co3
*
s_dstPatch[1
+
get_local_id
(
1
)
+
1][lx+1]
;
storepix
(
convertToT
(
sum
)
,
dstData
+
dst_y
*
dst_step
+
(
dst_x+1
)
*
PIXSIZE
)
;
//
(
x,y+1
)
sum
=
co2
*
s_dstPatch[1
+
get_local_id
(
1
)
][lx]
;
sum
=
sum
+
co2
*
s_dstPatch[1
+
get_local_id
(
1
)
+
1][lx]
;
storepix
(
convertToT
(
sum
)
,
dstData
+
(
dst_y+1
)
*
dst_step
+
dst_x
*
PIXSIZE
)
;
//
(
x+1,y+1
)
sum
=
co2
*
s_dstPatch[1
+
get_local_id
(
1
)
][lx+1]
;
sum
=
sum
+
co2
*
s_dstPatch[1
+
get_local_id
(
1
)
+
1][lx+1]
;
storepix
(
convertToT
(
sum
)
,
dstData
+
(
dst_y+1
)
*
dst_step
+
(
dst_x+1
)
*
PIXSIZE
)
;
}
}
modules/imgproc/src/pyramids.cpp
浏览文件 @
6952b90e
...
...
@@ -467,24 +467,32 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int
UMat
dst
=
_dst
.
getUMat
();
int
float_depth
=
depth
==
CV_64F
?
CV_64F
:
CV_32F
;
const
int
local_size
=
16
;
char
cvt
[
2
][
50
];
String
buildOptions
=
format
(
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
"-D T1=%s -D cn=%d"
,
"-D T1=%s -D cn=%d
-D LOCAL_SIZE=%d
"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKETYPE
(
float_depth
,
channels
)),
ocl
::
convertTypeStr
(
float_depth
,
depth
,
channels
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
depth
,
float_depth
,
channels
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
ocl
::
typeToStr
(
depth
),
channels
ocl
::
typeToStr
(
depth
),
channels
,
local_size
);
ocl
::
Kernel
k
(
"pyrUp"
,
ocl
::
imgproc
::
pyr_up_oclsrc
,
buildOptions
);
size_t
globalThreads
[
2
]
=
{
dst
.
cols
,
dst
.
rows
};
size_t
localThreads
[
2
]
=
{
local_size
,
local_size
};
ocl
::
Kernel
k
;
if
(
ocl
::
Device
::
getDefault
().
isIntel
()
&&
channels
==
1
)
{
k
.
create
(
"pyrUp_unrolled"
,
ocl
::
imgproc
::
pyr_up_oclsrc
,
buildOptions
);
globalThreads
[
0
]
=
dst
.
cols
/
2
;
globalThreads
[
1
]
=
dst
.
rows
/
2
;
}
else
k
.
create
(
"pyrUp"
,
ocl
::
imgproc
::
pyr_up_oclsrc
,
buildOptions
);
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
));
size_t
globalThreads
[
2
]
=
{
dst
.
cols
,
dst
.
rows
};
size_t
localThreads
[
2
]
=
{
16
,
16
};
return
k
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录