Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
a16a11f3
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a16a11f3
编写于
11月 13, 2014
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3413 from wangyan42164:denoising_opencl_improvement
上级
4763d803
a8b4c99e
变更
2
隐藏空白更改
内联
并排
Showing 2 changed files with 12 additions and 22 deletions
+12
-22
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+7
-5
modules/photo/src/opencl/nlmeans.cl
modules/photo/src/opencl/nlmeans.cl
+5
-17
未找到文件。
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
浏览文件 @
a16a11f3
...
...
@@ -19,7 +19,8 @@ enum
{
BLOCK_ROWS
=
32
,
BLOCK_COLS
=
32
,
CTA_SIZE
=
256
CTA_SIZE_INTEL
=
64
,
CTA_SIZE_DEFAULT
=
256
};
static
int
divUp
(
int
a
,
int
b
)
...
...
@@ -70,6 +71,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int
templateWindowSize
,
int
searchWindowSize
)
{
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
int
ctaSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
CTA_SIZE_INTEL
:
CTA_SIZE_DEFAULT
;
Size
size
=
_src
.
size
();
if
(
type
!=
CV_8UC1
&&
type
!=
CV_8UC2
&&
type
!=
CV_8UC4
)
...
...
@@ -86,12 +88,12 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
String
opts
=
format
(
"-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D
CTA_SIZE2=%d -D
convert_uchar_t=%s"
,
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s"
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
BLOCK_COLS
,
BLOCK_ROWS
,
CTA_SIZE
,
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
BLOCK_COLS
,
BLOCK_ROWS
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
CV_8U
,
CV_32S
,
cn
,
cvt
[
0
]),
cn
,
CTA_SIZE
>>
1
,
ocl
::
convertTypeStr
(
CV_32S
,
CV_8U
,
cn
,
cvt
[
1
]));
ocl
::
convertTypeStr
(
CV_32S
,
CV_8U
,
cn
,
cvt
[
1
]));
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
if
(
k
.
empty
())
...
...
@@ -120,7 +122,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
ocl
::
KernelArg
::
PtrReadOnly
(
almostDist2Weight
),
ocl
::
KernelArg
::
PtrReadOnly
(
buffer
),
almostTemplateWindowSizeSqBinShift
);
size_t
globalsize
[
2
]
=
{
nblocksx
*
CTA_SIZE
,
nblocksy
},
localsize
[
2
]
=
{
CTA_SIZE
,
1
};
size_t
globalsize
[
2
]
=
{
nblocksx
*
ctaSize
,
nblocksy
},
localsize
[
2
]
=
{
ctaSize
,
1
};
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
...
...
modules/photo/src/opencl/nlmeans.cl
浏览文件 @
a16a11f3
...
...
@@ -206,22 +206,11 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
weighted_sum
+=
(
int_t
)(
weight
)
*
src_value
;
}
if
(
id
>=
CTA_SIZE2
)
{
int
id2
=
id
-
CTA_SIZE2
;
weights_local[id2]
=
weights
;
weighted_sum_local[id2]
=
weighted_sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
id
<
CTA_SIZE2
)
{
weights_local[id]
+=
weights
;
weighted_sum_local[id]
+=
weighted_sum
;
}
weights_local[id]
=
weights
;
weighted_sum_local[id]
=
weighted_sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
CTA_SIZE
2
>>
1
; lsize > 2; lsize >>= 1)
for
(
int
lsize
=
CTA_SIZE
>>
1
; lsize > 2; lsize >>= 1)
{
if
(
id
<
lsize
)
{
...
...
@@ -252,8 +241,8 @@ __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int
int
block_y
=
get_group_id
(
1
)
;
int
id
=
get_local_id
(
0
)
,
first
;
__local
int
dists[SEARCH_SIZE_SQ],
weights[CTA_SIZE
2
]
;
__local
int_t
weighted_sum[CTA_SIZE
2
]
;
__local
int
dists[SEARCH_SIZE_SQ],
weights[CTA_SIZE]
;
__local
int_t
weighted_sum[CTA_SIZE]
;
int
x0
=
block_x
*
BLOCK_COLS,
x1
=
min
(
x0
+
BLOCK_COLS,
dst_cols
)
;
int
y0
=
block_y
*
BLOCK_ROWS,
y1
=
min
(
y0
+
BLOCK_ROWS,
dst_rows
)
;
...
...
@@ -281,7 +270,6 @@ __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int
first
=
(
first
+
1
)
%
TEMPLATE_SIZE
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
convolveWindow
(
src,
src_step,
src_offset,
dists,
almostDist2Weight,
dst,
dst_step,
dst_offset,
y,
x,
id,
weights,
weighted_sum,
almostTemplateWindowSizeSqBinShift
)
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录