Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
39700c5d
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
39700c5d
编写于
12月 13, 2010
作者:
A
Alexey Spizhevoy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
added some gpu::matchTemplate kernels (other parts after NPP Staging integration)
上级
a81b41fb
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
64 addition
and
8 deletion
+64
-8
modules/gpu/src/cuda/match_template.cu
modules/gpu/src/cuda/match_template.cu
+64
-8
未找到文件。
modules/gpu/src/cuda/match_template.cu
浏览文件 @
39700c5d
...
...
@@ -175,7 +175,7 @@ void multiplyAndNormalizeSpects(int n, float scale, const cufftComplex* a,
__global__
void
matchTemplatePreparedKernel_8U_SQDIFF
(
int
w
,
int
h
,
const
PtrStep
f
image_sumsq
,
float
templ_sumsq
,
int
w
,
int
h
,
const
PtrStep
_
<
unsigned
long
long
>
image_sqsum
,
float
templ_sqsum
,
DevMem2Df
result
)
{
const
int
x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
@@ -183,24 +183,80 @@ __global__ void matchTemplatePreparedKernel_8U_SQDIFF(
if
(
x
<
result
.
cols
&&
y
<
result
.
rows
)
{
float
image_sq
=
image_sumsq
.
ptr
(
y
+
h
)[
x
+
w
]
-
image_sumsq
.
ptr
(
y
)[
x
+
w
]
-
image_sumsq
.
ptr
(
y
+
h
)[
x
]
+
image_sumsq
.
ptr
(
y
)[
x
];
float
image_sq
=
(
float
)(
(
image_sqsum
.
ptr
(
y
+
h
)[
x
+
w
]
-
image_sqsum
.
ptr
(
y
)[
x
+
w
])
-
(
image_sqsum
.
ptr
(
y
+
h
)[
x
]
-
image_sqsum
.
ptr
(
y
)[
x
]));
float
ccorr
=
result
.
ptr
(
y
)[
x
];
result
.
ptr
(
y
)[
x
]
=
image_sq
-
2.
f
*
ccorr
+
templ_s
umsq
;
result
.
ptr
(
y
)[
x
]
=
image_sq
-
2.
f
*
ccorr
+
templ_s
qsum
;
}
}
// Host-side launcher for matchTemplatePreparedKernel_8U_SQDIFF.
//
// Parameters:
//   w, h         - forwarded to the kernel, which uses them as the column and
//                  row offsets of the window read from image_sqsum
//                  (presumably the template width and height - confirm
//                  against the caller).
//   image_sqsum  - table of 64-bit unsigned sums forwarded to the kernel
//                  (presumably the integral image of squared pixel values -
//                  confirm against the caller).
//   templ_sqsum  - scalar forwarded to the kernel (presumably the sum of
//                  squared template values - confirm against the caller).
//   result       - updated in place by the kernel; its cols/rows determine
//                  the launch grid.
//
// The call blocks until the kernel has finished; the status returned by the
// synchronization is handed to cudaSafeCall.
void matchTemplatePrepared_8U_SQDIFF(
        int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
        float templ_sqsum, DevMem2Df result)
{
    // 32x8 threads per block, with enough blocks to cover every element of
    // result (the kernel bounds-checks the tail blocks).
    dim3 threads(32, 8);
    dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

    matchTemplatePreparedKernel_8U_SQDIFF<<<grid, threads>>>(
            w, h, image_sqsum, templ_sqsum, result);

    cudaSafeCall(cudaThreadSynchronize());
}
// Rewrites each element of result in place as
//     (image_sq - 2 * ccorr + templ_sqsum) / sqrt(image_sq * templ_sqsum)
// where ccorr is the value already stored in result at (x, y) and image_sq
// is the four-corner difference of image_sqsum over the window with corners
// (x, y) and (x + w, y + h).
//
// Launch layout: 2D grid of 2D blocks, one thread per result element;
// threads falling outside result.cols x result.rows do nothing.
//
// Parameters:
//   w, h         - column / row offsets of the window in image_sqsum
//                  (presumably the template width and height - confirm).
//   image_sqsum  - 64-bit unsigned table read at rows y, y + h and columns
//                  x, x + w (presumably the integral image of squared pixel
//                  values, one row/column larger than the image - confirm).
//   templ_sqsum  - scalar added to the numerator and multiplied into the
//                  normalizer (presumably the sum of squared template
//                  values - confirm).
//   result       - input: value used as ccorr (presumably the
//                  cross-correlation computed by an earlier step - confirm);
//                  output: the normalized value.
__global__ void matchTemplatePreparedKernel_8U_SQDIFF_NORMED(
        int w, int h, const PtrStep_<unsigned long long> image_sqsum,
        float templ_sqsum, DevMem2Df result)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < result.cols && y < result.rows)
    {
        // Four-corner window sum, evaluated in 64-bit unsigned arithmetic
        // before the single conversion to float.
        float image_sq = (float)(
                (image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
                (image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
        float ccorr = result.ptr(y)[x];

        const float sqdiff = image_sq - 2.f * ccorr + templ_sqsum;
        const float denom = image_sq * templ_sqsum;

        // rsqrtf(0) is +inf, so a zero-energy window or template would put
        // Inf/NaN into the result. Fall back to 1 in that case.
        // NOTE(review): 1 mirrors what OpenCV's CPU matchTemplate reports for
        // TM_SQDIFF_NORMED with a degenerate normalizer - confirm this is the
        // desired convention here.
        result.ptr(y)[x] = denom > 0.f ? sqdiff * rsqrtf(denom) : 1.f;
    }
}
// Host-side launcher for matchTemplatePreparedKernel_8U_SQDIFF_NORMED.
//
// All arguments are forwarded unchanged to the kernel; result is updated in
// place and its cols/rows determine the launch grid. The call blocks until
// the kernel has finished, and the status returned by the synchronization is
// handed to cudaSafeCall.
void matchTemplatePrepared_8U_SQDIFF_NORMED(
        int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
        float templ_sqsum, DevMem2Df result)
{
    // 32x8 threads per block.
    dim3 block(32, 8);

    // Enough blocks along each axis to cover the whole result.
    dim3 blocks(divUp(result.cols, block.x),
                divUp(result.rows, block.y));

    matchTemplatePreparedKernel_8U_SQDIFF_NORMED<<<blocks, block>>>(
            w, h, image_sqsum, templ_sqsum, result);

    cudaSafeCall(cudaThreadSynchronize());
}
// Scales each element of result in place by
//     1 / sqrt(image_sq * templ_sqsum)
// where image_sq is the four-corner difference of image_sqsum over the
// window with corners (x, y) and (x + w, y + h).
//
// Launch layout: 2D grid of 2D blocks, one thread per result element;
// threads falling outside result.cols x result.rows do nothing.
//
// Parameters:
//   w, h         - column / row offsets of the window in image_sqsum
//                  (presumably the template width and height - confirm).
//   image_sqsum  - 64-bit unsigned table read at rows y, y + h and columns
//                  x, x + w (presumably the integral image of squared pixel
//                  values - confirm).
//   templ_sqsum  - scalar multiplied into the normalizer (presumably the sum
//                  of squared template values - confirm).
//   result       - values to normalize; overwritten with the scaled values.
__global__ void normalizeKernel_8U(
        int w, int h, const PtrStep_<unsigned long long> image_sqsum,
        float templ_sqsum, DevMem2Df result)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < result.cols && y < result.rows)
    {
        // Four-corner window sum, evaluated in 64-bit unsigned arithmetic
        // before the single conversion to float.
        float image_sq = (float)(
                (image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
                (image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));

        const float denom = image_sq * templ_sqsum;

        // rsqrtf(0) is +inf, so a zero-energy window or template would put
        // Inf/NaN into the result. Store 0 in that case.
        // NOTE(review): 0 mirrors what OpenCV's CPU matchTemplate reports for
        // the normalized correlation methods with a degenerate normalizer -
        // confirm this is the desired convention here.
        if (denom > 0.f)
            result.ptr(y)[x] *= rsqrtf(denom);
        else
            result.ptr(y)[x] = 0.f;
    }
}
// Host-side launcher for normalizeKernel_8U.
//
// All arguments are forwarded unchanged to the kernel; result is updated in
// place and its cols/rows determine the launch grid. The call blocks until
// the kernel has finished, and the status returned by the synchronization is
// handed to cudaSafeCall.
void normalize_8U(
        int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
        float templ_sqsum, DevMem2Df result)
{
    // 32x8 threads per block.
    dim3 block(32, 8);

    // Enough blocks along each axis to cover the whole result.
    dim3 blocks(divUp(result.cols, block.x),
                divUp(result.rows, block.y));

    normalizeKernel_8U<<<blocks, block>>>(
            w, h, image_sqsum, templ_sqsum, result);

    cudaSafeCall(cudaThreadSynchronize());
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录