Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
7d0feef7
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
7d0feef7
编写于
10月 20, 2010
作者:
V
Vladislav Vinogradov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
added vecmath to gpu module.
上级
0c3803a6
变更
7
展开全部
隐藏空白更改
内联
并排
Showing
7 changed file
with
1015 addition
and
207 deletion
+1015
-207
modules/gpu/src/cuda/filters.cu
modules/gpu/src/cuda/filters.cu
+117
-50
modules/gpu/src/cuda/saturate_cast.hpp
modules/gpu/src/cuda/saturate_cast.hpp
+0
-86
modules/gpu/src/cuda/vecmath.hpp
modules/gpu/src/cuda/vecmath.hpp
+824
-21
modules/gpu/src/cudastream.cpp
modules/gpu/src/cudastream.cpp
+1
-1
modules/gpu/src/filtering.cpp
modules/gpu/src/filtering.cpp
+62
-47
modules/gpu/src/imgproc_gpu.cpp
modules/gpu/src/imgproc_gpu.cpp
+1
-1
tests/gpu/src/filters.cpp
tests/gpu/src/filters.cpp
+10
-1
未找到文件。
modules/gpu/src/cuda/filters.cu
浏览文件 @
7d0feef7
...
...
@@ -44,6 +44,7 @@
#include "saturate_cast.hpp"
#include "safe_call.hpp"
#include "cuda_shared.hpp"
#include "vecmath.hpp"
using
namespace
cv
::
gpu
;
...
...
@@ -71,7 +72,7 @@ namespace cv { namespace gpu { namespace filters
namespace
filter_krnls
{
template
<
int
BLOCK_DIM_X
,
int
BLOCK_DIM_Y
,
int
KERNEL_SIZE
,
typename
T
,
typename
D
>
template
<
int
BLOCK_DIM_X
,
int
BLOCK_DIM_Y
,
int
KERNEL_SIZE
,
int
CN
,
typename
T
,
typename
D
>
__global__
void
linearRowFilter
(
const
T
*
src
,
size_t
src_step
,
D
*
dst
,
size_t
dst_step
,
int
anchor
,
int
width
,
int
height
)
{
__shared__
T
smem
[
BLOCK_DIM_Y
*
BLOCK_DIM_X
*
3
];
...
...
@@ -91,23 +92,24 @@ namespace filter_krnls
{
const
T
*
rowSrc
=
src
+
threadY
*
src_step
;
sDataRow
[
threadIdx
.
x
+
blockDim
.
x
]
=
threadX
<
width
?
rowSrc
[
threadX
]
:
0
;
sDataRow
[
threadIdx
.
x
+
blockDim
.
x
]
=
threadX
<
width
?
rowSrc
[
threadX
]
:
VecTraits
<
T
>::
all
(
0
)
;
sDataRow
[
threadIdx
.
x
]
=
prevThreadX
>=
0
?
rowSrc
[
prevThreadX
]
:
0
;
sDataRow
[
threadIdx
.
x
]
=
prevThreadX
>=
0
?
rowSrc
[
prevThreadX
]
:
VecTraits
<
T
>::
all
(
0
)
;
sDataRow
[(
blockDim
.
x
<<
1
)
+
threadIdx
.
x
]
=
nextThreadX
<
width
?
rowSrc
[
nextThreadX
]
:
0
;
sDataRow
[(
blockDim
.
x
<<
1
)
+
threadIdx
.
x
]
=
nextThreadX
<
width
?
rowSrc
[
nextThreadX
]
:
VecTraits
<
T
>::
all
(
0
)
;
__syncthreads
();
if
(
threadX
<
width
)
{
float
sum
=
0
;
typedef
typename
TypeVec
<
float
,
CN
>::
vec_t
sum_t
;
sum_t
sum
=
VecTraits
<
sum_t
>::
all
(
0
);
sDataRow
+=
threadIdx
.
x
+
blockDim
.
x
-
anchor
;
#pragma unroll
for
(
int
i
=
0
;
i
<
KERNEL_SIZE
;
++
i
)
sum
+=
cLinearKernel
[
i
]
*
sDataRow
[
i
];
sum
=
sum
+
sDataRow
[
i
]
*
cLinearKernel
[
i
];
dst
[
threadY
*
dst_step
+
threadX
]
=
saturate_cast
<
D
>
(
sum
);
}
...
...
@@ -117,7 +119,7 @@ namespace filter_krnls
namespace
cv
{
namespace
gpu
{
namespace
filters
{
template
<
int
KERNEL_SIZE
,
typename
T
,
typename
D
>
template
<
int
KERNEL_SIZE
,
int
CN
,
typename
T
,
typename
D
>
void
linearRowFilter_caller
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
int
anchor
)
{
const
int
BLOCK_DIM_X
=
16
;
...
...
@@ -126,51 +128,83 @@ namespace cv { namespace gpu { namespace filters
dim3
threads
(
BLOCK_DIM_X
,
BLOCK_DIM_Y
);
dim3
blocks
(
divUp
(
src
.
cols
,
BLOCK_DIM_X
),
divUp
(
src
.
rows
,
BLOCK_DIM_Y
));
filter_krnls
::
linearRowFilter
<
BLOCK_DIM_X
,
BLOCK_DIM_Y
,
KERNEL_SIZE
><<<
blocks
,
threads
>>>
(
src
.
ptr
,
src
.
elem_step
,
filter_krnls
::
linearRowFilter
<
BLOCK_DIM_X
,
BLOCK_DIM_Y
,
KERNEL_SIZE
,
CN
><<<
blocks
,
threads
>>>
(
src
.
ptr
,
src
.
elem_step
,
dst
.
ptr
,
dst
.
elem_step
,
anchor
,
src
.
cols
,
src
.
rows
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
template
<
typename
T
,
typename
D
>
template
<
int
CN
,
typename
T
,
typename
D
>
inline
void
linearRowFilter_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
typedef
void
(
*
caller_t
)(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
int
anchor
);
static
const
caller_t
callers
[]
=
{
linearRowFilter_caller
<
0
,
T
,
D
>
,
linearRowFilter_caller
<
1
,
T
,
D
>
,
linearRowFilter_caller
<
2
,
T
,
D
>
,
linearRowFilter_caller
<
3
,
T
,
D
>
,
linearRowFilter_caller
<
4
,
T
,
D
>
,
linearRowFilter_caller
<
5
,
T
,
D
>
,
linearRowFilter_caller
<
6
,
T
,
D
>
,
linearRowFilter_caller
<
7
,
T
,
D
>
,
linearRowFilter_caller
<
8
,
T
,
D
>
,
linearRowFilter_caller
<
9
,
T
,
D
>
,
linearRowFilter_caller
<
10
,
T
,
D
>
,
linearRowFilter_caller
<
11
,
T
,
D
>
,
linearRowFilter_caller
<
12
,
T
,
D
>
,
linearRowFilter_caller
<
13
,
T
,
D
>
,
linearRowFilter_caller
<
14
,
T
,
D
>
,
linearRowFilter_caller
<
15
,
T
,
D
>
};
{
linearRowFilter_caller
<
0
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
1
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
2
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
3
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
4
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
5
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
6
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
7
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
8
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
9
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
10
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
11
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
12
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
13
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
14
,
CN
,
T
,
D
>
,
linearRowFilter_caller
<
15
,
CN
,
T
,
D
>
};
loadLinearKernel
(
kernel
,
ksize
);
callers
[
ksize
]((
DevMem2D_
<
T
>
)
src
,
(
DevMem2D_
<
D
>
)
dst
,
anchor
);
}
void
linearRowFilter_gpu_
32s32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
void
linearRowFilter_gpu_
8u_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
int
,
int
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
linearRowFilter_gpu
<
4
,
uchar4
,
uchar4
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_
32s32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
void
linearRowFilter_gpu_
8u_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
int
,
float
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
linearRowFilter_gpu
<
4
,
uchar4
,
char4
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_
32f32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
void
linearRowFilter_gpu_
8s_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
float
,
int
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
linearRowFilter_gpu
<
4
,
char4
,
uchar4
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_
32f32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
void
linearRowFilter_gpu_
8s_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
float
,
float
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
linearRowFilter_gpu
<
4
,
char4
,
char4
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_16u_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
2
,
ushort2
,
ushort2
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_16u_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
2
,
ushort2
,
short2
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_16s_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
2
,
short2
,
ushort2
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_16s_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
2
,
short2
,
short2
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_32s_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
1
,
int
,
int
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_32s_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
1
,
int
,
float
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_32f_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
1
,
float
,
int
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearRowFilter_gpu_32f_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearRowFilter_gpu
<
1
,
float
,
float
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
}}}
namespace
filter_krnls
{
template
<
int
BLOCK_DIM_X
,
int
BLOCK_DIM_Y
,
int
KERNEL_SIZE
,
typename
T
,
typename
D
>
template
<
int
BLOCK_DIM_X
,
int
BLOCK_DIM_Y
,
int
KERNEL_SIZE
,
int
CN
,
typename
T
,
typename
D
>
__global__
void
linearColumnFilter
(
const
T
*
src
,
size_t
src_step
,
D
*
dst
,
size_t
dst_step
,
int
anchor
,
int
width
,
int
height
)
{
__shared__
T
smem
[
BLOCK_DIM_Y
*
BLOCK_DIM_X
*
3
];
...
...
@@ -192,23 +226,24 @@ namespace filter_krnls
{
const
T
*
colSrc
=
src
+
threadX
;
sDataColumn
[(
threadIdx
.
y
+
blockDim
.
y
)
*
smem_step
]
=
threadY
<
height
?
colSrc
[
threadY
*
src_step
]
:
0
;
sDataColumn
[(
threadIdx
.
y
+
blockDim
.
y
)
*
smem_step
]
=
threadY
<
height
?
colSrc
[
threadY
*
src_step
]
:
VecTraits
<
T
>::
all
(
0
)
;
sDataColumn
[
threadIdx
.
y
*
smem_step
]
=
prevThreadY
>=
0
?
colSrc
[
prevThreadY
*
src_step
]
:
0
;
sDataColumn
[
threadIdx
.
y
*
smem_step
]
=
prevThreadY
>=
0
?
colSrc
[
prevThreadY
*
src_step
]
:
VecTraits
<
T
>::
all
(
0
)
;
sDataColumn
[(
threadIdx
.
y
+
(
blockDim
.
y
<<
1
))
*
smem_step
]
=
nextThreadY
<
height
?
colSrc
[
nextThreadY
*
src_step
]
:
0
;
sDataColumn
[(
threadIdx
.
y
+
(
blockDim
.
y
<<
1
))
*
smem_step
]
=
nextThreadY
<
height
?
colSrc
[
nextThreadY
*
src_step
]
:
VecTraits
<
T
>::
all
(
0
)
;
__syncthreads
();
if
(
threadY
<
height
)
{
float
sum
=
0
;
typedef
typename
TypeVec
<
float
,
CN
>::
vec_t
sum_t
;
sum_t
sum
=
VecTraits
<
sum_t
>::
all
(
0
);
sDataColumn
+=
(
threadIdx
.
y
+
blockDim
.
y
-
anchor
)
*
smem_step
;
#pragma unroll
for
(
int
i
=
0
;
i
<
KERNEL_SIZE
;
++
i
)
sum
+=
cLinearKernel
[
i
]
*
sDataColumn
[
i
*
smem_step
];
sum
=
sum
+
sDataColumn
[
i
*
smem_step
]
*
cLinearKernel
[
i
];
dst
[
threadY
*
dst_step
+
threadX
]
=
saturate_cast
<
D
>
(
sum
);
}
...
...
@@ -218,7 +253,7 @@ namespace filter_krnls
namespace
cv
{
namespace
gpu
{
namespace
filters
{
template
<
int
KERNEL_SIZE
,
typename
T
,
typename
D
>
template
<
int
KERNEL_SIZE
,
int
CN
,
typename
T
,
typename
D
>
void
linearColumnFilter_caller
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
int
anchor
)
{
const
int
BLOCK_DIM_X
=
16
;
...
...
@@ -227,45 +262,77 @@ namespace cv { namespace gpu { namespace filters
dim3
threads
(
BLOCK_DIM_X
,
BLOCK_DIM_Y
);
dim3
blocks
(
divUp
(
src
.
cols
,
BLOCK_DIM_X
),
divUp
(
src
.
rows
,
BLOCK_DIM_Y
));
filter_krnls
::
linearColumnFilter
<
BLOCK_DIM_X
,
BLOCK_DIM_Y
,
KERNEL_SIZE
><<<
blocks
,
threads
>>>
(
src
.
ptr
,
src
.
elem_step
,
filter_krnls
::
linearColumnFilter
<
BLOCK_DIM_X
,
BLOCK_DIM_Y
,
KERNEL_SIZE
,
CN
><<<
blocks
,
threads
>>>
(
src
.
ptr
,
src
.
elem_step
,
dst
.
ptr
,
dst
.
elem_step
,
anchor
,
src
.
cols
,
src
.
rows
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
template
<
typename
T
,
typename
D
>
template
<
int
CN
,
typename
T
,
typename
D
>
inline
void
linearColumnFilter_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
typedef
void
(
*
caller_t
)(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
int
anchor
);
static
const
caller_t
callers
[]
=
{
linearColumnFilter_caller
<
0
,
T
,
D
>
,
linearColumnFilter_caller
<
1
,
T
,
D
>
,
linearColumnFilter_caller
<
2
,
T
,
D
>
,
linearColumnFilter_caller
<
3
,
T
,
D
>
,
linearColumnFilter_caller
<
4
,
T
,
D
>
,
linearColumnFilter_caller
<
5
,
T
,
D
>
,
linearColumnFilter_caller
<
6
,
T
,
D
>
,
linearColumnFilter_caller
<
7
,
T
,
D
>
,
linearColumnFilter_caller
<
8
,
T
,
D
>
,
linearColumnFilter_caller
<
9
,
T
,
D
>
,
linearColumnFilter_caller
<
10
,
T
,
D
>
,
linearColumnFilter_caller
<
11
,
T
,
D
>
,
linearColumnFilter_caller
<
12
,
T
,
D
>
,
linearColumnFilter_caller
<
13
,
T
,
D
>
,
linearColumnFilter_caller
<
14
,
T
,
D
>
,
linearColumnFilter_caller
<
15
,
T
,
D
>
};
{
linearColumnFilter_caller
<
0
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
1
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
2
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
3
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
4
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
5
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
6
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
7
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
8
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
9
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
10
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
11
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
12
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
13
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
14
,
CN
,
T
,
D
>
,
linearColumnFilter_caller
<
15
,
CN
,
T
,
D
>
};
loadLinearKernel
(
kernel
,
ksize
);
callers
[
ksize
]((
DevMem2D_
<
T
>
)
src
,
(
DevMem2D_
<
D
>
)
dst
,
anchor
);
}
void
linearColumnFilter_gpu_32s32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
void
linearColumnFilter_gpu_8u_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
4
,
uchar4
,
uchar4
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_8u_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
4
,
uchar4
,
char4
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_8s_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
4
,
char4
,
uchar4
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_8s_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
4
,
char4
,
char4
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_16u_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
2
,
ushort2
,
ushort2
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_16u_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
2
,
ushort2
,
short2
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_16s_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
2
,
short2
,
ushort2
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_16s_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
2
,
short2
,
short2
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_32s_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
int
,
int
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
linearColumnFilter_gpu
<
1
,
int
,
int
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_32s
32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
void
linearColumnFilter_gpu_32s
_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
int
,
float
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
linearColumnFilter_gpu
<
1
,
int
,
float
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_32f
32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
void
linearColumnFilter_gpu_32f
_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
float
,
int
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
linearColumnFilter_gpu
<
1
,
float
,
int
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
void
linearColumnFilter_gpu_32f
32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
void
linearColumnFilter_gpu_32f
_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
)
{
linearColumnFilter_gpu
<
float
,
float
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
linearColumnFilter_gpu
<
1
,
float
,
float
>
(
src
,
dst
,
kernel
,
ksize
,
anchor
);
}
}}}
...
...
modules/gpu/src/cuda/saturate_cast.hpp
浏览文件 @
7d0feef7
...
...
@@ -163,92 +163,6 @@ namespace cv
return
saturate_cast
<
uint
>
((
float
)
v
);
#endif
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uchar4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
char4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
ushort4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
short4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uint4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
int4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
float4
v
)
{
return
_Tp
(
v
);
}
template
<
>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
char4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<
>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
ushort4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<
>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
short4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<
>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
uint4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<
>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
int4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<
>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
float4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<
>
static
__device__
char4
saturate_cast
<
char4
>
(
uchar4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<
>
static
__device__
char4
saturate_cast
<
char4
>
(
ushort4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<
>
static
__device__
char4
saturate_cast
<
char4
>
(
short4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<
>
static
__device__
char4
saturate_cast
<
char4
>
(
uint4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<
>
static
__device__
char4
saturate_cast
<
char4
>
(
int4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<
>
static
__device__
char4
saturate_cast
<
char4
>
(
float4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<
>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
uchar4
v
)
{
return
make_ushort4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
char4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<
>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
short4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<
>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
uint4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<
>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
int4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<
>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
float4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<
>
static
__device__
short4
saturate_cast
<
short4
>
(
uchar4
v
)
{
return
make_short4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
short4
saturate_cast
<
short4
>
(
char4
v
)
{
return
make_short4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
short4
saturate_cast
<
short4
>
(
ushort4
v
)
{
return
make_short4
(
saturate_cast
<
short
>
(
v
.
x
),
saturate_cast
<
short
>
(
v
.
y
),
saturate_cast
<
short
>
(
v
.
z
),
saturate_cast
<
short
>
(
v
.
w
));
}
template
<
>
static
__device__
short4
saturate_cast
<
short4
>
(
uint4
v
)
{
return
make_short4
(
saturate_cast
<
short
>
(
v
.
x
),
saturate_cast
<
short
>
(
v
.
y
),
saturate_cast
<
short
>
(
v
.
z
),
saturate_cast
<
short
>
(
v
.
w
));
}
template
<
>
static
__device__
short4
saturate_cast
<
short4
>
(
int4
v
)
{
return
make_short4
(
saturate_cast
<
short
>
(
v
.
x
),
saturate_cast
<
short
>
(
v
.
y
),
saturate_cast
<
short
>
(
v
.
z
),
saturate_cast
<
short
>
(
v
.
w
));
}
template
<
>
static
__device__
short4
saturate_cast
<
short4
>
(
float4
v
)
{
return
make_short4
(
saturate_cast
<
short
>
(
v
.
x
),
saturate_cast
<
short
>
(
v
.
y
),
saturate_cast
<
short
>
(
v
.
z
),
saturate_cast
<
short
>
(
v
.
w
));
}
template
<
>
static
__device__
uint4
saturate_cast
<
uint4
>
(
uchar4
v
)
{
return
make_uint4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
uint4
saturate_cast
<
uint4
>
(
char4
v
)
{
return
make_uint4
(
saturate_cast
<
uint
>
(
v
.
x
),
saturate_cast
<
uint
>
(
v
.
y
),
saturate_cast
<
uint
>
(
v
.
z
),
saturate_cast
<
uint
>
(
v
.
w
));
}
template
<
>
static
__device__
uint4
saturate_cast
<
uint4
>
(
ushort4
v
)
{
return
make_uint4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
uint4
saturate_cast
<
uint4
>
(
short4
v
)
{
return
make_uint4
(
saturate_cast
<
uint
>
(
v
.
x
),
saturate_cast
<
uint
>
(
v
.
y
),
saturate_cast
<
uint
>
(
v
.
z
),
saturate_cast
<
uint
>
(
v
.
w
));
}
template
<
>
static
__device__
uint4
saturate_cast
<
uint4
>
(
int4
v
)
{
return
make_uint4
(
saturate_cast
<
uint
>
(
v
.
x
),
saturate_cast
<
uint
>
(
v
.
y
),
saturate_cast
<
uint
>
(
v
.
z
),
saturate_cast
<
uint
>
(
v
.
w
));
}
template
<
>
static
__device__
uint4
saturate_cast
<
uint4
>
(
float4
v
)
{
return
make_uint4
(
saturate_cast
<
uint
>
(
v
.
x
),
saturate_cast
<
uint
>
(
v
.
y
),
saturate_cast
<
uint
>
(
v
.
z
),
saturate_cast
<
uint
>
(
v
.
w
));
}
template
<
>
static
__device__
int4
saturate_cast
<
int4
>
(
uchar4
v
)
{
return
make_int4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
int4
saturate_cast
<
int4
>
(
char4
v
)
{
return
make_int4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
int4
saturate_cast
<
int4
>
(
ushort4
v
)
{
return
make_int4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
int4
saturate_cast
<
int4
>
(
short4
v
)
{
return
make_int4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<
>
static
__device__
int4
saturate_cast
<
int4
>
(
uint4
v
)
{
return
make_int4
(
saturate_cast
<
int
>
(
v
.
x
),
saturate_cast
<
int
>
(
v
.
y
),
saturate_cast
<
int
>
(
v
.
z
),
saturate_cast
<
int
>
(
v
.
w
));
}
template
<
>
static
__device__
int4
saturate_cast
<
int4
>
(
float4
v
)
{
return
make_int4
(
saturate_cast
<
int
>
(
v
.
x
),
saturate_cast
<
int
>
(
v
.
y
),
saturate_cast
<
int
>
(
v
.
z
),
saturate_cast
<
int
>
(
v
.
w
));
}
}
}
...
...
modules/gpu/src/cuda/vecmath.hpp
浏览文件 @
7d0feef7
此差异已折叠。
点击以展开。
modules/gpu/src/cudastream.cpp
浏览文件 @
7d0feef7
...
...
@@ -163,7 +163,7 @@ void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize(
void
cv
::
gpu
::
Stream
::
enqueueDownload
(
const
GpuMat
&
src
,
Mat
&
dst
)
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert
(
src
.
cols
==
dst
.
cols
&&
src
.
rows
==
dst
.
rows
&&
src
.
type
()
==
dst
.
type
()
)
CV_Assert
(
src
.
cols
==
dst
.
cols
&&
src
.
rows
==
dst
.
rows
&&
src
.
type
()
==
dst
.
type
()
)
;
devcopy
(
src
,
dst
,
impl
->
stream
,
cudaMemcpyDeviceToHost
);
}
void
cv
::
gpu
::
Stream
::
enqueueDownload
(
const
GpuMat
&
src
,
CudaMem
&
dst
)
{
devcopy
(
src
,
dst
,
impl
->
stream
,
cudaMemcpyDeviceToHost
);
}
...
...
modules/gpu/src/filtering.cpp
浏览文件 @
7d0feef7
...
...
@@ -577,15 +577,31 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
namespace
cv
{
namespace
gpu
{
namespace
filters
{
void
linearRowFilter_gpu_32s32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32s32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8u_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8u_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8s_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8s_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_16u_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_16u_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_16s_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_16s_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32s_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32s_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_8u_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_8u_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_8s_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_8s_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_16u_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_16u_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_16s_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_16s_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
}}}
namespace
...
...
@@ -637,15 +653,15 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterRow_8u_C1R
,
0
,
0
,
nppiFilterRow_8u_C4R
};
static
const
gpuFilter1D_t
gpuFilter1D_callers
[
6
][
6
]
=
{
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32s
32s
,
linearRowFilter_gpu_32s32f
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32f
32s
,
linearRowFilter_gpu_32f32f
}
{
linearRowFilter_gpu_8u_8u_c4
,
linearRowFilter_gpu_8u_8s_c4
,
0
,
0
,
0
,
0
},
{
linearRowFilter_gpu_8s_8u_c4
,
linearRowFilter_gpu_8s_8s_c4
,
0
,
0
,
0
,
0
},
{
0
,
0
,
linearRowFilter_gpu_16u_16u_c2
,
linearRowFilter_gpu_16u_16s_c2
,
0
,
0
},
{
0
,
0
,
linearRowFilter_gpu_16s_16u_c2
,
linearRowFilter_gpu_16s_16s_c2
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32s
_32s_c1
,
linearRowFilter_gpu_32s_32f_c1
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32f
_32s_c1
,
linearRowFilter_gpu_32f_32f_c1
}
};
if
((
srcType
==
CV_8UC1
||
srcType
==
CV_8UC4
)
&&
bufType
==
srcType
)
if
((
bufType
==
srcType
)
&&
(
srcType
==
CV_8UC1
||
srcType
==
CV_8UC4
)
)
{
GpuMat
gpu_row_krnl
;
int
nDivisor
;
...
...
@@ -657,21 +673,19 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
return
Ptr
<
BaseRowFilter_GPU
>
(
new
NppLinearRowFilter
(
ksize
,
anchor
,
gpu_row_krnl
,
nDivisor
,
nppFilter1D_callers
[
CV_MAT_CN
(
srcType
)]));
}
else
if
((
srcType
==
CV_32SC1
||
srcType
==
CV_32FC1
)
&&
(
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
))
{
Mat
temp
(
rowKernel
.
size
(),
CV_32FC1
);
rowKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
CV_Assert
(
srcType
==
CV_8UC4
||
srcType
==
CV_8SC4
||
srcType
==
CV_16UC2
||
srcType
==
CV_16SC2
||
srcType
==
CV_32SC1
||
srcType
==
CV_32FC1
)
;
CV_Assert
(
bufType
==
CV_8UC4
||
bufType
==
CV_8SC4
||
bufType
==
CV_16UC2
||
bufType
==
CV_16SC2
||
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
);
return
Ptr
<
BaseRowFilter_GPU
>
(
new
GpuLinearRowFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
srcType
)][
CV_MAT_DEPTH
(
bufType
)])
);
}
Mat
temp
(
rowKernel
.
size
(),
CV_32FC1
);
rowKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
CV_Assert
(
!
"Unsupported types"
);
return
Ptr
<
BaseRowFilter_GPU
>
(
0
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
return
Ptr
<
BaseRowFilter_GPU
>
(
new
GpuLinearRowFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
srcType
)][
CV_MAT_DEPTH
(
bufType
)]));
}
namespace
...
...
@@ -718,15 +732,18 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterColumn_8u_C1R
,
0
,
0
,
nppiFilterColumn_8u_C4R
};
static
const
gpuFilter1D_t
gpuFilter1D_callers
[
6
][
6
]
=
{
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32s
32s
,
linearColumnFilter_gpu_32s32f
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32f
32s
,
linearColumnFilter_gpu_32f32f
}
{
linearColumnFilter_gpu_8u_8u_c4
,
linearColumnFilter_gpu_8u_8s_c4
,
0
,
0
,
0
,
0
},
{
linearColumnFilter_gpu_8s_8u_c4
,
linearColumnFilter_gpu_8s_8s_c4
,
0
,
0
,
0
,
0
},
{
0
,
0
,
linearColumnFilter_gpu_16u_16u_c2
,
linearColumnFilter_gpu_16u_16s_c2
,
0
,
0
},
{
0
,
0
,
linearColumnFilter_gpu_16s_16u_c2
,
linearColumnFilter_gpu_16s_16s_c2
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32s
_32s_c1
,
linearColumnFilter_gpu_32s_32f_c1
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32f
_32s_c1
,
linearColumnFilter_gpu_32f_32f_c1
}
};
if
((
bufType
==
CV_8UC1
||
bufType
==
CV_8UC4
)
&&
dstType
==
bufType
)
double
kernelMin
;
minMaxLoc
(
columnKernel
,
&
kernelMin
);
if
((
bufType
==
dstType
)
&&
(
bufType
==
CV_8UC1
||
bufType
==
CV_8UC4
))
{
GpuMat
gpu_col_krnl
;
int
nDivisor
;
...
...
@@ -738,21 +755,19 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
NppLinearColumnFilter
(
ksize
,
anchor
,
gpu_col_krnl
,
nDivisor
,
nppFilter1D_callers
[
CV_MAT_CN
(
bufType
)]));
}
else
if
((
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
)
&&
(
dstType
==
CV_32SC1
||
dstType
==
CV_32FC1
))
{
Mat
temp
(
columnKernel
.
size
(),
CV_32FC1
);
columnKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
CV_Assert
(
dstType
==
CV_8UC4
||
dstType
==
CV_8SC4
||
dstType
==
CV_16UC2
||
dstType
==
CV_16SC2
||
dstType
==
CV_32SC1
||
dstType
==
CV_32FC1
)
;
CV_Assert
(
bufType
==
CV_8UC4
||
bufType
==
CV_8SC4
||
bufType
==
CV_16UC2
||
bufType
==
CV_16SC2
||
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
GpuLinearColumnFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
bufType
)][
CV_MAT_DEPTH
(
dstType
)]));
}
Mat
temp
(
columnKernel
.
size
(),
CV_32FC1
);
columnKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
CV_Assert
(
!
"Unsupported types"
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
0
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
GpuLinearColumnFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
bufType
)][
CV_MAT_DEPTH
(
dstType
)])
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
rowKernel
,
const
Mat
&
columnKernel
,
...
...
modules/gpu/src/imgproc_gpu.cpp
浏览文件 @
7d0feef7
...
...
@@ -652,7 +652,7 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
)
{
CV_Assert
(
src
.
type
()
==
CV_32FC1
)
CV_Assert
(
src
.
type
()
==
CV_32FC1
)
;
dst
.
create
(
src
.
size
(),
src
.
type
()
);
...
...
tests/gpu/src/filters.cpp
浏览文件 @
7d0feef7
...
...
@@ -166,6 +166,8 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
int
test
(
const
Mat
&
img
)
{
if
(
img
.
type
()
!=
CV_8UC1
)
return
CvTS
::
OK
;
int
ksizes
[]
=
{
3
,
5
,
7
};
int
ksizes_num
=
sizeof
(
ksizes
)
/
sizeof
(
int
);
...
...
@@ -181,8 +183,10 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
cv
::
Sobel
(
img
,
cpudst
,
-
1
,
dx
,
dy
,
ksizes
[
i
]);
GpuMat
gpu1
(
img
);
gpu1
.
convertTo
(
gpu1
,
CV_32S
);
GpuMat
gpudst
;
cv
::
gpu
::
Sobel
(
gpu1
,
gpudst
,
-
1
,
dx
,
dy
,
ksizes
[
i
]);
gpudst
.
convertTo
(
gpudst
,
CV_8U
);
if
(
CheckNorm
(
cpudst
,
gpudst
,
Size
(
ksizes
[
i
],
ksizes
[
i
]))
!=
CvTS
::
OK
)
test_res
=
CvTS
::
FAIL_GENERIC
;
...
...
@@ -200,15 +204,20 @@ struct CV_GpuNppImageScharrTest : public CV_GpuNppFilterTest
int
test
(
const
Mat
&
img
)
{
if
(
img
.
type
()
!=
CV_8UC1
)
return
CvTS
::
OK
;
int
dx
=
1
,
dy
=
0
;
Mat
cpudst
;
cv
::
Scharr
(
img
,
cpudst
,
-
1
,
dx
,
dy
);
GpuMat
gpu1
(
img
);
gpu1
.
convertTo
(
gpu1
,
CV_32S
);
GpuMat
gpudst
;
cv
::
gpu
::
Scharr
(
gpu1
,
gpudst
,
-
1
,
dx
,
dy
);
gpudst
.
convertTo
(
gpudst
,
CV_8U
);
return
CheckNorm
(
cpudst
,
gpudst
,
Size
(
3
,
3
));
}
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录