Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
ef9917ec
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ef9917ec
编写于
7月 29, 2013
作者:
V
Vladislav Vinogradov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
used new device layer for cv::gpu::compare
上级
43e81111
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed file
with
241 addition
and
425 deletion
+241
-425
modules/cudaarithm/src/cuda/cmp_mat.cu
modules/cudaarithm/src/cuda/cmp_mat.cu
+122
-109
modules/cudaarithm/src/cuda/cmp_scalar.cu
modules/cudaarithm/src/cuda/cmp_scalar.cu
+116
-175
modules/cudaarithm/src/element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+2
-140
modules/cudev/include/opencv2/cudev/util/vec_traits.hpp
modules/cudev/include/opencv2/cudev/util/vec_traits.hpp
+1
-1
未找到文件。
modules/cudaarithm/src/cuda/cmp_mat.cu
浏览文件 @
ef9917ec
...
...
@@ -40,30 +40,54 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using
namespace
cv
::
cuda
;
using
namespace
cv
::
cuda
::
device
;
#else
namespace
arithm
#include "opencv2/cudev.hpp"
using
namespace
cv
::
cudev
;
void
cmpMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
cmpop
);
namespace
{
template
<
class
Op
,
typename
T
>
struct
CmpOp
:
binary_function
<
T
,
T
,
uchar
>
{
__device__
__forceinline__
uchar
operator
()(
T
a
,
T
b
)
const
{
Op
op
;
return
-
op
(
a
,
b
);
}
};
template
<
typename
ScalarDepth
>
struct
TransformPolicy
:
DefaultTransformPolicy
{
};
template
<
>
struct
TransformPolicy
<
double
>
:
DefaultTransformPolicy
{
enum
{
shift
=
1
};
};
template
<
template
<
typename
>
class
Op
,
typename
T
>
void
cmpMat_v1
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
)
{
CmpOp
<
Op
<
T
>
,
T
>
op
;
gridTransformBinary_
<
TransformPolicy
<
T
>
>
(
globPtr
<
T
>
(
src1
),
globPtr
<
T
>
(
src2
),
globPtr
<
uchar
>
(
dst
),
op
,
stream
);
}
struct
VCmpEq4
:
binary_function
<
uint
,
uint
,
uint
>
{
__device__
__forceinline__
uint
operator
()(
uint
a
,
uint
b
)
const
{
return
vcmpeq4
(
a
,
b
);
}
__host__
__device__
__forceinline__
VCmpEq4
()
{}
__host__
__device__
__forceinline__
VCmpEq4
(
const
VCmpEq4
&
)
{}
};
struct
VCmpNe4
:
binary_function
<
uint
,
uint
,
uint
>
{
...
...
@@ -71,9 +95,6 @@ namespace arithm
{
return
vcmpne4
(
a
,
b
);
}
__host__
__device__
__forceinline__
VCmpNe4
()
{}
__host__
__device__
__forceinline__
VCmpNe4
(
const
VCmpNe4
&
)
{}
};
struct
VCmpLt4
:
binary_function
<
uint
,
uint
,
uint
>
{
...
...
@@ -81,9 +102,6 @@ namespace arithm
{
return
vcmplt4
(
a
,
b
);
}
__host__
__device__
__forceinline__
VCmpLt4
()
{}
__host__
__device__
__forceinline__
VCmpLt4
(
const
VCmpLt4
&
)
{}
};
struct
VCmpLe4
:
binary_function
<
uint
,
uint
,
uint
>
{
...
...
@@ -91,116 +109,111 @@ namespace arithm
{
return
vcmple4
(
a
,
b
);
}
__host__
__device__
__forceinline__
VCmpLe4
()
{}
__host__
__device__
__forceinline__
VCmpLe4
(
const
VCmpLe4
&
)
{}
};
template
<
class
Op
,
typename
T
>
struct
Cmp
:
binary_function
<
T
,
T
,
uchar
>
{
__device__
__forceinline__
uchar
operator
()(
T
a
,
T
b
)
const
{
Op
op
;
return
-
op
(
a
,
b
);
}
};
}
namespace
cv
{
namespace
cuda
{
namespace
device
{
template
<
>
struct
TransformFunctorTraits
<
arithm
::
VCmpEq4
>
:
arithm
::
ArithmFuncTraits
<
sizeof
(
uint
),
sizeof
(
uint
)
>
{
};
template
<
>
struct
TransformFunctorTraits
<
arithm
::
VCmpNe4
>
:
arithm
::
ArithmFuncTraits
<
sizeof
(
uint
),
sizeof
(
uint
)
>
{
};
template
<
>
struct
TransformFunctorTraits
<
arithm
::
VCmpLt4
>
:
arithm
::
ArithmFuncTraits
<
sizeof
(
uint
),
sizeof
(
uint
)
>
{
};
template
<
>
struct
TransformFunctorTraits
<
arithm
::
VCmpLe4
>
:
arithm
::
ArithmFuncTraits
<
sizeof
(
uint
),
sizeof
(
uint
)
>
void
cmpMatEq_v4
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
)
{
}
;
const
int
vcols
=
src1
.
cols
>>
2
;
template
<
class
Op
,
typename
T
>
struct
TransformFunctorTraits
<
arithm
::
Cmp
<
Op
,
T
>
>
:
arithm
::
ArithmFuncTraits
<
sizeof
(
T
),
sizeof
(
uchar
)
>
{
};
}}}
GlobPtrSz
<
uint
>
src1_
=
globPtr
((
uint
*
)
src1
.
data
,
src1
.
step
,
src1
.
rows
,
vcols
);
GlobPtrSz
<
uint
>
src2_
=
globPtr
((
uint
*
)
src2
.
data
,
src2
.
step
,
src1
.
rows
,
vcols
);
GlobPtrSz
<
uint
>
dst_
=
globPtr
((
uint
*
)
dst
.
data
,
dst
.
step
,
src1
.
rows
,
vcols
);
namespace
arithm
{
void
cmpMatEq_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
)
{
device
::
transform
(
src1
,
src2
,
dst
,
VCmpEq4
(),
WithOutMask
(),
stream
);
gridTransformBinary
(
src1_
,
src2_
,
dst_
,
VCmpEq4
(),
stream
);
}
void
cmpMatNe_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
)
void
cmpMatNe_v4
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
)
{
device
::
transform
(
src1
,
src2
,
dst
,
VCmpNe4
(),
WithOutMask
(),
stream
);
const
int
vcols
=
src1
.
cols
>>
2
;
GlobPtrSz
<
uint
>
src1_
=
globPtr
((
uint
*
)
src1
.
data
,
src1
.
step
,
src1
.
rows
,
vcols
);
GlobPtrSz
<
uint
>
src2_
=
globPtr
((
uint
*
)
src2
.
data
,
src2
.
step
,
src1
.
rows
,
vcols
);
GlobPtrSz
<
uint
>
dst_
=
globPtr
((
uint
*
)
dst
.
data
,
dst
.
step
,
src1
.
rows
,
vcols
);
gridTransformBinary
(
src1_
,
src2_
,
dst_
,
VCmpNe4
(),
stream
);
}
void
cmpMatLt_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
)
void
cmpMatLt_v4
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
)
{
device
::
transform
(
src1
,
src2
,
dst
,
VCmpLt4
(),
WithOutMask
(),
stream
);
const
int
vcols
=
src1
.
cols
>>
2
;
GlobPtrSz
<
uint
>
src1_
=
globPtr
((
uint
*
)
src1
.
data
,
src1
.
step
,
src1
.
rows
,
vcols
);
GlobPtrSz
<
uint
>
src2_
=
globPtr
((
uint
*
)
src2
.
data
,
src2
.
step
,
src1
.
rows
,
vcols
);
GlobPtrSz
<
uint
>
dst_
=
globPtr
((
uint
*
)
dst
.
data
,
dst
.
step
,
src1
.
rows
,
vcols
);
gridTransformBinary
(
src1_
,
src2_
,
dst_
,
VCmpLt4
(),
stream
);
}
void
cmpMatLe_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
)
void
cmpMatLe_v4
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
)
{
device
::
transform
(
src1
,
src2
,
dst
,
VCmpLe4
(),
WithOutMask
(),
stream
);
const
int
vcols
=
src1
.
cols
>>
2
;
GlobPtrSz
<
uint
>
src1_
=
globPtr
((
uint
*
)
src1
.
data
,
src1
.
step
,
src1
.
rows
,
vcols
);
GlobPtrSz
<
uint
>
src2_
=
globPtr
((
uint
*
)
src2
.
data
,
src2
.
step
,
src1
.
rows
,
vcols
);
GlobPtrSz
<
uint
>
dst_
=
globPtr
((
uint
*
)
dst
.
data
,
dst
.
step
,
src1
.
rows
,
vcols
);
gridTransformBinary
(
src1_
,
src2_
,
dst_
,
VCmpLe4
(),
stream
);
}
}
template
<
template
<
typename
>
class
Op
,
typename
T
>
void
cmpMat
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
)
void
cmpMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
cmpop
)
{
typedef
void
(
*
func_t
)(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
);
static
const
func_t
funcs
[
7
][
4
]
=
{
{
cmpMat_v1
<
equal_to
,
uchar
>
,
cmpMat_v1
<
not_equal_to
,
uchar
>
,
cmpMat_v1
<
less
,
uchar
>
,
cmpMat_v1
<
less_equal
,
uchar
>
},
{
cmpMat_v1
<
equal_to
,
schar
>
,
cmpMat_v1
<
not_equal_to
,
schar
>
,
cmpMat_v1
<
less
,
schar
>
,
cmpMat_v1
<
less_equal
,
schar
>
},
{
cmpMat_v1
<
equal_to
,
ushort
>
,
cmpMat_v1
<
not_equal_to
,
ushort
>
,
cmpMat_v1
<
less
,
ushort
>
,
cmpMat_v1
<
less_equal
,
ushort
>
},
{
cmpMat_v1
<
equal_to
,
short
>
,
cmpMat_v1
<
not_equal_to
,
short
>
,
cmpMat_v1
<
less
,
short
>
,
cmpMat_v1
<
less_equal
,
short
>
},
{
cmpMat_v1
<
equal_to
,
int
>
,
cmpMat_v1
<
not_equal_to
,
int
>
,
cmpMat_v1
<
less
,
int
>
,
cmpMat_v1
<
less_equal
,
int
>
},
{
cmpMat_v1
<
equal_to
,
float
>
,
cmpMat_v1
<
not_equal_to
,
float
>
,
cmpMat_v1
<
less
,
float
>
,
cmpMat_v1
<
less_equal
,
float
>
},
{
cmpMat_v1
<
equal_to
,
double
>
,
cmpMat_v1
<
not_equal_to
,
double
>
,
cmpMat_v1
<
less
,
double
>
,
cmpMat_v1
<
less_equal
,
double
>
}
};
typedef
void
(
*
func_v4_t
)(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
);
static
const
func_v4_t
funcs_v4
[]
=
{
Cmp
<
Op
<
T
>
,
T
>
op
;
device
::
transform
((
PtrStepSz
<
T
>
)
src1
,
(
PtrStepSz
<
T
>
)
src2
,
dst
,
op
,
WithOutMask
(),
stream
);
}
cmpMatEq_v4
,
cmpMatNe_v4
,
cmpMatLt_v4
,
cmpMatLe_v4
};
const
int
depth
=
src1
.
depth
();
template
<
typename
T
>
void
cmpMatEq
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
)
CV_DbgAssert
(
depth
<=
CV_64F
);
static
const
int
codes
[]
=
{
cmpMat
<
equal_to
,
T
>
(
src1
,
src2
,
dst
,
stream
);
}
template
<
typename
T
>
void
cmpMatNe
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
)
0
,
2
,
3
,
2
,
3
,
1
}
;
const
GpuMat
*
psrc1
[]
=
{
cmpMat
<
not_equal_to
,
T
>
(
src1
,
src2
,
dst
,
stream
);
}
template
<
typename
T
>
void
cmpMatLt
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
)
&
src1
,
&
src2
,
&
src2
,
&
src1
,
&
src1
,
&
src1
}
;
const
GpuMat
*
psrc2
[]
=
{
cmpMat
<
less
,
T
>
(
src1
,
src2
,
dst
,
stream
);
}
template
<
typename
T
>
void
cmpMatLe
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
)
&
src2
,
&
src1
,
&
src1
,
&
src2
,
&
src2
,
&
src2
};
const
int
code
=
codes
[
cmpop
];
GpuMat
src1_
=
psrc1
[
cmpop
]
->
reshape
(
1
);
GpuMat
src2_
=
psrc2
[
cmpop
]
->
reshape
(
1
);
GpuMat
dst_
=
dst
.
reshape
(
1
);
if
(
depth
==
CV_8U
&&
(
src1_
.
cols
&
3
)
==
0
)
{
cmpMat
<
less_equal
,
T
>
(
src1
,
src2
,
dst
,
stream
);
const
intptr_t
src1ptr
=
reinterpret_cast
<
intptr_t
>
(
src1_
.
data
);
const
intptr_t
src2ptr
=
reinterpret_cast
<
intptr_t
>
(
src2_
.
data
);
const
intptr_t
dstptr
=
reinterpret_cast
<
intptr_t
>
(
dst_
.
data
);
const
bool
isAllAligned
=
(
src1ptr
&
31
)
==
0
&&
(
src2ptr
&
31
)
==
0
&&
(
dstptr
&
31
)
==
0
;
if
(
isAllAligned
)
{
funcs_v4
[
code
](
src1_
,
src2_
,
dst_
,
stream
);
return
;
}
}
template
void
cmpMatEq
<
uchar
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatEq
<
schar
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatEq
<
ushort
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatEq
<
short
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatEq
<
int
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatEq
<
float
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatEq
<
double
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatNe
<
uchar
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatNe
<
schar
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatNe
<
ushort
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatNe
<
short
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatNe
<
int
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatNe
<
float
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatNe
<
double
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLt
<
uchar
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLt
<
schar
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLt
<
ushort
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLt
<
short
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLt
<
int
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLt
<
float
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLt
<
double
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLe
<
uchar
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLe
<
schar
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLe
<
ushort
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLe
<
short
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLe
<
int
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLe
<
float
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
void
cmpMatLe
<
double
>(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
const
func_t
func
=
funcs
[
depth
][
code
];
func
(
src1_
,
src2_
,
dst_
,
stream
);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/cmp_scalar.cu
浏览文件 @
ef9917ec
此差异已折叠。
点击以展开。
modules/cudaarithm/src/element_operations.cpp
浏览文件 @
ef9917ec
...
...
@@ -454,147 +454,9 @@ void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream
//////////////////////////////////////////////////////////////////////////////
// compare
namespace
arithm
{
void
cmpMatEq_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
void
cmpMatNe_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
void
cmpMatLt_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
void
cmpMatLe_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpMatEq
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpMatNe
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpMatLt
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpMatLe
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
static
void
cmpMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
_stream
,
int
cmpop
)
{
using
namespace
arithm
;
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
4
]
=
{
{
cmpMatEq
<
unsigned
char
>
,
cmpMatNe
<
unsigned
char
>
,
cmpMatLt
<
unsigned
char
>
,
cmpMatLe
<
unsigned
char
>
},
{
cmpMatEq
<
signed
char
>
,
cmpMatNe
<
signed
char
>
,
cmpMatLt
<
signed
char
>
,
cmpMatLe
<
signed
char
>
},
{
cmpMatEq
<
unsigned
short
>
,
cmpMatNe
<
unsigned
short
>
,
cmpMatLt
<
unsigned
short
>
,
cmpMatLe
<
unsigned
short
>
},
{
cmpMatEq
<
short
>
,
cmpMatNe
<
short
>
,
cmpMatLt
<
short
>
,
cmpMatLe
<
short
>
},
{
cmpMatEq
<
int
>
,
cmpMatNe
<
int
>
,
cmpMatLt
<
int
>
,
cmpMatLe
<
int
>
},
{
cmpMatEq
<
float
>
,
cmpMatNe
<
float
>
,
cmpMatLt
<
float
>
,
cmpMatLe
<
float
>
},
{
cmpMatEq
<
double
>
,
cmpMatNe
<
double
>
,
cmpMatLt
<
double
>
,
cmpMatLe
<
double
>
}
};
typedef
void
(
*
func_v4_t
)(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
static
const
func_v4_t
funcs_v4
[]
=
{
cmpMatEq_v4
,
cmpMatNe_v4
,
cmpMatLt_v4
,
cmpMatLe_v4
};
const
int
depth
=
src1
.
depth
();
const
int
cn
=
src1
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
static
const
int
codes
[]
=
{
0
,
2
,
3
,
2
,
3
,
1
};
const
GpuMat
*
psrc1
[]
=
{
&
src1
,
&
src2
,
&
src2
,
&
src1
,
&
src1
,
&
src1
};
const
GpuMat
*
psrc2
[]
=
{
&
src2
,
&
src1
,
&
src1
,
&
src2
,
&
src2
,
&
src2
};
const
int
code
=
codes
[
cmpop
];
PtrStepSzb
src1_
(
src1
.
rows
,
src1
.
cols
*
cn
,
psrc1
[
cmpop
]
->
data
,
psrc1
[
cmpop
]
->
step
);
PtrStepSzb
src2_
(
src1
.
rows
,
src1
.
cols
*
cn
,
psrc2
[
cmpop
]
->
data
,
psrc2
[
cmpop
]
->
step
);
PtrStepSzb
dst_
(
src1
.
rows
,
src1
.
cols
*
cn
,
dst
.
data
,
dst
.
step
);
if
(
depth
==
CV_8U
&&
(
src1_
.
cols
&
3
)
==
0
)
{
const
intptr_t
src1ptr
=
reinterpret_cast
<
intptr_t
>
(
src1_
.
data
);
const
intptr_t
src2ptr
=
reinterpret_cast
<
intptr_t
>
(
src2_
.
data
);
const
intptr_t
dstptr
=
reinterpret_cast
<
intptr_t
>
(
dst_
.
data
);
const
bool
isAllAligned
=
(
src1ptr
&
31
)
==
0
&&
(
src2ptr
&
31
)
==
0
&&
(
dstptr
&
31
)
==
0
;
if
(
isAllAligned
)
{
const
int
vcols
=
src1_
.
cols
>>
2
;
void
cmpMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
cmpop
);
funcs_v4
[
code
](
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
}
const
func_t
func
=
funcs
[
depth
][
code
];
func
(
src1_
,
src2_
,
dst_
,
stream
);
}
namespace
arithm
{
template
<
typename
T
>
void
cmpScalarEq
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarNe
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarLt
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarLe
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarGt
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarGe
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
namespace
{
template
<
typename
T
>
void
castScalar
(
Scalar
&
sc
)
{
sc
.
val
[
0
]
=
saturate_cast
<
T
>
(
sc
.
val
[
0
]);
sc
.
val
[
1
]
=
saturate_cast
<
T
>
(
sc
.
val
[
1
]);
sc
.
val
[
2
]
=
saturate_cast
<
T
>
(
sc
.
val
[
2
]);
sc
.
val
[
3
]
=
saturate_cast
<
T
>
(
sc
.
val
[
3
]);
}
}
static
void
cmpScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
cmpop
)
{
using
namespace
arithm
;
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
6
]
=
{
{
cmpScalarEq
<
unsigned
char
>
,
cmpScalarGt
<
unsigned
char
>
,
cmpScalarGe
<
unsigned
char
>
,
cmpScalarLt
<
unsigned
char
>
,
cmpScalarLe
<
unsigned
char
>
,
cmpScalarNe
<
unsigned
char
>
},
{
cmpScalarEq
<
signed
char
>
,
cmpScalarGt
<
signed
char
>
,
cmpScalarGe
<
signed
char
>
,
cmpScalarLt
<
signed
char
>
,
cmpScalarLe
<
signed
char
>
,
cmpScalarNe
<
signed
char
>
},
{
cmpScalarEq
<
unsigned
short
>
,
cmpScalarGt
<
unsigned
short
>
,
cmpScalarGe
<
unsigned
short
>
,
cmpScalarLt
<
unsigned
short
>
,
cmpScalarLe
<
unsigned
short
>
,
cmpScalarNe
<
unsigned
short
>
},
{
cmpScalarEq
<
short
>
,
cmpScalarGt
<
short
>
,
cmpScalarGe
<
short
>
,
cmpScalarLt
<
short
>
,
cmpScalarLe
<
short
>
,
cmpScalarNe
<
short
>
},
{
cmpScalarEq
<
int
>
,
cmpScalarGt
<
int
>
,
cmpScalarGe
<
int
>
,
cmpScalarLt
<
int
>
,
cmpScalarLe
<
int
>
,
cmpScalarNe
<
int
>
},
{
cmpScalarEq
<
float
>
,
cmpScalarGt
<
float
>
,
cmpScalarGe
<
float
>
,
cmpScalarLt
<
float
>
,
cmpScalarLe
<
float
>
,
cmpScalarNe
<
float
>
},
{
cmpScalarEq
<
double
>
,
cmpScalarGt
<
double
>
,
cmpScalarGe
<
double
>
,
cmpScalarLt
<
double
>
,
cmpScalarLe
<
double
>
,
cmpScalarNe
<
double
>
}
};
typedef
void
(
*
cast_func_t
)(
Scalar
&
sc
);
static
const
cast_func_t
cast_func
[]
=
{
castScalar
<
unsigned
char
>
,
castScalar
<
signed
char
>
,
castScalar
<
unsigned
short
>
,
castScalar
<
short
>
,
castScalar
<
int
>
,
castScalar
<
float
>
,
castScalar
<
double
>
};
if
(
inv
)
{
// src1 is a scalar; swap it with src2
cmpop
=
cmpop
==
CMP_LT
?
CMP_GT
:
cmpop
==
CMP_LE
?
CMP_GE
:
cmpop
==
CMP_GE
?
CMP_LE
:
cmpop
==
CMP_GT
?
CMP_LT
:
cmpop
;
}
const
int
depth
=
src
.
depth
();
const
int
cn
=
src
.
channels
();
cast_func
[
depth
](
val
);
funcs
[
depth
][
cmpop
](
src
,
cn
,
val
.
val
,
dst
,
StreamAccessor
::
getStream
(
stream
));
}
void
cmpScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
cmpop
);
void
cv
::
cuda
::
compare
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
int
cmpop
,
Stream
&
stream
)
{
...
...
modules/cudev/include/opencv2/cudev/util/vec_traits.hpp
浏览文件 @
ef9917ec
...
...
@@ -70,7 +70,7 @@ CV_CUDEV_MAKE_VEC_INST(double)
#undef CV_CUDEV_MAKE_VEC_INST
template
<
>
struct
MakeVec
<
schar
,
1
>
{
typedef
char
type
;
};
template
<
>
struct
MakeVec
<
schar
,
1
>
{
typedef
schar
type
;
};
template
<
>
struct
MakeVec
<
schar
,
2
>
{
typedef
char2
type
;
};
template
<
>
struct
MakeVec
<
schar
,
3
>
{
typedef
char3
type
;
};
template
<
>
struct
MakeVec
<
schar
,
4
>
{
typedef
char4
type
;
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录