Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
b2603828
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b2603828
编写于
11月 29, 2010
作者:
A
Alexey Spizhevoy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
added masks support into gpu::minMaxLoc
上级
7c4cff99
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
164 addition
and
91 deletion
+164
-91
modules/gpu/include/opencv2/gpu/gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+3
-2
modules/gpu/src/arithm.cpp
modules/gpu/src/arithm.cpp
+63
-68
modules/gpu/src/cuda/mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+87
-16
tests/gpu/src/arithm.cpp
tests/gpu/src/arithm.cpp
+11
-5
未找到文件。
modules/gpu/include/opencv2/gpu/gpu.hpp
浏览文件 @
b2603828
...
...
@@ -431,11 +431,12 @@ namespace cv
CV_EXPORTS
void
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
GpuMat
&
mask
,
GpuMat
&
buf
);
//! finds global minimum and maximum array elements and returns their values with locations
CV_EXPORTS
void
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
=
0
,
Point
*
minLoc
=
0
,
Point
*
maxLoc
=
0
);
CV_EXPORTS
void
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
=
0
,
Point
*
minLoc
=
0
,
Point
*
maxLoc
=
0
,
const
GpuMat
&
mask
=
GpuMat
());
//! finds global minimum and maximum array elements and returns their values with locations
CV_EXPORTS
void
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
Point
*
minLoc
,
Point
*
maxLoc
,
GpuMat
&
valbuf
,
GpuMat
&
locbuf
);
const
GpuMat
&
mask
,
GpuMat
&
valbuf
,
GpuMat
&
locbuf
);
//! counts non-zero array elements
CV_EXPORTS
int
countNonZero
(
const
GpuMat
&
src
);
...
...
modules/gpu/src/arithm.cpp
浏览文件 @
b2603828
...
...
@@ -67,8 +67,8 @@ void cv::gpu::flip(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
Scalar
cv
::
gpu
::
sum
(
const
GpuMat
&
)
{
throw_nogpu
();
return
Scalar
();
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
,
double
*
,
double
*
,
const
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
,
double
*
,
double
*
,
const
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
,
double
*
,
double
*
,
Point
*
,
Point
*
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
,
double
*
,
double
*
,
Point
*
,
Point
*
,
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
,
double
*
,
double
*
,
Point
*
,
Point
*
,
const
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
,
double
*
,
double
*
,
Point
*
,
Point
*
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
int
cv
::
gpu
::
countNonZero
(
const
GpuMat
&
)
{
throw_nogpu
();
return
0
;
}
int
cv
::
gpu
::
countNonZero
(
const
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
return
0
;
}
void
cv
::
gpu
::
LUT
(
const
GpuMat
&
,
const
Mat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
...
...
@@ -523,6 +523,8 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
using
namespace
mathfunc
::
minmax
;
typedef
void
(
*
Caller
)(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
typedef
void
(
*
MaskedCaller
)(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
static
const
Caller
callers
[
2
][
7
]
=
{
{
min_max_multipass_caller
<
unsigned
char
>
,
min_max_multipass_caller
<
char
>
,
min_max_multipass_caller
<
unsigned
short
>
,
min_max_multipass_caller
<
short
>
,
...
...
@@ -531,7 +533,6 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
min_max_caller
<
unsigned
short
>
,
min_max_caller
<
short
>
,
min_max_caller
<
int
>
,
min_max_caller
<
float
>
,
min_max_caller
<
double
>
}
};
typedef
void
(
*
MaskedCaller
)(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
static
const
MaskedCaller
masked_callers
[
2
][
7
]
=
{
{
min_max_mask_multipass_caller
<
unsigned
char
>
,
min_max_mask_multipass_caller
<
char
>
,
min_max_mask_multipass_caller
<
unsigned
short
>
,
min_max_mask_multipass_caller
<
short
>
,
...
...
@@ -580,23 +581,54 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
void
min_max_loc_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
);
template
<
typename
T
>
void
min_max_loc_mask_caller
(
const
DevMem2D
src
,
const
PtrStep
mask
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
);
template
<
typename
T
>
void
min_max_loc_multipass_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
);
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
);
template
<
typename
T
>
void
min_max_loc_mask_multipass_caller
(
const
DevMem2D
src
,
const
PtrStep
mask
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
);
}}}}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
Point
*
minLoc
,
Point
*
maxLoc
)
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
Point
*
minLoc
,
Point
*
maxLoc
,
const
GpuMat
&
mask
)
{
GpuMat
valbuf
,
locbuf
;
minMaxLoc
(
src
,
minVal
,
maxVal
,
minLoc
,
maxLoc
,
valbuf
,
locbuf
);
minMaxLoc
(
src
,
minVal
,
maxVal
,
minLoc
,
maxLoc
,
mask
,
valbuf
,
locbuf
);
}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
Point
*
minLoc
,
Point
*
maxLoc
,
GpuMat
&
valbuf
,
GpuMat
&
locbuf
)
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
Point
*
minLoc
,
Point
*
maxLoc
,
const
GpuMat
&
mask
,
GpuMat
&
valbuf
,
GpuMat
&
locbuf
)
{
using
namespace
mathfunc
::
minmaxloc
;
typedef
void
(
*
Caller
)(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
typedef
void
(
*
MaskedCaller
)(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
static
const
Caller
callers
[
2
][
7
]
=
{
{
min_max_loc_multipass_caller
<
unsigned
char
>
,
min_max_loc_multipass_caller
<
char
>
,
min_max_loc_multipass_caller
<
unsigned
short
>
,
min_max_loc_multipass_caller
<
short
>
,
min_max_loc_multipass_caller
<
int
>
,
min_max_loc_multipass_caller
<
float
>
,
0
},
{
min_max_loc_caller
<
unsigned
char
>
,
min_max_loc_caller
<
char
>
,
min_max_loc_caller
<
unsigned
short
>
,
min_max_loc_caller
<
short
>
,
min_max_loc_caller
<
int
>
,
min_max_loc_caller
<
float
>
,
min_max_loc_caller
<
double
>
}
};
static
const
MaskedCaller
masked_callers
[
2
][
7
]
=
{
{
min_max_loc_mask_multipass_caller
<
unsigned
char
>
,
min_max_loc_mask_multipass_caller
<
char
>
,
min_max_loc_mask_multipass_caller
<
unsigned
short
>
,
min_max_loc_mask_multipass_caller
<
short
>
,
min_max_loc_mask_multipass_caller
<
int
>
,
min_max_loc_mask_multipass_caller
<
float
>
,
0
},
{
min_max_loc_mask_caller
<
unsigned
char
>
,
min_max_loc_mask_caller
<
char
>
,
min_max_loc_mask_caller
<
unsigned
short
>
,
min_max_loc_mask_caller
<
short
>
,
min_max_loc_mask_caller
<
int
>
,
min_max_loc_mask_caller
<
float
>
,
min_max_loc_mask_caller
<
double
>
}
};
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8U
&&
src
.
size
()
==
mask
.
size
()));
CV_Assert
(
src
.
type
()
!=
CV_64F
||
hasNativeDoubleSupport
(
getDevice
()));
double
minVal_
;
if
(
!
minVal
)
minVal
=
&
minVal_
;
double
maxVal_
;
if
(
!
maxVal
)
maxVal
=
&
maxVal_
;
...
...
@@ -609,38 +641,17 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
valbuf
.
create
(
valbuf_size
,
CV_8U
);
locbuf
.
create
(
locbuf_size
,
CV_8U
);
int
device
=
getDevice
();
if
(
hasAtomicsSupport
(
device
))
{
switch
(
src
.
type
())
{
case
CV_8U
:
min_max_loc_caller
<
unsigned
char
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_8S
:
min_max_loc_caller
<
char
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_16U
:
min_max_loc_caller
<
unsigned
short
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_16S
:
min_max_loc_caller
<
short
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_32S
:
min_max_loc_caller
<
int
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_32F
:
min_max_loc_caller
<
float
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_64F
:
if
(
hasNativeDoubleSupport
(
device
))
{
min_max_loc_caller
<
double
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
}
default:
CV_Error
(
CV_StsBadArg
,
"minMaxLoc: unsupported type"
);
}
if
(
mask
.
empty
())
{
Caller
caller
=
callers
[
hasAtomicsSupport
(
getDevice
())][
src
.
type
()];
if
(
!
caller
)
CV_Error
(
CV_StsBadArg
,
"minMaxLoc: unsupported type"
);
caller
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
}
else
{
switch
(
src
.
type
())
{
case
CV_8U
:
min_max_loc_multipass_caller
<
unsigned
char
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_8S
:
min_max_loc_multipass_caller
<
char
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_16U
:
min_max_loc_multipass_caller
<
unsigned
short
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_16S
:
min_max_loc_multipass_caller
<
short
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_32S
:
min_max_loc_multipass_caller
<
int
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_32F
:
min_max_loc_multipass_caller
<
float
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
default:
CV_Error
(
CV_StsBadArg
,
"minMaxLoc: unsupported type"
);
}
MaskedCaller
caller
=
masked_callers
[
hasAtomicsSupport
(
getDevice
())][
src
.
type
()];
if
(
!
caller
)
CV_Error
(
CV_StsBadArg
,
"minMaxLoc: unsupported type"
);
caller
(
src
,
mask
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
}
if
(
minLoc
)
{
minLoc
->
x
=
minLoc_
[
0
];
minLoc
->
y
=
minLoc_
[
1
];
}
...
...
@@ -671,43 +682,27 @@ int cv::gpu::countNonZero(const GpuMat& src)
int
cv
::
gpu
::
countNonZero
(
const
GpuMat
&
src
,
GpuMat
&
buf
)
{
using
namespace
mathfunc
::
countnonzero
;
typedef
int
(
*
Caller
)(
const
DevMem2D
src
,
PtrStep
buf
);
static
const
Caller
callers
[
2
][
7
]
=
{
{
count_non_zero_multipass_caller
<
unsigned
char
>
,
count_non_zero_multipass_caller
<
char
>
,
count_non_zero_multipass_caller
<
unsigned
short
>
,
count_non_zero_multipass_caller
<
short
>
,
count_non_zero_multipass_caller
<
int
>
,
count_non_zero_multipass_caller
<
float
>
,
0
},
{
count_non_zero_caller
<
unsigned
char
>
,
count_non_zero_caller
<
char
>
,
count_non_zero_caller
<
unsigned
short
>
,
count_non_zero_caller
<
short
>
,
count_non_zero_caller
<
int
>
,
count_non_zero_caller
<
float
>
,
count_non_zero_caller
<
double
>
}
};
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
src
.
type
()
!=
CV_64F
||
hasNativeDoubleSupport
(
getDevice
()));
Size
buf_size
;
get_buf_size_required
(
buf_size
.
width
,
buf_size
.
height
);
buf
.
create
(
buf_size
,
CV_8U
);
int
device
=
getDevice
();
if
(
hasAtomicsSupport
(
device
))
{
switch
(
src
.
type
())
{
case
CV_8U
:
return
count_non_zero_caller
<
unsigned
char
>
(
src
,
buf
);
case
CV_8S
:
return
count_non_zero_caller
<
char
>
(
src
,
buf
);
case
CV_16U
:
return
count_non_zero_caller
<
unsigned
short
>
(
src
,
buf
);
case
CV_16S
:
return
count_non_zero_caller
<
short
>
(
src
,
buf
);
case
CV_32S
:
return
count_non_zero_caller
<
int
>
(
src
,
buf
);
case
CV_32F
:
return
count_non_zero_caller
<
float
>
(
src
,
buf
);
case
CV_64F
:
if
(
hasNativeDoubleSupport
(
device
))
return
count_non_zero_caller
<
double
>
(
src
,
buf
);
}
}
else
{
switch
(
src
.
type
())
{
case
CV_8U
:
return
count_non_zero_multipass_caller
<
unsigned
char
>
(
src
,
buf
);
case
CV_8S
:
return
count_non_zero_multipass_caller
<
char
>
(
src
,
buf
);
case
CV_16U
:
return
count_non_zero_multipass_caller
<
unsigned
short
>
(
src
,
buf
);
case
CV_16S
:
return
count_non_zero_multipass_caller
<
short
>
(
src
,
buf
);
case
CV_32S
:
return
count_non_zero_multipass_caller
<
int
>
(
src
,
buf
);
case
CV_32F
:
return
count_non_zero_multipass_caller
<
float
>
(
src
,
buf
);
}
}
CV_Error
(
CV_StsBadArg
,
"countNonZero: unsupported type"
);
return
0
;
Caller
caller
=
callers
[
hasAtomicsSupport
(
getDevice
())][
src
.
type
()];
if
(
!
caller
)
CV_Error
(
CV_StsBadArg
,
"countNonZero: unsupported type"
);
return
caller
(
src
,
buf
);
}
////////////////////////////////////////////////////////////////////////
...
...
modules/gpu/src/cuda/mathfunc.cu
浏览文件 @
b2603828
...
...
@@ -248,10 +248,10 @@ namespace cv { namespace gpu { namespace mathfunc
struct
Mask8U
{
explicit
Mask8U
(
PtrStep
mask
)
:
mask
(
mask
)
{}
__device__
bool
operator
()(
int
y
,
int
x
)
{
return
mask
.
ptr
(
y
)[
x
];
}
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
mask
.
ptr
(
y
)[
x
];
}
PtrStep
mask
;
};
struct
MaskTrue
{
__device__
bool
operator
()(
int
y
,
int
x
)
{
return
true
;
}
};
struct
MaskTrue
{
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
true
;
}
};
// Unary operations
...
...
@@ -788,8 +788,8 @@ namespace cv { namespace gpu { namespace mathfunc
}
template
<
int
nthreads
,
typename
T
>
__global__
void
min_max_loc_kernel
(
const
DevMem2D
src
,
T
*
minval
,
T
*
maxval
,
template
<
int
nthreads
,
typename
T
,
typename
Mask
>
__global__
void
min_max_loc_kernel
(
const
DevMem2D
src
,
Mask
mask
,
T
*
minval
,
T
*
maxval
,
unsigned
int
*
minloc
,
unsigned
int
*
maxloc
)
{
typedef
typename
MinMaxTypeTraits
<
T
>::
best_type
best_type
;
...
...
@@ -814,16 +814,11 @@ namespace cv { namespace gpu { namespace mathfunc
const
T
*
ptr
=
(
const
T
*
)
src
.
ptr
(
y
);
for
(
unsigned
int
x
=
x0
;
x
<
x_end
;
x
+=
blockDim
.
x
)
{
T
val
=
ptr
[
x
];
if
(
val
<=
mymin
)
{
mymin
=
val
;
myminloc
=
y
*
src
.
cols
+
x
;
}
if
(
val
>=
mymax
)
if
(
mask
(
y
,
x
))
{
mymax
=
val
;
mymaxloc
=
y
*
src
.
cols
+
x
;
T
val
=
ptr
[
x
];
if
(
val
<=
mymin
)
{
mymin
=
val
;
myminloc
=
y
*
src
.
cols
+
x
;
}
if
(
val
>=
mymax
)
{
mymax
=
val
;
mymaxloc
=
y
*
src
.
cols
+
x
;
}
}
}
}
...
...
@@ -886,6 +881,44 @@ namespace cv { namespace gpu { namespace mathfunc
}
template
<
typename
T
>
void
min_max_loc_mask_caller
(
const
DevMem2D
src
,
const
PtrStep
mask
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
)
{
dim3
threads
,
grid
;
estimate_thread_cfg
(
threads
,
grid
);
estimate_kernel_consts
(
src
.
cols
,
src
.
rows
,
threads
,
grid
);
T
*
minval_buf
=
(
T
*
)
valbuf
.
ptr
(
0
);
T
*
maxval_buf
=
(
T
*
)
valbuf
.
ptr
(
1
);
unsigned
int
*
minloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
0
);
unsigned
int
*
maxloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
1
);
min_max_loc_kernel
<
256
,
T
,
Mask8U
><<<
grid
,
threads
>>>
(
src
,
Mask8U
(
mask
),
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
);
cudaSafeCall
(
cudaThreadSynchronize
());
T
minval_
,
maxval_
;
cudaSafeCall
(
cudaMemcpy
(
&
minval_
,
minval_buf
,
sizeof
(
T
),
cudaMemcpyDeviceToHost
));
cudaSafeCall
(
cudaMemcpy
(
&
maxval_
,
maxval_buf
,
sizeof
(
T
),
cudaMemcpyDeviceToHost
));
*
minval
=
minval_
;
*
maxval
=
maxval_
;
unsigned
int
minloc_
,
maxloc_
;
cudaSafeCall
(
cudaMemcpy
(
&
minloc_
,
minloc_buf
,
sizeof
(
int
),
cudaMemcpyDeviceToHost
));
cudaSafeCall
(
cudaMemcpy
(
&
maxloc_
,
maxloc_buf
,
sizeof
(
int
),
cudaMemcpyDeviceToHost
));
minloc
[
1
]
=
minloc_
/
src
.
cols
;
minloc
[
0
]
=
minloc_
-
minloc
[
1
]
*
src
.
cols
;
maxloc
[
1
]
=
maxloc_
/
src
.
cols
;
maxloc
[
0
]
=
maxloc_
-
maxloc
[
1
]
*
src
.
cols
;
}
template
void
min_max_loc_mask_caller
<
unsigned
char
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_caller
<
char
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_caller
<
unsigned
short
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_caller
<
short
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_caller
<
int
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_caller
<
float
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_caller
<
double
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
<
typename
T
>
void
min_max_loc_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
)
...
...
@@ -899,7 +932,7 @@ namespace cv { namespace gpu { namespace mathfunc
unsigned
int
*
minloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
0
);
unsigned
int
*
maxloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
1
);
min_max_loc_kernel
<
256
,
T
><<<
grid
,
threads
>>>
(
src
,
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
);
min_max_loc_kernel
<
256
,
T
,
MaskTrue
><<<
grid
,
threads
>>>
(
src
,
MaskTrue
()
,
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
);
cudaSafeCall
(
cudaThreadSynchronize
());
T
minval_
,
maxval_
;
...
...
@@ -956,9 +989,47 @@ namespace cv { namespace gpu { namespace mathfunc
}
template
<
typename
T
>
void
min_max_loc_mask_multipass_caller
(
const
DevMem2D
src
,
const
PtrStep
mask
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
)
{
dim3
threads
,
grid
;
estimate_thread_cfg
(
threads
,
grid
);
estimate_kernel_consts
(
src
.
cols
,
src
.
rows
,
threads
,
grid
);
T
*
minval_buf
=
(
T
*
)
valbuf
.
ptr
(
0
);
T
*
maxval_buf
=
(
T
*
)
valbuf
.
ptr
(
1
);
unsigned
int
*
minloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
0
);
unsigned
int
*
maxloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
1
);
min_max_loc_kernel
<
256
,
T
,
Mask8U
><<<
grid
,
threads
>>>
(
src
,
Mask8U
(
mask
),
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
);
min_max_loc_pass2_kernel
<
256
,
T
><<<
1
,
256
>>>
(
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
,
grid
.
x
*
grid
.
y
);
cudaSafeCall
(
cudaThreadSynchronize
());
T
minval_
,
maxval_
;
cudaSafeCall
(
cudaMemcpy
(
&
minval_
,
minval_buf
,
sizeof
(
T
),
cudaMemcpyDeviceToHost
));
cudaSafeCall
(
cudaMemcpy
(
&
maxval_
,
maxval_buf
,
sizeof
(
T
),
cudaMemcpyDeviceToHost
));
*
minval
=
minval_
;
*
maxval
=
maxval_
;
unsigned
int
minloc_
,
maxloc_
;
cudaSafeCall
(
cudaMemcpy
(
&
minloc_
,
minloc_buf
,
sizeof
(
int
),
cudaMemcpyDeviceToHost
));
cudaSafeCall
(
cudaMemcpy
(
&
maxloc_
,
maxloc_buf
,
sizeof
(
int
),
cudaMemcpyDeviceToHost
));
minloc
[
1
]
=
minloc_
/
src
.
cols
;
minloc
[
0
]
=
minloc_
-
minloc
[
1
]
*
src
.
cols
;
maxloc
[
1
]
=
maxloc_
/
src
.
cols
;
maxloc
[
0
]
=
maxloc_
-
maxloc
[
1
]
*
src
.
cols
;
}
template
void
min_max_loc_mask_multipass_caller
<
unsigned
char
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_multipass_caller
<
char
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_multipass_caller
<
unsigned
short
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_multipass_caller
<
short
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_multipass_caller
<
int
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_mask_multipass_caller
<
float
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
<
typename
T
>
void
min_max_loc_multipass_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
)
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
)
{
dim3
threads
,
grid
;
estimate_thread_cfg
(
threads
,
grid
);
...
...
@@ -969,7 +1040,7 @@ namespace cv { namespace gpu { namespace mathfunc
unsigned
int
*
minloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
0
);
unsigned
int
*
maxloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
1
);
min_max_loc_kernel
<
256
,
T
><<<
grid
,
threads
>>>
(
src
,
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
);
min_max_loc_kernel
<
256
,
T
,
MaskTrue
><<<
grid
,
threads
>>>
(
src
,
MaskTrue
()
,
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
);
min_max_loc_pass2_kernel
<
256
,
T
><<<
1
,
256
>>>
(
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
,
grid
.
x
*
grid
.
y
);
cudaSafeCall
(
cudaThreadSynchronize
());
...
...
tests/gpu/src/arithm.cpp
浏览文件 @
b2603828
...
...
@@ -684,7 +684,7 @@ struct CV_GpuMinMaxTest: public CvTest
if
(
cv
::
gpu
::
hasNativeDoubleSupport
(
cv
::
gpu
::
getDevice
()))
depth_end
=
CV_64F
;
else
depth_end
=
CV_32F
;
for
(
int
depth
=
CV_8U
;
depth
<=
depth_end
;
++
depth
)
{
for
(
int
i
=
0
;
i
<
1
;
++
i
)
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
int
rows
=
1
+
rand
()
%
1000
;
int
cols
=
1
+
rand
()
%
1000
;
...
...
@@ -829,11 +829,14 @@ struct CV_GpuMinMaxLocTest: public CvTest
rng
.
fill
(
row
,
RNG
::
UNIFORM
,
Scalar
(
0
),
Scalar
(
256
));
}
cv
::
Mat
mask
(
src
.
size
(),
CV_8U
);
rng
.
fill
(
mask
,
RNG
::
UNIFORM
,
Scalar
(
0
),
Scalar
(
2
));
double
minVal
,
maxVal
;
cv
::
Point
minLoc
,
maxLoc
;
if
(
depth
!=
CV_8S
)
cv
::
minMaxLoc
(
src
,
&
minVal
,
&
maxVal
,
&
minLoc
,
&
maxLoc
);
cv
::
minMaxLoc
(
src
,
&
minVal
,
&
maxVal
,
&
minLoc
,
&
maxLoc
,
mask
);
else
{
// OpenCV's minMaxLoc doesn't support CV_8S type
...
...
@@ -843,14 +846,17 @@ struct CV_GpuMinMaxLocTest: public CvTest
for
(
int
j
=
0
;
j
<
src
.
cols
;
++
j
)
{
char
val
=
src
.
at
<
char
>
(
i
,
j
);
if
(
val
<
minVal
)
{
minVal
=
val
;
minLoc
=
cv
::
Point
(
j
,
i
);
}
if
(
val
>
maxVal
)
{
maxVal
=
val
;
maxLoc
=
cv
::
Point
(
j
,
i
);
}
if
(
mask
.
at
<
unsigned
char
>
(
i
,
j
))
{
if
(
val
<
minVal
)
{
minVal
=
val
;
minLoc
=
cv
::
Point
(
j
,
i
);
}
if
(
val
>
maxVal
)
{
maxVal
=
val
;
maxLoc
=
cv
::
Point
(
j
,
i
);
}
}
}
}
double
minVal_
,
maxVal_
;
cv
::
Point
minLoc_
,
maxLoc_
;
cv
::
gpu
::
minMaxLoc
(
cv
::
gpu
::
GpuMat
(
src
),
&
minVal_
,
&
maxVal_
,
&
minLoc_
,
&
maxLoc_
,
valbuf
,
locbuf
);
cv
::
gpu
::
minMaxLoc
(
cv
::
gpu
::
GpuMat
(
src
),
&
minVal_
,
&
maxVal_
,
&
minLoc_
,
&
maxLoc_
,
cv
::
gpu
::
GpuMat
(
mask
),
valbuf
,
locbuf
);
CHECK
(
minVal
==
minVal_
,
CvTS
::
FAIL_INVALID_OUTPUT
);
CHECK
(
maxVal
==
maxVal_
,
CvTS
::
FAIL_INVALID_OUTPUT
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录