Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
437ac1a2
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
437ac1a2
编写于
11月 29, 2010
作者:
A
Alexey Spizhevoy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
added mask support into gpu::minMax
上级
bb532239
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
221 addition
and
105 deletion
+221
-105
modules/gpu/include/opencv2/gpu/gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+2
-2
modules/gpu/src/arithm.cpp
modules/gpu/src/arithm.cpp
+57
-51
modules/gpu/src/cuda/mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+101
-40
tests/gpu/src/arithm.cpp
tests/gpu/src/arithm.cpp
+61
-12
未找到文件。
modules/gpu/include/opencv2/gpu/gpu.hpp
浏览文件 @
437ac1a2
...
...
@@ -425,10 +425,10 @@ namespace cv
CV_EXPORTS
Scalar
sum
(
const
GpuMat
&
m
);
//! finds global minimum and maximum array elements and returns their values
CV_EXPORTS
void
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
=
0
);
CV_EXPORTS
void
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
=
0
,
const
GpuMat
&
mask
=
GpuMat
()
);
//! finds global minimum and maximum array elements and returns their values
CV_EXPORTS
void
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
GpuMat
&
buf
);
CV_EXPORTS
void
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
GpuMat
&
mask
,
GpuMat
&
buf
);
//! finds global minimum and maximum array elements and returns their values with locations
CV_EXPORTS
void
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
=
0
,
Point
*
minLoc
=
0
,
Point
*
maxLoc
=
0
);
...
...
modules/gpu/src/arithm.cpp
浏览文件 @
437ac1a2
...
...
@@ -65,8 +65,8 @@ double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
double
cv
::
gpu
::
norm
(
const
GpuMat
&
,
const
GpuMat
&
,
int
)
{
throw_nogpu
();
return
0.0
;
}
void
cv
::
gpu
::
flip
(
const
GpuMat
&
,
GpuMat
&
,
int
)
{
throw_nogpu
();
}
Scalar
cv
::
gpu
::
sum
(
const
GpuMat
&
)
{
throw_nogpu
();
return
Scalar
();
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
,
double
*
,
double
*
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
,
double
*
,
double
*
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
,
double
*
,
double
*
,
const
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
,
double
*
,
double
*
,
const
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
,
double
*
,
double
*
,
Point
*
,
Point
*
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
,
double
*
,
double
*
,
Point
*
,
Point
*
,
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
int
cv
::
gpu
::
countNonZero
(
const
GpuMat
&
)
{
throw_nogpu
();
return
0
;
}
...
...
@@ -502,62 +502,68 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmax {
void
min_max_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
);
template
<
typename
T
>
void
min_max_caller_2steps
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
);
void
min_max_mask_caller
(
const
DevMem2D
src
,
const
PtrStep
mask
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
);
template
<
typename
T
>
void
min_max_multipass_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
);
template
<
typename
T
>
void
min_max_mask_multipass_caller
(
const
DevMem2D
src
,
const
PtrStep
mask
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
);
}}}}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
)
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
GpuMat
&
mask
)
{
GpuMat
buf
;
minMax
(
src
,
minVal
,
maxVal
,
buf
);
minMax
(
src
,
minVal
,
maxVal
,
mask
,
buf
);
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
GpuMat
&
buf
)
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
GpuMat
&
mask
,
GpuMat
&
buf
)
{
using
namespace
mathfunc
::
minmax
;
typedef
void
(
*
Caller
)(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
static
const
Caller
callers
[
2
][
7
]
=
{
{
min_max_multipass_caller
<
unsigned
char
>
,
min_max_multipass_caller
<
signed
char
>
,
min_max_multipass_caller
<
unsigned
short
>
,
min_max_multipass_caller
<
signed
short
>
,
min_max_multipass_caller
<
int
>
,
min_max_multipass_caller
<
float
>
,
0
},
{
min_max_caller
<
unsigned
char
>
,
min_max_caller
<
signed
char
>
,
min_max_caller
<
unsigned
short
>
,
min_max_caller
<
signed
short
>
,
min_max_caller
<
int
>
,
min_max_caller
<
float
>
,
min_max_caller
<
double
>
}
};
typedef
void
(
*
MaskedCaller
)(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
static
const
MaskedCaller
masked_callers
[
2
][
7
]
=
{
{
min_max_mask_multipass_caller
<
unsigned
char
>
,
min_max_mask_multipass_caller
<
signed
char
>
,
min_max_mask_multipass_caller
<
unsigned
short
>
,
min_max_mask_multipass_caller
<
signed
short
>
,
min_max_mask_multipass_caller
<
int
>
,
min_max_mask_multipass_caller
<
float
>
,
0
},
{
min_max_mask_caller
<
unsigned
char
>
,
min_max_mask_caller
<
signed
char
>
,
min_max_mask_caller
<
unsigned
short
>
,
min_max_mask_caller
<
signed
short
>
,
min_max_mask_caller
<
int
>
,
min_max_mask_caller
<
float
>
,
min_max_mask_caller
<
double
>
}
};
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8U
&&
src
.
size
()
==
mask
.
size
()));
CV_Assert
(
src
.
type
()
!=
CV_64F
||
hasNativeDoubleSupport
(
getDevice
()));
double
minVal_
;
if
(
!
minVal
)
minVal
=
&
minVal_
;
double
maxVal_
;
if
(
!
maxVal
)
maxVal
=
&
maxVal_
;
GpuMat
src_
=
src
.
reshape
(
1
);
Size
bufSize
;
get_buf_size_required
(
src
.
elemSize
(),
bufSize
.
width
,
bufSize
.
height
);
buf
.
create
(
bufSize
,
CV_8U
);
int
device
=
getDevice
();
if
(
hasAtomicsSupport
(
device
))
if
(
mask
.
empty
())
{
switch
(
src_
.
type
())
{
case
CV_8U
:
min_max_caller
<
unsigned
char
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_8S
:
min_max_caller
<
signed
char
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_16U
:
min_max_caller
<
unsigned
short
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_16S
:
min_max_caller
<
signed
short
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_32S
:
min_max_caller
<
int
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_32F
:
min_max_caller
<
float
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_64F
:
if
(
hasNativeDoubleSupport
(
device
))
{
min_max_caller
<
double
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
}
default:
CV_Error
(
CV_StsBadArg
,
"minMax: unsupported type"
);
}
Caller
caller
=
callers
[
hasAtomicsSupport
(
getDevice
())][
src
.
type
()];
if
(
!
caller
)
CV_Error
(
CV_StsBadArg
,
"minMax: unsupported type"
);
caller
(
src
,
minVal
,
maxVal
,
buf
);
}
else
{
switch
(
src_
.
type
())
{
case
CV_8U
:
min_max_caller_2steps
<
unsigned
char
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_8S
:
min_max_caller_2steps
<
signed
char
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_16U
:
min_max_caller_2steps
<
unsigned
short
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_16S
:
min_max_caller_2steps
<
signed
short
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_32S
:
min_max_caller_2steps
<
int
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
case
CV_32F
:
min_max_caller_2steps
<
float
>
(
src_
,
minVal
,
maxVal
,
buf
);
break
;
default:
CV_Error
(
CV_StsBadArg
,
"minMax: unsupported type"
);
}
MaskedCaller
caller
=
masked_callers
[
hasAtomicsSupport
(
getDevice
())][
src
.
type
()];
if
(
!
caller
)
CV_Error
(
CV_StsBadArg
,
"minMax: unsupported type"
);
caller
(
src
,
mask
,
minVal
,
maxVal
,
buf
);
}
}
...
...
@@ -575,7 +581,7 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
);
template
<
typename
T
>
void
min_max_loc_
caller_2steps
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
void
min_max_loc_
multipass_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
);
}}}}
...
...
@@ -627,12 +633,12 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
{
switch
(
src
.
type
())
{
case
CV_8U
:
min_max_loc_
caller_2steps
<
unsigned
char
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_8S
:
min_max_loc_
caller_2steps
<
signed
char
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_16U
:
min_max_loc_
caller_2steps
<
unsigned
short
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_16S
:
min_max_loc_
caller_2steps
<
signed
short
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_32S
:
min_max_loc_
caller_2steps
<
int
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_32F
:
min_max_loc_
caller_2steps
<
float
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_8U
:
min_max_loc_
multipass_caller
<
unsigned
char
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_8S
:
min_max_loc_
multipass_caller
<
signed
char
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_16U
:
min_max_loc_
multipass_caller
<
unsigned
short
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_16S
:
min_max_loc_
multipass_caller
<
signed
short
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_32S
:
min_max_loc_
multipass_caller
<
int
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
case
CV_32F
:
min_max_loc_
multipass_caller
<
float
>
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valbuf
,
locbuf
);
break
;
default:
CV_Error
(
CV_StsBadArg
,
"minMaxLoc: unsupported type"
);
}
}
...
...
@@ -652,7 +658,7 @@ namespace cv { namespace gpu { namespace mathfunc { namespace countnonzero {
int
count_non_zero_caller
(
const
DevMem2D
src
,
PtrStep
buf
);
template
<
typename
T
>
int
count_non_zero_
caller_2steps
(
const
DevMem2D
src
,
PtrStep
buf
);
int
count_non_zero_
multipass_caller
(
const
DevMem2D
src
,
PtrStep
buf
);
}}}}
...
...
@@ -691,12 +697,12 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
{
switch
(
src
.
type
())
{
case
CV_8U
:
return
count_non_zero_
caller_2steps
<
unsigned
char
>
(
src
,
buf
);
case
CV_8S
:
return
count_non_zero_
caller_2steps
<
signed
char
>
(
src
,
buf
);
case
CV_16U
:
return
count_non_zero_
caller_2steps
<
unsigned
short
>
(
src
,
buf
);
case
CV_16S
:
return
count_non_zero_
caller_2steps
<
signed
short
>
(
src
,
buf
);
case
CV_32S
:
return
count_non_zero_
caller_2steps
<
int
>
(
src
,
buf
);
case
CV_32F
:
return
count_non_zero_
caller_2steps
<
float
>
(
src
,
buf
);
case
CV_8U
:
return
count_non_zero_
multipass_caller
<
unsigned
char
>
(
src
,
buf
);
case
CV_8S
:
return
count_non_zero_
multipass_caller
<
signed
char
>
(
src
,
buf
);
case
CV_16U
:
return
count_non_zero_
multipass_caller
<
unsigned
short
>
(
src
,
buf
);
case
CV_16S
:
return
count_non_zero_
multipass_caller
<
signed
short
>
(
src
,
buf
);
case
CV_32S
:
return
count_non_zero_
multipass_caller
<
int
>
(
src
,
buf
);
case
CV_32F
:
return
count_non_zero_
multipass_caller
<
float
>
(
src
,
buf
);
}
}
...
...
modules/gpu/src/cuda/mathfunc.cu
浏览文件 @
437ac1a2
...
...
@@ -480,8 +480,8 @@ namespace cv { namespace gpu { namespace mathfunc
}
template
<
int
nthreads
,
typename
T
>
__global__
void
min_max_kernel
(
const
DevMem2D
src
,
T
*
minval
,
T
*
maxval
)
template
<
int
nthreads
,
typename
T
,
typename
Mask
>
__global__
void
min_max_kernel
(
const
DevMem2D
src
,
Mask
mask
,
T
*
minval
,
T
*
maxval
)
{
typedef
typename
MinMaxTypeTraits
<
T
>::
best_type
best_type
;
__shared__
best_type
sminval
[
nthreads
];
...
...
@@ -491,17 +491,21 @@ namespace cv { namespace gpu { namespace mathfunc
unsigned
int
y0
=
blockIdx
.
y
*
blockDim
.
y
*
ctheight
+
threadIdx
.
y
;
unsigned
int
tid
=
threadIdx
.
y
*
blockDim
.
x
+
threadIdx
.
x
;
T
val
;
T
mymin
=
numeric_limits_gpu
<
T
>::
max
();
T
mymax
=
numeric_limits_gpu
<
T
>::
min
();
for
(
unsigned
int
y
=
0
;
y
<
ctheight
&&
y0
+
y
*
blockDim
.
y
<
src
.
rows
;
++
y
)
unsigned
int
y_end
=
min
(
y0
+
(
ctheight
-
1
)
*
blockDim
.
y
+
1
,
src
.
rows
);
unsigned
int
x_end
=
min
(
x0
+
(
ctwidth
-
1
)
*
blockDim
.
x
+
1
,
src
.
cols
);
for
(
unsigned
int
y
=
y0
;
y
<
y_end
;
y
+=
blockDim
.
y
)
{
const
T
*
ptr
=
(
const
T
*
)
src
.
ptr
(
y0
+
y
*
blockDim
.
y
);
for
(
unsigned
int
x
=
0
;
x
<
ctwidth
&&
x0
+
x
*
blockDim
.
x
<
src
.
cols
;
++
x
)
const
T
*
src_row
=
(
const
T
*
)
src
.
ptr
(
y
);
for
(
unsigned
int
x
=
x0
;
x
<
x_end
;
x
+=
blockDim
.
x
)
{
val
=
ptr
[
x0
+
x
*
blockDim
.
x
];
mymin
=
min
(
mymin
,
val
);
mymax
=
max
(
mymax
,
val
);
T
val
=
src_row
[
x
];
if
(
mask
(
y
,
x
))
{
mymin
=
min
(
mymin
,
val
);
mymax
=
max
(
mymax
,
val
);
}
}
}
...
...
@@ -559,6 +563,35 @@ namespace cv { namespace gpu { namespace mathfunc
}
template
<
typename
T
>
void
min_max_mask_caller
(
const
DevMem2D
src
,
const
PtrStep
mask
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
)
{
dim3
threads
,
grid
;
estimate_thread_cfg
(
threads
,
grid
);
estimate_kernel_consts
(
src
.
cols
,
src
.
rows
,
threads
,
grid
);
T
*
minval_buf
=
(
T
*
)
buf
.
ptr
(
0
);
T
*
maxval_buf
=
(
T
*
)
buf
.
ptr
(
1
);
min_max_kernel
<
256
,
T
,
Mask8U
><<<
grid
,
threads
>>>
(
src
,
Mask8U
(
mask
),
minval_buf
,
maxval_buf
);
cudaSafeCall
(
cudaThreadSynchronize
());
T
minval_
,
maxval_
;
cudaSafeCall
(
cudaMemcpy
(
&
minval_
,
minval_buf
,
sizeof
(
T
),
cudaMemcpyDeviceToHost
));
cudaSafeCall
(
cudaMemcpy
(
&
maxval_
,
maxval_buf
,
sizeof
(
T
),
cudaMemcpyDeviceToHost
));
*
minval
=
minval_
;
*
maxval
=
maxval_
;
}
template
void
min_max_mask_caller
<
unsigned
char
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_caller
<
signed
char
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_caller
<
unsigned
short
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_caller
<
signed
short
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_caller
<
int
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_caller
<
float
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_caller
<
double
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
<
typename
T
>
void
min_max_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
)
{
...
...
@@ -569,7 +602,7 @@ namespace cv { namespace gpu { namespace mathfunc
T
*
minval_buf
=
(
T
*
)
buf
.
ptr
(
0
);
T
*
maxval_buf
=
(
T
*
)
buf
.
ptr
(
1
);
min_max_kernel
<
256
,
T
><<<
grid
,
threads
>>>
(
src
,
minval_buf
,
maxval_buf
);
min_max_kernel
<
256
,
T
,
MaskTrue
><<<
grid
,
threads
>>>
(
src
,
MaskTrue
()
,
minval_buf
,
maxval_buf
);
cudaSafeCall
(
cudaThreadSynchronize
());
T
minval_
,
maxval_
;
...
...
@@ -584,13 +617,12 @@ namespace cv { namespace gpu { namespace mathfunc
template
void
min_max_caller
<
unsigned
short
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_caller
<
signed
short
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_caller
<
int
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_caller
<
float
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_caller
<
float
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_caller
<
double
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
// This kernel will be used only when compute capability is 1.0
template
<
int
nthreads
,
typename
T
>
__global__
void
min_max_
kernel_2ndstep
(
T
*
minval
,
T
*
maxval
,
int
size
)
__global__
void
min_max_
pass2_kernel
(
T
*
minval
,
T
*
maxval
,
int
size
)
{
typedef
typename
MinMaxTypeTraits
<
T
>::
best_type
best_type
;
__shared__
best_type
sminval
[
nthreads
];
...
...
@@ -615,7 +647,36 @@ namespace cv { namespace gpu { namespace mathfunc
template
<
typename
T
>
void
min_max_caller_2steps
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
)
void
min_max_mask_multipass_caller
(
const
DevMem2D
src
,
const
PtrStep
mask
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
)
{
dim3
threads
,
grid
;
estimate_thread_cfg
(
threads
,
grid
);
estimate_kernel_consts
(
src
.
cols
,
src
.
rows
,
threads
,
grid
);
T
*
minval_buf
=
(
T
*
)
buf
.
ptr
(
0
);
T
*
maxval_buf
=
(
T
*
)
buf
.
ptr
(
1
);
min_max_kernel
<
256
,
T
,
Mask8U
><<<
grid
,
threads
>>>
(
src
,
Mask8U
(
mask
),
minval_buf
,
maxval_buf
);
min_max_pass2_kernel
<
256
,
T
><<<
1
,
256
>>>
(
minval_buf
,
maxval_buf
,
grid
.
x
*
grid
.
y
);
cudaSafeCall
(
cudaThreadSynchronize
());
T
minval_
,
maxval_
;
cudaSafeCall
(
cudaMemcpy
(
&
minval_
,
minval_buf
,
sizeof
(
T
),
cudaMemcpyDeviceToHost
));
cudaSafeCall
(
cudaMemcpy
(
&
maxval_
,
maxval_buf
,
sizeof
(
T
),
cudaMemcpyDeviceToHost
));
*
minval
=
minval_
;
*
maxval
=
maxval_
;
}
template
void
min_max_mask_multipass_caller
<
unsigned
char
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_multipass_caller
<
signed
char
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_multipass_caller
<
unsigned
short
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_multipass_caller
<
signed
short
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_multipass_caller
<
int
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_mask_multipass_caller
<
float
>(
const
DevMem2D
,
const
PtrStep
,
double
*
,
double
*
,
PtrStep
);
template
<
typename
T
>
void
min_max_multipass_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
PtrStep
buf
)
{
dim3
threads
,
grid
;
estimate_thread_cfg
(
threads
,
grid
);
...
...
@@ -624,8 +685,8 @@ namespace cv { namespace gpu { namespace mathfunc
T
*
minval_buf
=
(
T
*
)
buf
.
ptr
(
0
);
T
*
maxval_buf
=
(
T
*
)
buf
.
ptr
(
1
);
min_max_kernel
<
256
,
T
><<<
grid
,
threads
>>>
(
src
,
minval_buf
,
maxval_buf
);
min_max_
kernel_2ndstep
<
256
,
T
><<<
1
,
256
>>>
(
minval_buf
,
maxval_buf
,
grid
.
x
*
grid
.
y
);
min_max_kernel
<
256
,
T
,
MaskTrue
><<<
grid
,
threads
>>>
(
src
,
MaskTrue
()
,
minval_buf
,
maxval_buf
);
min_max_
pass2_kernel
<
256
,
T
><<<
1
,
256
>>>
(
minval_buf
,
maxval_buf
,
grid
.
x
*
grid
.
y
);
cudaSafeCall
(
cudaThreadSynchronize
());
T
minval_
,
maxval_
;
...
...
@@ -635,12 +696,12 @@ namespace cv { namespace gpu { namespace mathfunc
*
maxval
=
maxval_
;
}
template
void
min_max_
caller_2steps
<
unsigned
char
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
caller_2steps
<
signed
char
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
caller_2steps
<
unsigned
short
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
caller_2steps
<
signed
short
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
caller_2steps
<
int
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
caller_2steps
<
float
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
multipass_caller
<
unsigned
char
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
multipass_caller
<
signed
char
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
multipass_caller
<
unsigned
short
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
multipass_caller
<
signed
short
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
multipass_caller
<
int
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
template
void
min_max_
multipass_caller
<
float
>(
const
DevMem2D
,
double
*
,
double
*
,
PtrStep
);
}
// namespace minmax
...
...
@@ -861,7 +922,7 @@ namespace cv { namespace gpu { namespace mathfunc
// This kernel will be used only when compute capability is 1.0
template
<
int
nthreads
,
typename
T
>
__global__
void
min_max_loc_
kernel_2ndstep
(
T
*
minval
,
T
*
maxval
,
unsigned
int
*
minloc
,
unsigned
int
*
maxloc
,
int
size
)
__global__
void
min_max_loc_
pass2_kernel
(
T
*
minval
,
T
*
maxval
,
unsigned
int
*
minloc
,
unsigned
int
*
maxloc
,
int
size
)
{
typedef
typename
MinMaxTypeTraits
<
T
>::
best_type
best_type
;
__shared__
best_type
sminval
[
nthreads
];
...
...
@@ -892,7 +953,7 @@ namespace cv { namespace gpu { namespace mathfunc
template
<
typename
T
>
void
min_max_loc_
caller_2steps
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
void
min_max_loc_
multipass_caller
(
const
DevMem2D
src
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStep
valbuf
,
PtrStep
locbuf
)
{
dim3
threads
,
grid
;
...
...
@@ -905,7 +966,7 @@ namespace cv { namespace gpu { namespace mathfunc
unsigned
int
*
maxloc_buf
=
(
unsigned
int
*
)
locbuf
.
ptr
(
1
);
min_max_loc_kernel
<
256
,
T
><<<
grid
,
threads
>>>
(
src
,
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
);
min_max_loc_
kernel_2ndstep
<
256
,
T
><<<
1
,
256
>>>
(
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
,
grid
.
x
*
grid
.
y
);
min_max_loc_
pass2_kernel
<
256
,
T
><<<
1
,
256
>>>
(
minval_buf
,
maxval_buf
,
minloc_buf
,
maxloc_buf
,
grid
.
x
*
grid
.
y
);
cudaSafeCall
(
cudaThreadSynchronize
());
T
minval_
,
maxval_
;
...
...
@@ -921,12 +982,12 @@ namespace cv { namespace gpu { namespace mathfunc
maxloc
[
1
]
=
maxloc_
/
src
.
cols
;
maxloc
[
0
]
=
maxloc_
-
maxloc
[
1
]
*
src
.
cols
;
}
template
void
min_max_loc_
caller_2steps
<
unsigned
char
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
caller_2steps
<
signed
char
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
caller_2steps
<
unsigned
short
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
caller_2steps
<
signed
short
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
caller_2steps
<
int
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
caller_2steps
<
float
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
multipass_caller
<
unsigned
char
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
multipass_caller
<
signed
char
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
multipass_caller
<
unsigned
short
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
multipass_caller
<
signed
short
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
multipass_caller
<
int
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
template
void
min_max_loc_
multipass_caller
<
float
>(
const
DevMem2D
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStep
,
PtrStep
);
}
// namespace minmaxloc
...
...
@@ -1070,7 +1131,7 @@ namespace cv { namespace gpu { namespace mathfunc
template
<
int
nthreads
,
typename
T
>
__global__
void
count_non_zero_
kernel_2ndstep
(
unsigned
int
*
count
,
int
size
)
__global__
void
count_non_zero_
pass2_kernel
(
unsigned
int
*
count
,
int
size
)
{
__shared__
unsigned
int
scount
[
nthreads
];
unsigned
int
tid
=
threadIdx
.
y
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
@@ -1087,7 +1148,7 @@ namespace cv { namespace gpu { namespace mathfunc
template
<
typename
T
>
int
count_non_zero_
caller_2steps
(
const
DevMem2D
src
,
PtrStep
buf
)
int
count_non_zero_
multipass_caller
(
const
DevMem2D
src
,
PtrStep
buf
)
{
dim3
threads
,
grid
;
estimate_thread_cfg
(
threads
,
grid
);
...
...
@@ -1096,7 +1157,7 @@ namespace cv { namespace gpu { namespace mathfunc
unsigned
int
*
count_buf
=
(
unsigned
int
*
)
buf
.
ptr
(
0
);
count_non_zero_kernel
<
256
,
T
><<<
grid
,
threads
>>>
(
src
,
count_buf
);
count_non_zero_
kernel_2ndstep
<
256
,
T
><<<
1
,
256
>>>
(
count_buf
,
grid
.
x
*
grid
.
y
);
count_non_zero_
pass2_kernel
<
256
,
T
><<<
1
,
256
>>>
(
count_buf
,
grid
.
x
*
grid
.
y
);
cudaSafeCall
(
cudaThreadSynchronize
());
unsigned
int
count
;
...
...
@@ -1105,12 +1166,12 @@ namespace cv { namespace gpu { namespace mathfunc
return
count
;
}
template
int
count_non_zero_
caller_2steps
<
unsigned
char
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
caller_2steps
<
signed
char
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
caller_2steps
<
unsigned
short
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
caller_2steps
<
signed
short
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
caller_2steps
<
int
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
caller_2steps
<
float
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
multipass_caller
<
unsigned
char
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
multipass_caller
<
signed
char
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
multipass_caller
<
unsigned
short
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
multipass_caller
<
signed
short
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
multipass_caller
<
int
>(
const
DevMem2D
,
PtrStep
);
template
int
count_non_zero_
multipass_caller
<
float
>(
const
DevMem2D
,
PtrStep
);
}
// namespace countnonzero
...
...
tests/gpu/src/arithm.cpp
浏览文件 @
437ac1a2
...
...
@@ -682,16 +682,16 @@ struct CV_GpuMinMaxTest: public CvTest
{
int
depth_end
;
if
(
cv
::
gpu
::
hasNativeDoubleSupport
(
cv
::
gpu
::
getDevice
()))
depth_end
=
CV_64F
;
else
depth_end
=
CV_32F
;
for
(
int
cn
=
1
;
cn
<=
4
;
++
cn
)
for
(
int
depth
=
CV_8U
;
depth
<=
depth_end
;
++
depth
)
for
(
int
depth
=
CV_8U
;
depth
<=
depth_end
;
++
depth
)
{
for
(
int
i
=
0
;
i
<
1
;
++
i
)
{
for
(
int
i
=
0
;
i
<
1
;
++
i
)
{
int
rows
=
1
+
rand
()
%
1000
;
int
cols
=
1
+
rand
()
%
1000
;
test
(
rows
,
cols
,
cn
,
depth
);
}
int
rows
=
1
+
rand
()
%
1000
;
int
cols
=
1
+
rand
()
%
1000
;
test
(
rows
,
cols
,
1
,
depth
);
test_masked
(
rows
,
cols
,
1
,
depth
);
}
}
}
void
test
(
int
rows
,
int
cols
,
int
cn
,
int
depth
)
...
...
@@ -707,10 +707,59 @@ struct CV_GpuMinMaxTest: public CvTest
double
minVal
,
maxVal
;
cv
::
Point
minLoc
,
maxLoc
;
if
(
depth
!=
CV_8S
)
{
cv
::
minMaxLoc
(
src
,
&
minVal
,
&
maxVal
,
&
minLoc
,
&
maxLoc
);
}
else
{
minVal
=
std
::
numeric_limits
<
double
>::
max
();
maxVal
=
std
::
numeric_limits
<
double
>::
min
();
for
(
int
i
=
0
;
i
<
src
.
rows
;
++
i
)
for
(
int
j
=
0
;
j
<
src
.
cols
;
++
j
)
{
signed
char
val
=
src
.
at
<
signed
char
>
(
i
,
j
);
if
(
val
<
minVal
)
minVal
=
val
;
if
(
val
>
maxVal
)
maxVal
=
val
;
}
}
double
minVal_
,
maxVal_
;
cv
::
Point
minLoc_
,
maxLoc_
;
cv
::
gpu
::
minMax
(
cv
::
gpu
::
GpuMat
(
src
),
&
minVal_
,
&
maxVal_
,
cv
::
gpu
::
GpuMat
(),
buf
);
if
(
abs
(
minVal
-
minVal_
)
>
1e-3
f
)
{
ts
->
printf
(
CvTS
::
CONSOLE
,
"
\n
fail: minVal=%f minVal_=%f rows=%d cols=%d depth=%d cn=%d
\n
"
,
minVal
,
minVal_
,
rows
,
cols
,
depth
,
cn
);
ts
->
set_failed_test_info
(
CvTS
::
FAIL_INVALID_OUTPUT
);
}
if
(
abs
(
maxVal
-
maxVal_
)
>
1e-3
f
)
{
ts
->
printf
(
CvTS
::
CONSOLE
,
"
\n
fail: maxVal=%f maxVal_=%f rows=%d cols=%d depth=%d cn=%d
\n
"
,
maxVal
,
maxVal_
,
rows
,
cols
,
depth
,
cn
);
ts
->
set_failed_test_info
(
CvTS
::
FAIL_INVALID_OUTPUT
);
}
}
void
test_masked
(
int
rows
,
int
cols
,
int
cn
,
int
depth
)
{
cv
::
Mat
src
(
rows
,
cols
,
CV_MAKE_TYPE
(
depth
,
cn
));
cv
::
RNG
rng
;
for
(
int
i
=
0
;
i
<
src
.
rows
;
++
i
)
{
Mat
row
(
1
,
src
.
cols
*
src
.
elemSize
(),
CV_8U
,
src
.
ptr
(
i
));
rng
.
fill
(
row
,
RNG
::
UNIFORM
,
Scalar
(
0
),
Scalar
(
255
));
}
cv
::
Mat
mask
(
src
.
size
(),
CV_8U
);
rng
.
fill
(
mask
,
RNG
::
UNIFORM
,
Scalar
(
0
),
Scalar
(
2
));
double
minVal
,
maxVal
;
cv
::
Point
minLoc
,
maxLoc
;
Mat
src_
=
src
.
reshape
(
1
);
if
(
depth
!=
CV_8S
)
{
cv
::
minMaxLoc
(
src_
,
&
minVal
,
&
maxVal
,
&
minLoc
,
&
maxLoc
);
cv
::
minMaxLoc
(
src_
,
&
minVal
,
&
maxVal
,
&
minLoc
,
&
maxLoc
,
mask
);
}
else
{
...
...
@@ -721,14 +770,14 @@ struct CV_GpuMinMaxTest: public CvTest
for
(
int
j
=
0
;
j
<
src_
.
cols
;
++
j
)
{
char
val
=
src_
.
at
<
char
>
(
i
,
j
);
if
(
val
<
minVal
)
minVal
=
val
;
if
(
val
>
maxVal
)
maxVal
=
val
;
if
(
mask
.
at
<
unsigned
char
>
(
i
,
j
))
{
if
(
val
<
minVal
)
minVal
=
val
;
}
if
(
mask
.
at
<
unsigned
char
>
(
i
,
j
))
{
if
(
val
>
maxVal
)
maxVal
=
val
;
}
}
}
double
minVal_
,
maxVal_
;
cv
::
Point
minLoc_
,
maxLoc_
;
cv
::
gpu
::
minMax
(
cv
::
gpu
::
GpuMat
(
src
),
&
minVal_
,
&
maxVal_
,
buf
);
cv
::
gpu
::
minMax
(
cv
::
gpu
::
GpuMat
(
src
),
&
minVal_
,
&
maxVal_
,
cv
::
gpu
::
GpuMat
(
mask
),
buf
);
if
(
abs
(
minVal
-
minVal_
)
>
1e-3
f
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录