Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
39da17a0
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
39da17a0
编写于
11月 26, 2012
作者:
M
marina.kolpakova
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #177 from cuda-geek/another-one-integral-fix
上级
23011ffd
a22edb03
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
39 additions
and
55 deletions
+39
-55
modules/gpu/include/opencv2/gpu/device/utility.hpp
modules/gpu/include/opencv2/gpu/device/utility.hpp
+1
-1
modules/gpu/src/cuda/integral_image.cu
modules/gpu/src/cuda/integral_image.cu
+4
-3
modules/gpu/src/cuda/lbp.cu
modules/gpu/src/cuda/lbp.cu
+1
-0
modules/gpu/src/cuda/surf.cu
modules/gpu/src/cuda/surf.cu
+16
-16
modules/gpu/src/imgproc.cpp
modules/gpu/src/imgproc.cpp
+12
-30
modules/gpu/src/surf.cpp
modules/gpu/src/surf.cpp
+5
-5
未找到文件。
modules/gpu/include/opencv2/gpu/device/utility.hpp
浏览文件 @
39da17a0
...
@@ -150,7 +150,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -150,7 +150,7 @@ namespace cv { namespace gpu { namespace device
return
true
;
return
true
;
}
}
static
__device__
__forceinline__
bool
check
(
int
,
int
,
int
,
uint
offset
=
0
)
static
__device__
__forceinline__
bool
check
(
int
,
int
,
int
)
{
{
return
true
;
return
true
;
}
}
...
...
modules/gpu/src/cuda/integral_image.cu
浏览文件 @
39da17a0
...
@@ -357,18 +357,19 @@ namespace cv { namespace gpu { namespace device
...
@@ -357,18 +357,19 @@ namespace cv { namespace gpu { namespace device
#endif
#endif
}
}
void
shfl_integral_gpu
(
PtrStepSzb
img
,
PtrStepSz
<
unsigned
int
>
integral
,
cudaStream_t
stream
)
void
shfl_integral_gpu
(
const
PtrStepSzb
&
img
,
PtrStepSz
<
unsigned
int
>
integral
,
cudaStream_t
stream
)
{
{
{
{
// each thread handles 16 values, use 1 block/row
// each thread handles 16 values, use 1 block/row
const
int
block
=
img
.
cols
/
16
;
// safe, because step actually can't be less than 512 bytes
int
block
=
integral
.
cols
/
16
;
// launch 1 block / row
// launch 1 block / row
const
int
grid
=
img
.
rows
;
const
int
grid
=
img
.
rows
;
cudaSafeCall
(
cudaFuncSetCacheConfig
(
shfl_integral_horizontal
,
cudaFuncCachePreferL1
)
);
cudaSafeCall
(
cudaFuncSetCacheConfig
(
shfl_integral_horizontal
,
cudaFuncCachePreferL1
)
);
shfl_integral_horizontal
<<<
grid
,
block
,
0
,
stream
>>>
((
PtrStepSz
<
uint4
>
)
img
,
(
PtrStepSz
<
uint4
>
)
integral
);
shfl_integral_horizontal
<<<
grid
,
block
,
0
,
stream
>>>
((
const
PtrStepSz
<
uint4
>
)
img
,
(
PtrStepSz
<
uint4
>
)
integral
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaGetLastError
()
);
}
}
...
...
modules/gpu/src/cuda/lbp.cu
浏览文件 @
39da17a0
...
@@ -185,6 +185,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -185,6 +185,7 @@ namespace cv { namespace gpu { namespace device
void
connectedConmonents
(
PtrStepSz
<
int4
>
candidates
,
int
ncandidates
,
PtrStepSz
<
int4
>
objects
,
int
groupThreshold
,
float
grouping_eps
,
unsigned
int
*
nclasses
)
void
connectedConmonents
(
PtrStepSz
<
int4
>
candidates
,
int
ncandidates
,
PtrStepSz
<
int4
>
objects
,
int
groupThreshold
,
float
grouping_eps
,
unsigned
int
*
nclasses
)
{
{
if
(
!
ncandidates
)
return
;
int
block
=
ncandidates
;
int
block
=
ncandidates
;
int
smem
=
block
*
(
sizeof
(
int
)
+
sizeof
(
int4
)
);
int
smem
=
block
*
(
sizeof
(
int
)
+
sizeof
(
int4
)
);
disjoin
<
InSameComponint
><<<
1
,
block
,
smem
>>>
(
candidates
,
objects
,
ncandidates
,
groupThreshold
,
grouping_eps
,
nclasses
);
disjoin
<
InSameComponint
><<<
1
,
block
,
smem
>>>
(
candidates
,
objects
,
ncandidates
,
groupThreshold
,
grouping_eps
,
nclasses
);
...
...
modules/gpu/src/cuda/surf.cu
浏览文件 @
39da17a0
...
@@ -177,7 +177,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -177,7 +177,7 @@ namespace cv { namespace gpu { namespace device
return
(
HAAR_SIZE0
+
HAAR_SIZE_INC
*
layer
)
<<
octave
;
return
(
HAAR_SIZE0
+
HAAR_SIZE_INC
*
layer
)
<<
octave
;
}
}
__global__
void
icvCalcLayerDetAndTrace
(
PtrStepf
det
,
PtrStepf
trace
,
uint
sumOffset
)
__global__
void
icvCalcLayerDetAndTrace
(
PtrStepf
det
,
PtrStepf
trace
)
{
{
// Determine the indices
// Determine the indices
const
int
gridDim_y
=
gridDim
.
y
/
(
c_nOctaveLayers
+
2
);
const
int
gridDim_y
=
gridDim
.
y
/
(
c_nOctaveLayers
+
2
);
...
@@ -198,9 +198,9 @@ namespace cv { namespace gpu { namespace device
...
@@ -198,9 +198,9 @@ namespace cv { namespace gpu { namespace device
if
(
size
<=
c_img_rows
&&
size
<=
c_img_cols
&&
i
<
samples_i
&&
j
<
samples_j
)
if
(
size
<=
c_img_rows
&&
size
<=
c_img_cols
&&
i
<
samples_i
&&
j
<
samples_j
)
{
{
const
float
dx
=
icvCalcHaarPatternSum
<
3
>
(
c_DX
,
9
,
size
,
(
i
<<
c_octave
),
sumOffset
+
(
j
<<
c_octave
));
const
float
dx
=
icvCalcHaarPatternSum
<
3
>
(
c_DX
,
9
,
size
,
(
i
<<
c_octave
),
(
j
<<
c_octave
));
const
float
dy
=
icvCalcHaarPatternSum
<
3
>
(
c_DY
,
9
,
size
,
(
i
<<
c_octave
),
sumOffset
+
(
j
<<
c_octave
));
const
float
dy
=
icvCalcHaarPatternSum
<
3
>
(
c_DY
,
9
,
size
,
(
i
<<
c_octave
),
(
j
<<
c_octave
));
const
float
dxy
=
icvCalcHaarPatternSum
<
4
>
(
c_DXY
,
9
,
size
,
(
i
<<
c_octave
),
sumOffset
+
(
j
<<
c_octave
));
const
float
dxy
=
icvCalcHaarPatternSum
<
4
>
(
c_DXY
,
9
,
size
,
(
i
<<
c_octave
),
(
j
<<
c_octave
));
det
.
ptr
(
layer
*
c_layer_rows
+
i
+
margin
)[
j
+
margin
]
=
dx
*
dy
-
0.81
f
*
dxy
*
dxy
;
det
.
ptr
(
layer
*
c_layer_rows
+
i
+
margin
)[
j
+
margin
]
=
dx
*
dy
-
0.81
f
*
dxy
*
dxy
;
trace
.
ptr
(
layer
*
c_layer_rows
+
i
+
margin
)[
j
+
margin
]
=
dx
+
dy
;
trace
.
ptr
(
layer
*
c_layer_rows
+
i
+
margin
)[
j
+
margin
]
=
dx
+
dy
;
...
@@ -208,7 +208,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -208,7 +208,7 @@ namespace cv { namespace gpu { namespace device
}
}
void
icvCalcLayerDetAndTrace_gpu
(
const
PtrStepf
&
det
,
const
PtrStepf
&
trace
,
int
img_rows
,
int
img_cols
,
void
icvCalcLayerDetAndTrace_gpu
(
const
PtrStepf
&
det
,
const
PtrStepf
&
trace
,
int
img_rows
,
int
img_cols
,
int
octave
,
int
nOctaveLayers
,
const
size_t
sumOffset
)
int
octave
,
int
nOctaveLayers
)
{
{
const
int
min_size
=
calcSize
(
octave
,
0
);
const
int
min_size
=
calcSize
(
octave
,
0
);
const
int
max_samples_i
=
1
+
((
img_rows
-
min_size
)
>>
octave
);
const
int
max_samples_i
=
1
+
((
img_rows
-
min_size
)
>>
octave
);
...
@@ -220,7 +220,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -220,7 +220,7 @@ namespace cv { namespace gpu { namespace device
grid
.
x
=
divUp
(
max_samples_j
,
threads
.
x
);
grid
.
x
=
divUp
(
max_samples_j
,
threads
.
x
);
grid
.
y
=
divUp
(
max_samples_i
,
threads
.
y
)
*
(
nOctaveLayers
+
2
);
grid
.
y
=
divUp
(
max_samples_i
,
threads
.
y
)
*
(
nOctaveLayers
+
2
);
icvCalcLayerDetAndTrace
<<<
grid
,
threads
>>>
(
det
,
trace
,
(
uint
)
sumOffset
);
icvCalcLayerDetAndTrace
<<<
grid
,
threads
>>>
(
det
,
trace
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaDeviceSynchronize
()
);
cudaSafeCall
(
cudaDeviceSynchronize
()
);
...
@@ -233,7 +233,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -233,7 +233,7 @@ namespace cv { namespace gpu { namespace device
struct
WithMask
struct
WithMask
{
{
static
__device__
bool
check
(
int
sum_i
,
int
sum_j
,
int
size
,
const
uint
offset
)
static
__device__
bool
check
(
int
sum_i
,
int
sum_j
,
int
size
)
{
{
float
ratio
=
(
float
)
size
/
9.0
f
;
float
ratio
=
(
float
)
size
/
9.0
f
;
...
@@ -245,10 +245,10 @@ namespace cv { namespace gpu { namespace device
...
@@ -245,10 +245,10 @@ namespace cv { namespace gpu { namespace device
int
dy2
=
__float2int_rn
(
ratio
*
c_DM
[
3
]);
int
dy2
=
__float2int_rn
(
ratio
*
c_DM
[
3
]);
float
t
=
0
;
float
t
=
0
;
t
+=
tex2D
(
maskSumTex
,
offset
+
sum_j
+
dx1
,
sum_i
+
dy1
);
t
+=
tex2D
(
maskSumTex
,
sum_j
+
dx1
,
sum_i
+
dy1
);
t
-=
tex2D
(
maskSumTex
,
offset
+
sum_j
+
dx1
,
sum_i
+
dy2
);
t
-=
tex2D
(
maskSumTex
,
sum_j
+
dx1
,
sum_i
+
dy2
);
t
-=
tex2D
(
maskSumTex
,
offset
+
sum_j
+
dx2
,
sum_i
+
dy1
);
t
-=
tex2D
(
maskSumTex
,
sum_j
+
dx2
,
sum_i
+
dy1
);
t
+=
tex2D
(
maskSumTex
,
offset
+
sum_j
+
dx2
,
sum_i
+
dy2
);
t
+=
tex2D
(
maskSumTex
,
sum_j
+
dx2
,
sum_i
+
dy2
);
d
+=
t
*
c_DM
[
4
]
/
((
dx2
-
dx1
)
*
(
dy2
-
dy1
));
d
+=
t
*
c_DM
[
4
]
/
((
dx2
-
dx1
)
*
(
dy2
-
dy1
));
...
@@ -258,7 +258,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -258,7 +258,7 @@ namespace cv { namespace gpu { namespace device
template
<
typename
Mask
>
template
<
typename
Mask
>
__global__
void
icvFindMaximaInLayer
(
const
PtrStepf
det
,
const
PtrStepf
trace
,
int4
*
maxPosBuffer
,
__global__
void
icvFindMaximaInLayer
(
const
PtrStepf
det
,
const
PtrStepf
trace
,
int4
*
maxPosBuffer
,
unsigned
int
*
maxCounter
,
const
uint
maskOffset
)
unsigned
int
*
maxCounter
)
{
{
#if __CUDA_ARCH__ && __CUDA_ARCH__ >= 110
#if __CUDA_ARCH__ && __CUDA_ARCH__ >= 110
...
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace device
const
int
sum_i
=
(
i
-
((
size
>>
1
)
>>
c_octave
))
<<
c_octave
;
const
int
sum_i
=
(
i
-
((
size
>>
1
)
>>
c_octave
))
<<
c_octave
;
const
int
sum_j
=
(
j
-
((
size
>>
1
)
>>
c_octave
))
<<
c_octave
;
const
int
sum_j
=
(
j
-
((
size
>>
1
)
>>
c_octave
))
<<
c_octave
;
if
(
Mask
::
check
(
sum_i
,
sum_j
,
size
,
maskOffset
))
if
(
Mask
::
check
(
sum_i
,
sum_j
,
size
))
{
{
// Check to see if we have a max (in its 26 neighbours)
// Check to see if we have a max (in its 26 neighbours)
const
bool
condmax
=
val0
>
N9
[
localLin
-
1
-
blockDim
.
x
-
zoff
]
const
bool
condmax
=
val0
>
N9
[
localLin
-
1
-
blockDim
.
x
-
zoff
]
...
@@ -351,7 +351,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -351,7 +351,7 @@ namespace cv { namespace gpu { namespace device
}
}
void
icvFindMaximaInLayer_gpu
(
const
PtrStepf
&
det
,
const
PtrStepf
&
trace
,
int4
*
maxPosBuffer
,
unsigned
int
*
maxCounter
,
void
icvFindMaximaInLayer_gpu
(
const
PtrStepf
&
det
,
const
PtrStepf
&
trace
,
int4
*
maxPosBuffer
,
unsigned
int
*
maxCounter
,
int
img_rows
,
int
img_cols
,
int
octave
,
bool
use_mask
,
int
nOctaveLayers
,
const
size_t
maskOffset
)
int
img_rows
,
int
img_cols
,
int
octave
,
bool
use_mask
,
int
nOctaveLayers
)
{
{
const
int
layer_rows
=
img_rows
>>
octave
;
const
int
layer_rows
=
img_rows
>>
octave
;
const
int
layer_cols
=
img_cols
>>
octave
;
const
int
layer_cols
=
img_cols
>>
octave
;
...
@@ -367,9 +367,9 @@ namespace cv { namespace gpu { namespace device
...
@@ -367,9 +367,9 @@ namespace cv { namespace gpu { namespace device
const
size_t
smem_size
=
threads
.
x
*
threads
.
y
*
3
*
sizeof
(
float
);
const
size_t
smem_size
=
threads
.
x
*
threads
.
y
*
3
*
sizeof
(
float
);
if
(
use_mask
)
if
(
use_mask
)
icvFindMaximaInLayer
<
WithMask
><<<
grid
,
threads
,
smem_size
>>>
(
det
,
trace
,
maxPosBuffer
,
maxCounter
,
(
uint
)
maskOffset
);
icvFindMaximaInLayer
<
WithMask
><<<
grid
,
threads
,
smem_size
>>>
(
det
,
trace
,
maxPosBuffer
,
maxCounter
);
else
else
icvFindMaximaInLayer
<
WithOutMask
><<<
grid
,
threads
,
smem_size
>>>
(
det
,
trace
,
maxPosBuffer
,
maxCounter
,
0
);
icvFindMaximaInLayer
<
WithOutMask
><<<
grid
,
threads
,
smem_size
>>>
(
det
,
trace
,
maxPosBuffer
,
maxCounter
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaGetLastError
()
);
...
...
modules/gpu/src/imgproc.cpp
浏览文件 @
39da17a0
...
@@ -537,7 +537,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -537,7 +537,7 @@ namespace cv { namespace gpu { namespace device
{
{
namespace
imgproc
namespace
imgproc
{
{
void
shfl_integral_gpu
(
PtrStepSzb
img
,
PtrStepSz
<
unsigned
int
>
integral
,
cudaStream_t
stream
);
void
shfl_integral_gpu
(
const
PtrStepSzb
&
img
,
PtrStepSz
<
unsigned
int
>
integral
,
cudaStream_t
stream
);
}
}
}}}
}}}
...
@@ -553,44 +553,26 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
...
@@ -553,44 +553,26 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
src
.
locateROI
(
whole
,
offset
);
src
.
locateROI
(
whole
,
offset
);
if
(
info
.
supports
(
WARP_SHUFFLE_FUNCTIONS
)
&&
src
.
cols
<=
2048
)
if
(
info
.
supports
(
WARP_SHUFFLE_FUNCTIONS
)
&&
src
.
cols
<=
2048
&&
offset
.
x
%
16
==
0
&&
((
src
.
cols
+
63
)
/
64
)
*
64
<=
(
src
.
step
-
offset
.
x
))
{
{
GpuMat
srcAlligned
;
ensureSizeIsEnough
(((
src
.
rows
+
7
)
/
8
)
*
8
,
((
src
.
cols
+
63
)
/
64
)
*
64
,
CV_32SC1
,
buffer
)
;
if
(
src
.
cols
%
16
==
0
&&
src
.
rows
%
8
==
0
&&
offset
.
x
%
16
==
0
&&
offset
.
y
%
8
==
0
)
cv
::
gpu
::
device
::
imgproc
::
shfl_integral_gpu
(
src
,
buffer
,
stream
);
srcAlligned
=
src
;
else
{
ensureSizeIsEnough
(((
src
.
rows
+
7
)
/
8
)
*
8
,
((
src
.
cols
+
15
)
/
16
)
*
16
,
src
.
type
(),
buffer
);
GpuMat
inner
=
buffer
(
Rect
(
0
,
0
,
src
.
cols
,
src
.
rows
));
if
(
s
)
{
s
.
enqueueMemSet
(
buffer
,
Scalar
::
all
(
0
));
s
.
enqueueCopy
(
src
,
inner
);
}
else
{
buffer
.
setTo
(
Scalar
::
all
(
0
));
src
.
copyTo
(
inner
);
}
srcAlligned
=
buffer
;
}
sum
.
create
(
srcAlligned
.
rows
+
1
,
srcAlligned
.
cols
+
4
,
CV_32SC1
);
sum
.
create
(
src
.
rows
+
1
,
src
.
cols
+
1
,
CV_32SC1
);
if
(
s
)
if
(
s
)
s
.
enqueueMemSet
(
sum
,
Scalar
::
all
(
0
));
s
.
enqueueMemSet
(
sum
,
Scalar
::
all
(
0
));
else
else
sum
.
setTo
(
Scalar
::
all
(
0
));
sum
.
setTo
(
Scalar
::
all
(
0
));
GpuMat
inner
=
sum
(
Rect
(
4
,
1
,
srcAlligned
.
cols
,
srcAlligned
.
rows
));
GpuMat
inner
=
sum
(
Rect
(
1
,
1
,
src
.
cols
,
src
.
rows
));
GpuMat
res
=
buffer
(
Rect
(
0
,
0
,
src
.
cols
,
src
.
rows
));
cv
::
gpu
::
device
::
imgproc
::
shfl_integral_gpu
(
srcAlligned
,
inner
,
stream
);
sum
=
sum
(
Rect
(
3
,
0
,
src
.
cols
+
1
,
src
.
rows
+
1
));
if
(
s
)
s
.
enqueueCopy
(
res
,
inner
);
else
res
.
copyTo
(
inner
);
}
}
else
else
{
{
...
...
modules/gpu/src/surf.cpp
浏览文件 @
39da17a0
...
@@ -75,10 +75,10 @@ namespace cv { namespace gpu { namespace device
...
@@ -75,10 +75,10 @@ namespace cv { namespace gpu { namespace device
size_t
bindMaskSumTex
(
PtrStepSz
<
unsigned
int
>
maskSum
);
size_t
bindMaskSumTex
(
PtrStepSz
<
unsigned
int
>
maskSum
);
void
icvCalcLayerDetAndTrace_gpu
(
const
PtrStepf
&
det
,
const
PtrStepf
&
trace
,
int
img_rows
,
int
img_cols
,
void
icvCalcLayerDetAndTrace_gpu
(
const
PtrStepf
&
det
,
const
PtrStepf
&
trace
,
int
img_rows
,
int
img_cols
,
int
octave
,
int
nOctaveLayer
s
,
const
size_t
sumOffset
);
int
octave
,
int
nOctaveLayer
);
void
icvFindMaximaInLayer_gpu
(
const
PtrStepf
&
det
,
const
PtrStepf
&
trace
,
int4
*
maxPosBuffer
,
unsigned
int
*
maxCounter
,
void
icvFindMaximaInLayer_gpu
(
const
PtrStepf
&
det
,
const
PtrStepf
&
trace
,
int4
*
maxPosBuffer
,
unsigned
int
*
maxCounter
,
int
img_rows
,
int
img_cols
,
int
octave
,
bool
use_mask
,
int
nLayers
,
const
size_t
maskOffset
);
int
img_rows
,
int
img_cols
,
int
octave
,
bool
use_mask
,
int
nLayers
);
void
icvInterpolateKeypoint_gpu
(
const
PtrStepf
&
det
,
const
int4
*
maxPosBuffer
,
unsigned
int
maxCounter
,
void
icvInterpolateKeypoint_gpu
(
const
PtrStepf
&
det
,
const
int4
*
maxPosBuffer
,
unsigned
int
maxCounter
,
float
*
featureX
,
float
*
featureY
,
int
*
featureLaplacian
,
int
*
featureOctave
,
float
*
featureSize
,
float
*
featureHessian
,
float
*
featureX
,
float
*
featureY
,
int
*
featureLaplacian
,
int
*
featureOctave
,
float
*
featureSize
,
float
*
featureHessian
,
...
@@ -146,8 +146,8 @@ namespace
...
@@ -146,8 +146,8 @@ namespace
loadGlobalConstants
(
maxCandidates
,
maxFeatures
,
img_rows
,
img_cols
,
surf_
.
nOctaveLayers
,
static_cast
<
float
>
(
surf_
.
hessianThreshold
));
loadGlobalConstants
(
maxCandidates
,
maxFeatures
,
img_rows
,
img_cols
,
surf_
.
nOctaveLayers
,
static_cast
<
float
>
(
surf_
.
hessianThreshold
));
bindImgTex
(
img
);
bindImgTex
(
img
);
integralBuffered
(
img
,
surf_
.
sum
,
surf_
.
intBuffer
);
integralBuffered
(
img
,
surf_
.
sum
,
surf_
.
intBuffer
);
sumOffset
=
bindSumTex
(
surf_
.
sum
);
sumOffset
=
bindSumTex
(
surf_
.
sum
);
if
(
use_mask
)
if
(
use_mask
)
...
@@ -174,10 +174,10 @@ namespace
...
@@ -174,10 +174,10 @@ namespace
loadOctaveConstants
(
octave
,
layer_rows
,
layer_cols
);
loadOctaveConstants
(
octave
,
layer_rows
,
layer_cols
);
icvCalcLayerDetAndTrace_gpu
(
surf_
.
det
,
surf_
.
trace
,
img_rows
,
img_cols
,
octave
,
surf_
.
nOctaveLayers
,
sumOffset
);
icvCalcLayerDetAndTrace_gpu
(
surf_
.
det
,
surf_
.
trace
,
img_rows
,
img_cols
,
octave
,
surf_
.
nOctaveLayers
);
icvFindMaximaInLayer_gpu
(
surf_
.
det
,
surf_
.
trace
,
surf_
.
maxPosBuffer
.
ptr
<
int4
>
(),
counters
.
ptr
<
unsigned
int
>
()
+
1
+
octave
,
icvFindMaximaInLayer_gpu
(
surf_
.
det
,
surf_
.
trace
,
surf_
.
maxPosBuffer
.
ptr
<
int4
>
(),
counters
.
ptr
<
unsigned
int
>
()
+
1
+
octave
,
img_rows
,
img_cols
,
octave
,
use_mask
,
surf_
.
nOctaveLayers
,
maskOffset
);
img_rows
,
img_cols
,
octave
,
use_mask
,
surf_
.
nOctaveLayers
);
unsigned
int
maxCounter
;
unsigned
int
maxCounter
;
cudaSafeCall
(
cudaMemcpy
(
&
maxCounter
,
counters
.
ptr
<
unsigned
int
>
()
+
1
+
octave
,
sizeof
(
unsigned
int
),
cudaMemcpyDeviceToHost
)
);
cudaSafeCall
(
cudaMemcpy
(
&
maxCounter
,
counters
.
ptr
<
unsigned
int
>
()
+
1
+
octave
,
sizeof
(
unsigned
int
),
cudaMemcpyDeviceToHost
)
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录