Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
a21ede94
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a21ede94
编写于
3月 10, 2016
作者:
D
Dan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Thrust allocator usage.
上级
7a934f9e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
40 addition
and
25 deletion
+40
-25
modules/cudafeatures2d/src/cuda/orb.cu
modules/cudafeatures2d/src/cuda/orb.cu
+12
-1
modules/cudaimgproc/src/cuda/gftt.cu
modules/cudaimgproc/src/cuda/gftt.cu
+18
-11
modules/cudaimgproc/src/gftt.cpp
modules/cudaimgproc/src/gftt.cpp
+10
-13
未找到文件。
modules/cudafeatures2d/src/cuda/orb.cu
浏览文件 @
a21ede94
...
...
@@ -51,7 +51,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/utility.hpp"
namespace
cv
{
namespace
cuda
{
namespace
device
{
namespace
orb
...
...
@@ -64,6 +64,16 @@ namespace cv { namespace cuda { namespace device
thrust
::
device_ptr
<
int
>
loc_ptr
(
loc
);
thrust
::
device_ptr
<
float
>
response_ptr
(
response
);
#if THRUST_VERSION >= 100800
#if THRUST_VERSION >= 100802
if
(
stream
)
{
thrust
::
sort_by_key
(
thrust
::
cuda
::
par
(
ThrustAllocator
::
getAllocator
()).
on
(
stream
),
response_ptr
,
response_ptr
+
size
,
loc_ptr
,
thrust
::
greater
<
float
>
());
}
else
{
thrust
::
sort_by_key
(
thrust
::
cuda
::
par
(
ThrustAllocator
::
getAllocator
()),
response_ptr
,
response_ptr
+
size
,
loc_ptr
,
thrust
::
greater
<
float
>
());
}
#else
if
(
stream
)
{
thrust
::
sort_by_key
(
thrust
::
cuda
::
par
.
on
(
stream
),
response_ptr
,
response_ptr
+
size
,
loc_ptr
,
thrust
::
greater
<
float
>
());
...
...
@@ -71,6 +81,7 @@ namespace cv { namespace cuda { namespace device
{
thrust
::
sort_by_key
(
response_ptr
,
response_ptr
+
size
,
loc_ptr
,
thrust
::
greater
<
float
>
());
}
#endif
#else
thrust
::
sort_by_key
(
response_ptr
,
response_ptr
+
size
,
loc_ptr
,
thrust
::
greater
<
float
>
());
#endif
...
...
modules/cudaimgproc/src/cuda/gftt.cu
浏览文件 @
a21ede94
...
...
@@ -47,7 +47,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include <thrust/execution_policy.h>
namespace
cv
{
namespace
cuda
{
namespace
device
{
namespace
gfft
...
...
@@ -91,12 +91,12 @@ namespace cv { namespace cuda { namespace device
}
}
int
findCorners_gpu
(
PtrStepSzf
eig
,
float
threshold
,
PtrStepSzb
mask
,
float2
*
corners
,
int
max_count
)
int
findCorners_gpu
(
PtrStepSzf
eig
,
float
threshold
,
PtrStepSzb
mask
,
float2
*
corners
,
int
max_count
,
cudaStream_t
stream
)
{
void
*
counter_ptr
;
cudaSafeCall
(
cudaGetSymbolAddress
(
&
counter_ptr
,
g_counter
)
);
cudaSafeCall
(
cudaMemset
(
counter_ptr
,
0
,
sizeof
(
int
)
)
);
cudaSafeCall
(
cudaMemset
Async
(
counter_ptr
,
0
,
sizeof
(
int
),
stream
)
);
bindTexture
(
&
eigTex
,
eig
);
...
...
@@ -104,17 +104,18 @@ namespace cv { namespace cuda { namespace device
dim3
grid
(
divUp
(
eig
.
cols
,
block
.
x
),
divUp
(
eig
.
rows
,
block
.
y
));
if
(
mask
.
data
)
findCorners
<<<
grid
,
block
>>>
(
threshold
,
SingleMask
(
mask
),
corners
,
max_count
,
eig
.
rows
,
eig
.
cols
);
findCorners
<<<
grid
,
block
,
0
,
stream
>>>
(
threshold
,
SingleMask
(
mask
),
corners
,
max_count
,
eig
.
rows
,
eig
.
cols
);
else
findCorners
<<<
grid
,
block
>>>
(
threshold
,
WithOutMask
(),
corners
,
max_count
,
eig
.
rows
,
eig
.
cols
);
findCorners
<<<
grid
,
block
,
0
,
stream
>>>
(
threshold
,
WithOutMask
(),
corners
,
max_count
,
eig
.
rows
,
eig
.
cols
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaDeviceSynchronize
()
);
int
count
;
cudaSafeCall
(
cudaMemcpy
(
&
count
,
counter_ptr
,
sizeof
(
int
),
cudaMemcpyDeviceToHost
)
);
cudaSafeCall
(
cudaMemcpyAsync
(
&
count
,
counter_ptr
,
sizeof
(
int
),
cudaMemcpyDeviceToHost
,
stream
)
);
if
(
stream
)
cudaSafeCall
(
cudaStreamSynchronize
(
stream
));
else
cudaSafeCall
(
cudaDeviceSynchronize
()
);
return
std
::
min
(
count
,
max_count
);
}
...
...
@@ -128,13 +129,19 @@ namespace cv { namespace cuda { namespace device
};
void
sortCorners_gpu
(
PtrStepSzf
eig
,
float2
*
corners
,
int
count
)
void
sortCorners_gpu
(
PtrStepSzf
eig
,
float2
*
corners
,
int
count
,
cudaStream_t
stream
)
{
bindTexture
(
&
eigTex
,
eig
);
thrust
::
device_ptr
<
float2
>
ptr
(
corners
);
#if THRUST_VERSION >= 100802
if
(
stream
)
thrust
::
sort
(
thrust
::
cuda
::
par
(
ThrustAllocator
::
getAllocator
()).
on
(
stream
),
ptr
,
ptr
+
count
,
EigGreater
());
else
thrust
::
sort
(
thrust
::
cuda
::
par
(
ThrustAllocator
::
getAllocator
()),
ptr
,
ptr
+
count
,
EigGreater
());
#else
thrust
::
sort
(
ptr
,
ptr
+
count
,
EigGreater
());
#endif
}
}
// namespace optical_flow
}}}
...
...
modules/cudaimgproc/src/gftt.cpp
浏览文件 @
a21ede94
...
...
@@ -55,8 +55,8 @@ namespace cv { namespace cuda { namespace device
{
namespace
gfft
{
int
findCorners_gpu
(
PtrStepSzf
eig
,
float
threshold
,
PtrStepSzb
mask
,
float2
*
corners
,
int
max_count
);
void
sortCorners_gpu
(
PtrStepSzf
eig
,
float2
*
corners
,
int
count
);
int
findCorners_gpu
(
PtrStepSzf
eig
,
float
threshold
,
PtrStepSzb
mask
,
float2
*
corners
,
int
max_count
,
cudaStream_t
stream
);
void
sortCorners_gpu
(
PtrStepSzf
eig
,
float2
*
corners
,
int
count
,
cudaStream_t
stream
);
}
}}}
...
...
@@ -97,9 +97,6 @@ namespace
void
GoodFeaturesToTrackDetector
::
detect
(
InputArray
_image
,
OutputArray
_corners
,
InputArray
_mask
,
Stream
&
stream
)
{
// TODO : implement async version
(
void
)
stream
;
using
namespace
cv
::
cuda
::
device
::
gfft
;
GpuMat
image
=
_image
.
getGpuMat
();
...
...
@@ -108,14 +105,14 @@ namespace
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
image
.
size
())
);
ensureSizeIsEnough
(
image
.
size
(),
CV_32FC1
,
eig_
);
cornerCriteria_
->
compute
(
image
,
eig_
);
cornerCriteria_
->
compute
(
image
,
eig_
,
stream
);
double
maxVal
=
0
;
cuda
::
minMax
(
eig_
,
0
,
&
maxVal
);
cudaStream_t
stream_
=
StreamAccessor
::
getStream
(
stream
);
ensureSizeIsEnough
(
1
,
std
::
max
(
1000
,
static_cast
<
int
>
(
image
.
size
().
area
()
*
0.05
)),
CV_32FC2
,
tmpCorners_
);
int
total
=
findCorners_gpu
(
eig_
,
static_cast
<
float
>
(
maxVal
*
qualityLevel_
),
mask
,
tmpCorners_
.
ptr
<
float2
>
(),
tmpCorners_
.
cols
);
int
total
=
findCorners_gpu
(
eig_
,
static_cast
<
float
>
(
maxVal
*
qualityLevel_
),
mask
,
tmpCorners_
.
ptr
<
float2
>
(),
tmpCorners_
.
cols
,
stream_
);
if
(
total
==
0
)
{
...
...
@@ -123,18 +120,18 @@ namespace
return
;
}
sortCorners_gpu
(
eig_
,
tmpCorners_
.
ptr
<
float2
>
(),
total
);
sortCorners_gpu
(
eig_
,
tmpCorners_
.
ptr
<
float2
>
(),
total
,
stream_
);
if
(
minDistance_
<
1
)
{
tmpCorners_
.
colRange
(
0
,
maxCorners_
>
0
?
std
::
min
(
maxCorners_
,
total
)
:
total
).
copyTo
(
_corners
);
tmpCorners_
.
colRange
(
0
,
maxCorners_
>
0
?
std
::
min
(
maxCorners_
,
total
)
:
total
).
copyTo
(
_corners
,
stream
);
}
else
{
std
::
vector
<
Point2f
>
tmp
(
total
);
Mat
tmpMat
(
1
,
total
,
CV_32FC2
,
(
void
*
)
&
tmp
[
0
]);
tmpCorners_
.
colRange
(
0
,
total
).
download
(
tmpMat
);
tmpCorners_
.
colRange
(
0
,
total
).
download
(
tmpMat
,
stream
);
stream
.
waitForCompletion
();
std
::
vector
<
Point2f
>
tmp2
;
tmp2
.
reserve
(
total
);
...
...
@@ -203,7 +200,7 @@ namespace
_corners
.
create
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
);
GpuMat
corners
=
_corners
.
getGpuMat
();
corners
.
upload
(
Mat
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
,
&
tmp2
[
0
]));
corners
.
upload
(
Mat
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
,
&
tmp2
[
0
])
,
stream
);
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录