提交 a21ede94 编写于 作者: D Dan

Thrust allocator usage.

上级 7a934f9e
......@@ -51,7 +51,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/utility.hpp"
namespace cv { namespace cuda { namespace device
{
namespace orb
......@@ -64,6 +64,16 @@ namespace cv { namespace cuda { namespace device
thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<float> response_ptr(response);
#if THRUST_VERSION >= 100800
#if THRUST_VERSION >= 100802
if (stream)
{
thrust::sort_by_key(thrust::cuda::par(ThrustAllocator::getAllocator()).on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
else
{
thrust::sort_by_key(thrust::cuda::par(ThrustAllocator::getAllocator()), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
#else
if(stream)
{
thrust::sort_by_key(thrust::cuda::par.on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
......@@ -71,6 +81,7 @@ namespace cv { namespace cuda { namespace device
{
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
#endif
#else
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
#endif
......
......@@ -47,7 +47,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include <thrust/execution_policy.h>
namespace cv { namespace cuda { namespace device
{
namespace gfft
......@@ -91,12 +91,12 @@ namespace cv { namespace cuda { namespace device
}
}
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
bindTexture(&eigTex, eig);
......@@ -104,17 +104,18 @@ namespace cv { namespace cuda { namespace device
dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y));
if (mask.data)
findCorners<<<grid, block>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
findCorners<<<grid, block, 0, stream>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
else
findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
findCorners<<<grid, block, 0, stream>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
if (stream)
cudaSafeCall(cudaStreamSynchronize(stream));
else
cudaSafeCall( cudaDeviceSynchronize() );
return std::min(count, max_count);
}
......@@ -128,13 +129,19 @@ namespace cv { namespace cuda { namespace device
};
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count, cudaStream_t stream)
{
bindTexture(&eigTex, eig);
thrust::device_ptr<float2> ptr(corners);
#if THRUST_VERSION >= 100802
if (stream)
thrust::sort(thrust::cuda::par(ThrustAllocator::getAllocator()).on(stream), ptr, ptr + count, EigGreater());
else
thrust::sort(thrust::cuda::par(ThrustAllocator::getAllocator()), ptr, ptr + count, EigGreater());
#else
thrust::sort(ptr, ptr + count, EigGreater());
#endif
}
} // namespace optical_flow
}}}
......
......@@ -55,8 +55,8 @@ namespace cv { namespace cuda { namespace device
{
namespace gfft
{
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count);
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count);
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream);
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count, cudaStream_t stream);
}
}}}
......@@ -97,9 +97,6 @@ namespace
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream)
{
// TODO : implement async version
(void) stream;
using namespace cv::cuda::device::gfft;
GpuMat image = _image.getGpuMat();
......@@ -108,14 +105,14 @@ namespace
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );
ensureSizeIsEnough(image.size(), CV_32FC1, eig_);
cornerCriteria_->compute(image, eig_);
cornerCriteria_->compute(image, eig_, stream);
double maxVal = 0;
cuda::minMax(eig_, 0, &maxVal);
cudaStream_t stream_ = StreamAccessor::getStream(stream);
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
int total = findCorners_gpu(eig_, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols);
int total = findCorners_gpu(eig_, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, stream_);
if (total == 0)
{
......@@ -123,18 +120,18 @@ namespace
return;
}
sortCorners_gpu(eig_, tmpCorners_.ptr<float2>(), total);
sortCorners_gpu(eig_, tmpCorners_.ptr<float2>(), total, stream_);
if (minDistance_ < 1)
{
tmpCorners_.colRange(0, maxCorners_ > 0 ? std::min(maxCorners_, total) : total).copyTo(_corners);
tmpCorners_.colRange(0, maxCorners_ > 0 ? std::min(maxCorners_, total) : total).copyTo(_corners, stream);
}
else
{
std::vector<Point2f> tmp(total);
Mat tmpMat(1, total, CV_32FC2, (void*)&tmp[0]);
tmpCorners_.colRange(0, total).download(tmpMat);
tmpCorners_.colRange(0, total).download(tmpMat, stream);
stream.waitForCompletion();
std::vector<Point2f> tmp2;
tmp2.reserve(total);
......@@ -203,7 +200,7 @@ namespace
_corners.create(1, static_cast<int>(tmp2.size()), CV_32FC2);
GpuMat corners = _corners.getGpuMat();
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]), stream);
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册