提交 edcfa64d 编写于 作者: A Anatoly Baksheev

experimental kernels for cuda

上级 50d1d711
......@@ -63,7 +63,7 @@ namespace cv
// Worker policy describing how the threads of one warp split a range.
// Used as the `Worker` template argument of the strided helpers, which
// call Worker::STRIDE() and Worker::SHIFT().
struct Warp
{
    // Step between consecutive elements handled by the same thread:
    // the built-in warpSize.
    static __forceinline__ __device__ int STRIDE() { return warpSize; }

    // Starting offset of the calling thread: threadIdx.x masked with
    // (warpSize - 1). NOTE(review): this equals the lane index only when
    // warpSize is a power of two -- confirm for the targeted hardware.
    static __forceinline__ __device__ int SHIFT() { return threadIdx.x & (warpSize - 1); }
};
......@@ -77,8 +77,8 @@ namespace cv
out[idx] = in[idx];
}
template <class Worker, typename ForwardIterator, typename ForwardIterator>
__forceinline__ __device__ void Copy(ForwardIterator beg, ForwardIterator end, OutIter out)
template <class Worker, typename InIter, typename OutIter>
__forceinline__ __device__ void Copy(InIter beg, InIter end, OutIter out)
{
int STRIDE = Worker::STRIDE();
int SHIFT = Worker::SHIFT();
......@@ -103,6 +103,19 @@ namespace cv
for (; idx < length; idx += STRIDE, cur += STRIDE)
out[idx] = cur;
}
// Writes an increasing integer sequence into [beg, end), strided per worker.
//
// The calling thread starts at offset Worker::SHIFT() from `beg` and advances
// by Worker::STRIDE() each step; every element it touches at offset k from the
// original `beg` receives the value `val + k`. Elements at other offsets are
// left for other threads (presumably those with different SHIFT values --
// verify against the Worker policy in use).
//
// Worker  - policy type providing static STRIDE() and SHIFT().
// OutIter - iterator supporting `+= int`, `<` and assignment through `*`.
// beg/end - output range; val - value associated with offset 0.
template <class Worker, typename OutIter>
__forceinline__ __device__ void Yota(OutIter beg, OutIter end, int val)
{
    const int step = Worker::STRIDE();
    const int offset = Worker::SHIFT();

    // Move both the cursor and the value to this thread's first element.
    beg += offset;
    val += offset;

    while (beg < end)
    {
        *beg = val;
        beg += step;
        val += step;
    }
}
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册