Commit 1c1a61dd authored by Vladislav Vinogradov

added __forceinline__ to device functions

fixed BFM warning ("cannot tell what pointer points to")
Parent 79f3260b
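Both changes follow the same pattern: `__forceinline__` guarantees that these tiny device helpers are inlined into the kernels that call them, and reads through raw pointers in the BruteForceMatcher (BFM) kernels are routed via `ForceGlob<T>::Load` from `opencv2/gpu/device/datamov_utils.hpp`, which emits an explicit global-memory load and so silences nvcc's "cannot tell what pointer points to" warning. Below is a minimal sketch of the call-site pattern; the helper name `accumulateDist` is illustrative (not part of this commit), and only the `ForceGlob<T>::Load(ptr, offset, val)` signature is taken from the diff:

    #include "opencv2/gpu/device/datamov_utils.hpp" // ForceGlob

    using namespace cv::gpu::device;

    // Illustrative device helper (hypothetical name): accumulates a distance
    // over one descriptor row, the same shape as the reduceDescDiff loop below.
    // Reading the train descriptor through ForceGlob emits an explicit
    // global-memory load instead of a plain dereference whose address space
    // the compiler cannot prove.
    template <int BLOCK_DIM_X, typename Dist, typename T>
    __device__ __forceinline__ void accumulateDist(const T* queryDescs, const T* trainDescs,
                                                   int desc_len, Dist& dist)
    {
        for (int i = threadIdx.x; i < desc_len; i += BLOCK_DIM_X)
        {
            T trainVal;
            ForceGlob<T>::Load(trainDescs, i, trainVal); // forced global load
            dist.reduceIter(queryDescs[i], trainVal);
        }
    }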
@@ -56,7 +56,7 @@ namespace cv
     // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
 #if defined(__CUDACC__)
-    #define __CV_GPU_HOST_DEVICE__ __host__ __device__
+    #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
 #else
     #define __CV_GPU_HOST_DEVICE__
 #endif
...
@@ -42,6 +42,7 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/datamov_utils.hpp"

 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -60,7 +61,7 @@ namespace cv { namespace gpu { namespace bfmatcher
     public:
         explicit SingleMask(const PtrStep& mask_) : mask(mask_) {}

-        __device__ bool operator()(int queryIdx, int trainIdx) const
+        __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
         {
             return mask.ptr(queryIdx)[trainIdx] != 0;
         }
@@ -74,14 +75,15 @@ namespace cv { namespace gpu { namespace bfmatcher
     public:
         explicit MaskCollection(PtrStep* maskCollection_) : maskCollection(maskCollection_) {}

-        __device__ void nextMask()
+        __device__ __forceinline__ void nextMask()
         {
             curMask = *maskCollection++;
         }

-        __device__ bool operator()(int queryIdx, int trainIdx) const
+        __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
         {
-            return curMask.data == 0 || curMask.ptr(queryIdx)[trainIdx] != 0;
+            uchar val;
+            return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(queryIdx), trainIdx, val), (val != 0));
         }

     private:
@@ -92,10 +94,10 @@ namespace cv { namespace gpu { namespace bfmatcher
     class WithOutMask
     {
     public:
-        __device__ void nextMask()
+        __device__ __forceinline__ void nextMask()
        {
         }
-        __device__ bool operator()(int queryIdx, int trainIdx) const
+        __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
         {
             return true;
         }
@@ -132,19 +134,19 @@ namespace cv { namespace gpu { namespace bfmatcher
         typedef int ResultType;
         typedef int ValueType;

-        __device__ L1Dist() : mySum(0) {}
+        __device__ __forceinline__ L1Dist() : mySum(0) {}

-        __device__ void reduceIter(int val1, int val2)
+        __device__ __forceinline__ void reduceIter(int val1, int val2)
         {
             mySum = __sad(val1, val2, mySum);
         }

-        template <int BLOCK_DIM_X> __device__ void reduceAll(int* sdiff_row)
+        template <int BLOCK_DIM_X> __device__ __forceinline__ void reduceAll(int* sdiff_row)
         {
             SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
         }

-        __device__ operator int() const
+        __device__ __forceinline__ operator int() const
         {
             return mySum;
         }
@@ -158,19 +160,19 @@ namespace cv { namespace gpu { namespace bfmatcher
         typedef float ResultType;
         typedef float ValueType;

-        __device__ L1Dist() : mySum(0.0f) {}
+        __device__ __forceinline__ L1Dist() : mySum(0.0f) {}

-        __device__ void reduceIter(float val1, float val2)
+        __device__ __forceinline__ void reduceIter(float val1, float val2)
         {
             mySum += fabs(val1 - val2);
         }

-        template <int BLOCK_DIM_X> __device__ void reduceAll(float* sdiff_row)
+        template <int BLOCK_DIM_X> __device__ __forceinline__ void reduceAll(float* sdiff_row)
         {
             SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
         }

-        __device__ operator float() const
+        __device__ __forceinline__ operator float() const
         {
             return mySum;
         }
@@ -185,20 +187,20 @@ namespace cv { namespace gpu { namespace bfmatcher
         typedef float ResultType;
         typedef float ValueType;

-        __device__ L2Dist() : mySum(0.0f) {}
+        __device__ __forceinline__ L2Dist() : mySum(0.0f) {}

-        __device__ void reduceIter(float val1, float val2)
+        __device__ __forceinline__ void reduceIter(float val1, float val2)
         {
             float reg = val1 - val2;
             mySum += reg * reg;
         }

-        template <int BLOCK_DIM_X> __device__ void reduceAll(float* sdiff_row)
+        template <int BLOCK_DIM_X> __device__ __forceinline__ void reduceAll(float* sdiff_row)
         {
             SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
         }

-        __device__ operator float() const
+        __device__ __forceinline__ operator float() const
         {
             return sqrtf(mySum);
         }
@@ -213,19 +215,19 @@ namespace cv { namespace gpu { namespace bfmatcher
         typedef int ResultType;
         typedef int ValueType;

-        __device__ HammingDist() : mySum(0) {}
+        __device__ __forceinline__ HammingDist() : mySum(0) {}

-        __device__ void reduceIter(int val1, int val2)
+        __device__ __forceinline__ void reduceIter(int val1, int val2)
         {
             mySum += __popc(val1 ^ val2);
         }

-        template <int BLOCK_DIM_X> __device__ void reduceAll(int* sdiff_row)
+        template <int BLOCK_DIM_X> __device__ __forceinline__ void reduceAll(int* sdiff_row)
         {
             SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
         }

-        __device__ operator int() const
+        __device__ __forceinline__ operator int() const
         {
             return mySum;
         }
@@ -241,7 +243,11 @@ namespace cv { namespace gpu { namespace bfmatcher
     __device__ void reduceDescDiff(const T* queryDescs, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
     {
         for (int i = threadIdx.x; i < desc_len; i += BLOCK_DIM_X)
-            dist.reduceIter(queryDescs[i], trainDescs[i]);
+        {
+            T trainVal;
+            ForceGlob<T>::Load(trainDescs, i, trainVal);
+            dist.reduceIter(queryDescs[i], trainVal);
+        }

         dist.reduceAll<BLOCK_DIM_X>(sdiff_row);
     }
@@ -282,7 +288,9 @@ namespace cv { namespace gpu { namespace bfmatcher
         {
             if (ind < desc_len)
             {
-                dist.reduceIter(*queryVals, trainDescs[ind]);
+                T trainVal;
+                ForceGlob<T>::Load(trainDescs, ind, trainVal);
+                dist.reduceIter(*queryVals, trainVal);

                 ++queryVals;
@@ -293,7 +301,9 @@ namespace cv { namespace gpu { namespace bfmatcher
         template <typename Dist, typename T>
         static __device__ void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
         {
-            dist.reduceIter(*queryVals, *trainDescs);
+            T trainVal;
+            ForceGlob<T>::Load(trainDescs, 0, trainVal);
+            dist.reduceIter(*queryVals, trainVal);

             ++queryVals;
             trainDescs += blockDim.x;
@@ -304,13 +314,13 @@ namespace cv { namespace gpu { namespace bfmatcher
     template <> struct UnrollDescDiff<0>
     {
         template <typename Dist, typename T>
-        static __device__ void calcCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len,
+        static __device__ __forceinline__ void calcCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len,
             Dist& dist, int ind)
         {
         }

         template <typename Dist, typename T>
-        static __device__ void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
+        static __device__ __forceinline__ void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
         {
         }
     };
@@ -320,7 +330,7 @@ namespace cv { namespace gpu { namespace bfmatcher
     struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, false>
     {
         template <typename Dist, typename T>
-        static __device__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
+        static __device__ __forceinline__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
         {
             UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcCheck(queryVals, trainDescs, desc_len, dist, threadIdx.x);
         }
@@ -329,14 +339,14 @@ namespace cv { namespace gpu { namespace bfmatcher
     struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, true>
     {
         template <typename Dist, typename T>
-        static __device__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
+        static __device__ __forceinline__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
         {
             UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcWithoutCheck(queryVals, trainDescs + threadIdx.x, dist);
         }
     };

     template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T>
-    __device__ void reduceDescDiffCached(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
+    __device__ __forceinline__ void reduceDescDiffCached(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
     {
         DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>::calc(queryVals, trainDescs, desc_len, dist);
@@ -419,13 +429,13 @@ namespace cv { namespace gpu { namespace bfmatcher
     class ReduceDescCalculatorSimple
     {
     public:
-        __device__ void prepare(const T* queryDescs_, int, void*)
+        __device__ __forceinline__ void prepare(const T* queryDescs_, int, void*)
         {
             queryDescs = queryDescs_;
         }

         template <typename Dist>
-        __device__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
+        __device__ __forceinline__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
         {
             reduceDescDiff<BLOCK_DIM_X>(queryDescs, trainDescs, desc_len, dist, sdiff_row);
         }
@@ -438,13 +448,13 @@ namespace cv { namespace gpu { namespace bfmatcher
     class ReduceDescCalculatorCached
     {
     public:
-        __device__ void prepare(const T* queryDescs, int desc_len, U* smem)
+        __device__ __forceinline__ void prepare(const T* queryDescs, int desc_len, U* smem)
         {
             loadDescsVals<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, queryVals, smem);
         }

         template <typename Dist>
-        __device__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
+        __device__ __forceinline__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
         {
             reduceDescDiffCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>(queryVals, trainDescs, desc_len, dist, sdiff_row);
         }
@@ -496,13 +506,13 @@ namespace cv { namespace gpu { namespace bfmatcher
         }

         template <typename Dist, typename ReduceDescCalculator, typename Mask>
-        __device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
+        __device__ __forceinline__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
             typename Dist::ResultType& myMin, int& myBestTrainIdx, int& myBestImgIdx, typename Dist::ResultType* sdiff_row) const
         {
             matchDescs<Dist>(queryIdx, 0, trainDescs, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
         }

-        __device__ int desc_len() const
+        __device__ __forceinline__ int desc_len() const
         {
             return trainDescs.cols;
         }
@@ -532,7 +542,7 @@ namespace cv { namespace gpu { namespace bfmatcher
             }
         }

-        __device__ int desc_len() const
+        __device__ __forceinline__ int desc_len() const
         {
             return desclen;
         }
...
@@ -56,7 +56,7 @@ namespace cv { namespace gpu
     struct TransformOp
     {
-        __device__ float3 operator()(float3 p) const
+        __device__ __forceinline__ float3 operator()(float3 p) const
         {
             return make_float3(
                     crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
@@ -89,7 +89,7 @@ namespace cv { namespace gpu
     struct ProjectOp
     {
-        __device__ float2 operator()(float3 p) const
+        __device__ __forceinline__ float2 operator()(float3 p) const
         {
             // Rotate and translate in 3D
             float3 t = make_float3(
@@ -128,7 +128,7 @@ namespace cv { namespace gpu
         return SOLVE_PNP_RANSAC_MAX_NUM_ITERS;
     }

-    __device__ float sqr(float x)
+    __device__ __forceinline__ float sqr(float x)
     {
         return x * x;
     }
...
@@ -59,38 +59,38 @@ namespace cv { namespace gpu { namespace color
     template<> struct ColorChannel<uchar>
     {
         typedef float worktype_f;
-        static __device__ uchar max() { return UCHAR_MAX; }
-        static __device__ uchar half() { return (uchar)(max()/2 + 1); }
+        static __device__ __forceinline__ uchar max() { return UCHAR_MAX; }
+        static __device__ __forceinline__ uchar half() { return (uchar)(max()/2 + 1); }
     };

     template<> struct ColorChannel<ushort>
     {
         typedef float worktype_f;
-        static __device__ ushort max() { return USHRT_MAX; }
-        static __device__ ushort half() { return (ushort)(max()/2 + 1); }
+        static __device__ __forceinline__ ushort max() { return USHRT_MAX; }
+        static __device__ __forceinline__ ushort half() { return (ushort)(max()/2 + 1); }
     };

     template<> struct ColorChannel<float>
     {
         typedef float worktype_f;
-        static __device__ float max() { return 1.f; }
-        static __device__ float half() { return 0.5f; }
+        static __device__ __forceinline__ float max() { return 1.f; }
+        static __device__ __forceinline__ float half() { return 0.5f; }
     };

     template <typename T>
-    __device__ void setAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)
+    __device__ __forceinline__ void setAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)
     {
     }
     template <typename T>
-    __device__ void setAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)
+    __device__ __forceinline__ void setAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)
     {
         vec.w = val;
     }
     template <typename T>
-    __device__ T getAlpha(const typename TypeVec<T, 3>::vec_t& vec)
+    __device__ __forceinline__ T getAlpha(const typename TypeVec<T, 3>::vec_t& vec)
     {
         return ColorChannel<T>::max();
     }
     template <typename T>
-    __device__ T getAlpha(const typename TypeVec<T, 4>::vec_t& vec)
+    __device__ __forceinline__ T getAlpha(const typename TypeVec<T, 4>::vec_t& vec)
     {
         return vec.w;
     }
@@ -114,7 +114,7 @@ namespace cv { namespace gpu { namespace color
         explicit RGB2RGB(int bidx) : bidx(bidx) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
@@ -179,7 +179,7 @@ namespace cv { namespace gpu { namespace color
     template <> struct RGB5x52RGBConverter<5>
     {
         template <typename D>
-        static __device__ void cvt(uint src, D& dst, int bidx)
+        static __device__ __forceinline__ void cvt(uint src, D& dst, int bidx)
         {
             (&dst.x)[bidx] = (uchar)(src << 3);
             dst.y = (uchar)((src >> 2) & ~7);
@@ -190,7 +190,7 @@ namespace cv { namespace gpu { namespace color
     template <> struct RGB5x52RGBConverter<6>
     {
         template <typename D>
-        static __device__ void cvt(uint src, D& dst, int bidx)
+        static __device__ __forceinline__ void cvt(uint src, D& dst, int bidx)
         {
             (&dst.x)[bidx] = (uchar)(src << 3);
             dst.y = (uchar)((src >> 3) & ~3);
@@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace color
         explicit RGB5x52RGB(int bidx) : bidx(bidx) {}

-        __device__ dst_t operator()(ushort src) const
+        __device__ __forceinline__ dst_t operator()(ushort src) const
         {
             dst_t dst;
             RGB5x52RGBConverter<GREEN_BITS>::cvt((uint)src, dst, bidx);
@@ -221,18 +221,18 @@ namespace cv { namespace gpu { namespace color
     template<> struct RGB2RGB5x5Converter<6>
     {
         template <typename T>
-        static __device__ ushort cvt(const T& src, int bidx)
+        static __device__ __forceinline__ ushort cvt(const T& src, int bidx)
         {
             return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8));
         }
     };

     template<> struct RGB2RGB5x5Converter<5>
     {
-        static __device__ ushort cvt(const uchar3& src, int bidx)
+        static __device__ __forceinline__ ushort cvt(const uchar3& src, int bidx)
         {
             return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7));
         }
-        static __device__ ushort cvt(const uchar4& src, int bidx)
+        static __device__ __forceinline__ ushort cvt(const uchar4& src, int bidx)
         {
             return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7) | (src.w ? 0x8000 : 0));
         }
@@ -245,7 +245,7 @@ namespace cv { namespace gpu { namespace color
         explicit RGB2RGB5x5(int bidx) : bidx(bidx) {}

-        __device__ ushort operator()(const src_t& src)
+        __device__ __forceinline__ ushort operator()(const src_t& src)
         {
             return RGB2RGB5x5Converter<GREEN_BITS>::cvt(src, bidx);
         }
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace color
         typedef T src_t;
         typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

-        __device__ dst_t operator()(const T& src) const
+        __device__ __forceinline__ dst_t operator()(const T& src) const
         {
             dst_t dst;
@@ -313,14 +313,14 @@ namespace cv { namespace gpu { namespace color
     template <int GREEN_BITS> struct Gray2RGB5x5Converter;
     template<> struct Gray2RGB5x5Converter<6>
     {
-        static __device__ ushort cvt(uint t)
+        static __device__ __forceinline__ ushort cvt(uint t)
         {
             return (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8));
         }
     };
     template<> struct Gray2RGB5x5Converter<5>
     {
-        static __device__ ushort cvt(uint t)
+        static __device__ __forceinline__ ushort cvt(uint t)
         {
             t >>= 3;
             return (ushort)(t | (t << 5) | (t << 10));
@@ -332,7 +332,7 @@ namespace cv { namespace gpu { namespace color
         typedef uchar src_t;
         typedef ushort dst_t;

-        __device__ ushort operator()(uchar src) const
+        __device__ __forceinline__ ushort operator()(uchar src) const
         {
             return Gray2RGB5x5Converter<GREEN_BITS>::cvt((uint)src);
         }
@@ -406,14 +406,14 @@ namespace cv { namespace gpu { namespace color
     template <int GREEN_BITS> struct RGB5x52GrayConverter;
     template<> struct RGB5x52GrayConverter<6>
     {
-        static __device__ uchar cvt(uint t)
+        static __device__ __forceinline__ uchar cvt(uint t)
         {
             return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 3) & 0xfc) * G2Y + ((t >> 8) & 0xf8) * R2Y, yuv_shift);
         }
     };
     template<> struct RGB5x52GrayConverter<5>
     {
-        static __device__ uchar cvt(uint t)
+        static __device__ __forceinline__ uchar cvt(uint t)
         {
             return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 2) & 0xf8) * G2Y + ((t >> 7) & 0xf8) * R2Y, yuv_shift);
         }
@@ -424,18 +424,18 @@ namespace cv { namespace gpu { namespace color
         typedef ushort src_t;
         typedef uchar dst_t;

-        __device__ uchar operator()(ushort src) const
+        __device__ __forceinline__ uchar operator()(ushort src) const
         {
             return RGB5x52GrayConverter<GREEN_BITS>::cvt((uint)src);
         }
     };

     template <typename T>
-    __device__ T RGB2GrayConvert(const T* src, int bidx)
+    __device__ __forceinline__ T RGB2GrayConvert(const T* src, int bidx)
     {
         return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);
     }

-    __device__ float RGB2GrayConvert(const float* src, int bidx)
+    __device__ __forceinline__ float RGB2GrayConvert(const float* src, int bidx)
     {
         const float cr = 0.299f;
         const float cg = 0.587f;
@@ -451,7 +451,7 @@ namespace cv { namespace gpu { namespace color
         explicit RGB2Gray(int bidx) : bidx(bidx) {}

-        __device__ T operator()(const src_t& src)
+        __device__ __forceinline__ T operator()(const src_t& src)
         {
             return RGB2GrayConvert(&src.x, bidx);
         }
@@ -515,7 +515,7 @@ namespace cv { namespace gpu { namespace color
     __constant__ float cYCrCbCoeffs_f[5];

     template <typename T, typename D>
-    __device__ void RGB2YCrCbConvert(const T* src, D& dst, int bidx)
+    __device__ __forceinline__ void RGB2YCrCbConvert(const T* src, D& dst, int bidx)
     {
         const int delta = ColorChannel<T>::half() * (1 << yuv_shift);
@@ -528,7 +528,7 @@ namespace cv { namespace gpu { namespace color
         dst.z = saturate_cast<T>(Cb);
     }

     template <typename D>
-    static __device__ void RGB2YCrCbConvert(const float* src, D& dst, int bidx)
+    static __device__ __forceinline__ void RGB2YCrCbConvert(const float* src, D& dst, int bidx)
     {
         dst.x = src[0] * cYCrCbCoeffs_f[0] + src[1] * cYCrCbCoeffs_f[1] + src[2] * cYCrCbCoeffs_f[2];
         dst.y = (src[bidx^2] - dst.x) * cYCrCbCoeffs_f[3] + ColorChannel<float>::half();
@@ -561,7 +561,7 @@ namespace cv { namespace gpu { namespace color
         RGB2YCrCb(int bidx, const coeff_t coeffs[5]) : RGB2YCrCbBase<T>(coeffs), bidx(bidx) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
             RGB2YCrCbConvert(&src.x, dst, bidx);
@@ -573,7 +573,7 @@ namespace cv { namespace gpu { namespace color
     };

     template <typename T, typename D>
-    __device__ void YCrCb2RGBConvert(const T& src, D* dst, int bidx)
+    __device__ __forceinline__ void YCrCb2RGBConvert(const T& src, D* dst, int bidx)
     {
         const int b = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[3], yuv_shift);
         const int g = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[2] + (src.y - ColorChannel<D>::half()) * cYCrCbCoeffs_i[1], yuv_shift);
@@ -584,7 +584,7 @@ namespace cv { namespace gpu { namespace color
         dst[bidx^2] = saturate_cast<D>(r);
     }

     template <typename T>
-    __device__ void YCrCb2RGBConvert(const T& src, float* dst, int bidx)
+    __device__ __forceinline__ void YCrCb2RGBConvert(const T& src, float* dst, int bidx)
     {
         dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[3];
         dst[1] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * cYCrCbCoeffs_f[1];
@@ -617,7 +617,7 @@ namespace cv { namespace gpu { namespace color
         YCrCb2RGB(int bidx, const coeff_t coeffs[4]) : YCrCb2RGBBase<T>(coeffs), bidx(bidx) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
@@ -725,14 +725,14 @@ namespace cv { namespace gpu { namespace color
     __constant__ float cXYZ_D65f[9];

     template <typename T, typename D>
-    __device__ void RGB2XYZConvert(const T* src, D& dst)
+    __device__ __forceinline__ void RGB2XYZConvert(const T* src, D& dst)
     {
         dst.x = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift));
         dst.y = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift));
         dst.z = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift));
     }

     template <typename D>
-    __device__ void RGB2XYZConvert(const float* src, D& dst)
+    __device__ __forceinline__ void RGB2XYZConvert(const float* src, D& dst)
     {
         dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2];
         dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5];
@@ -765,7 +765,7 @@ namespace cv { namespace gpu { namespace color
         explicit RGB2XYZ(const coeff_t coeffs[9]) : RGB2XYZBase<T>(coeffs) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
             RGB2XYZConvert(&src.x, dst);
@@ -774,14 +774,14 @@ namespace cv { namespace gpu { namespace color
     };

     template <typename T, typename D>
-    __device__ void XYZ2RGBConvert(const T& src, D* dst)
+    __device__ __forceinline__ void XYZ2RGBConvert(const T& src, D* dst)
     {
         dst[0] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));
         dst[1] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));
         dst[2] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));
     }

     template <typename T>
-    __device__ void XYZ2RGBConvert(const T& src, float* dst)
+    __device__ __forceinline__ void XYZ2RGBConvert(const T& src, float* dst)
     {
         dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];
         dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];
@@ -814,7 +814,7 @@ namespace cv { namespace gpu { namespace color
         explicit XYZ2RGB(const coeff_t coeffs[9]) : XYZ2RGBBase<T>(coeffs) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
             XYZ2RGBConvert(src, &dst.x);
@@ -987,7 +987,7 @@ namespace cv { namespace gpu { namespace color
         explicit RGB2HSV(int bidx) : bidx(bidx) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
             RGB2HSVConvert<HR>(&src.x, dst, bidx);
@@ -1062,7 +1062,7 @@ namespace cv { namespace gpu { namespace color
         explicit HSV2RGB(int bidx) : bidx(bidx) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
             HSV2RGBConvert<HR>(src, &dst.x, bidx);
@@ -1214,7 +1214,7 @@ namespace cv { namespace gpu { namespace color
         explicit RGB2HLS(int bidx) : bidx(bidx) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
             RGB2HLSConvert<HR>(&src.x, dst, bidx);
@@ -1295,7 +1295,7 @@ namespace cv { namespace gpu { namespace color
         explicit HLS2RGB(int bidx) : bidx(bidx) {}

-        __device__ dst_t operator()(const src_t& src) const
+        __device__ __forceinline__ dst_t operator()(const src_t& src) const
         {
             dst_t dst;
             HLS2RGBConvert<HR>(src, &dst.x, bidx);
...
@@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace mathfunc
     template <typename T1, typename T2>
     struct NotEqual
     {
-        __device__ uchar operator()(const T1& src1, const T2& src2)
+        __device__ __forceinline__ uchar operator()(const T1& src1, const T2& src2)
         {
             return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
         }
@@ -91,7 +91,7 @@ namespace cv { namespace gpu { namespace mathfunc
     template <typename T>
     struct UnOp<T, UN_OP_NOT>
     {
-        static __device__ T call(T v) { return ~v; }
+        static __device__ __forceinline__ T call(T v) { return ~v; }
     };
@@ -199,20 +199,20 @@ namespace cv { namespace gpu { namespace mathfunc
     template <typename T>
     struct BinOp<T, BIN_OP_OR>
     {
-        static __device__ T call(T a, T b) { return a | b; }
+        static __device__ __forceinline__ T call(T a, T b) { return a | b; }
     };

     template <typename T>
     struct BinOp<T, BIN_OP_AND>
     {
-        static __device__ T call(T a, T b) { return a & b; }
+        static __device__ __forceinline__ T call(T a, T b) { return a & b; }
     };

     template <typename T>
     struct BinOp<T, BIN_OP_XOR>
     {
-        static __device__ T call(T a, T b) { return a ^ b; }
+        static __device__ __forceinline__ T call(T a, T b) { return a ^ b; }
     };
@@ -357,15 +357,15 @@ namespace cv { namespace gpu { namespace mathfunc
     struct MinOp
     {
         template <typename T>
-        __device__ T operator()(T a, T b)
+        __device__ __forceinline__ T operator()(T a, T b)
         {
             return min(a, b);
         }
-        __device__ float operator()(float a, float b)
+        __device__ __forceinline__ float operator()(float a, float b)
         {
             return fmin(a, b);
         }
-        __device__ double operator()(double a, double b)
+        __device__ __forceinline__ double operator()(double a, double b)
         {
             return fmin(a, b);
         }
@@ -374,15 +374,15 @@ namespace cv { namespace gpu { namespace mathfunc
     struct MaxOp
     {
         template <typename T>
-        __device__ T operator()(T a, T b)
+        __device__ __forceinline__ T operator()(T a, T b)
         {
             return max(a, b);
         }
-        __device__ float operator()(float a, float b)
+        __device__ __forceinline__ float operator()(float a, float b)
         {
             return fmax(a, b);
         }
-        __device__ double operator()(double a, double b)
+        __device__ __forceinline__ double operator()(double a, double b)
         {
             return fmax(a, b);
         }
@@ -394,7 +394,7 @@ namespace cv { namespace gpu { namespace mathfunc
         explicit ScalarMinOp(T s_) : s(s_) {}

-        __device__ T operator()(T a)
+        __device__ __forceinline__ T operator()(T a)
         {
             return min(a, s);
         }
@@ -405,7 +405,7 @@ namespace cv { namespace gpu { namespace mathfunc
         explicit ScalarMinOp(float s_) : s(s_) {}

-        __device__ float operator()(float a)
+        __device__ __forceinline__ float operator()(float a)
         {
             return fmin(a, s);
         }
@@ -416,7 +416,7 @@ namespace cv { namespace gpu { namespace mathfunc
         explicit ScalarMinOp(double s_) : s(s_) {}

-        __device__ double operator()(double a)
+        __device__ __forceinline__ double operator()(double a)
         {
             return fmin(a, s);
         }
@@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace mathfunc
         explicit ScalarMaxOp(T s_) : s(s_) {}

-        __device__ T operator()(T a)
+        __device__ __forceinline__ T operator()(T a)
         {
             return max(a, s);
         }
@@ -439,7 +439,7 @@ namespace cv { namespace gpu { namespace mathfunc
         explicit ScalarMaxOp(float s_) : s(s_) {}

-        __device__ float operator()(float a)
+        __device__ __forceinline__ float operator()(float a)
         {
             return fmax(a, s);
         }
@@ -450,7 +450,7 @@ namespace cv { namespace gpu { namespace mathfunc
         explicit ScalarMaxOp(double s_) : s(s_) {}

-        __device__ double operator()(double a)
+        __device__ __forceinline__ double operator()(double a)
         {
             return fmax(a, s);
         }
@@ -524,7 +524,7 @@ namespace cv { namespace gpu { namespace mathfunc
     {
         ThreshBinary(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}

-        __device__ T operator()(const T& src) const
+        __device__ __forceinline__ T operator()(const T& src) const
         {
             return src > thresh ? maxVal : 0;
         }
@@ -538,7 +538,7 @@ namespace cv { namespace gpu { namespace mathfunc
     {
         ThreshBinaryInv(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}

-        __device__ T operator()(const T& src) const
+        __device__ __forceinline__ T operator()(const T& src) const
         {
             return src > thresh ? 0 : maxVal;
         }
@@ -552,7 +552,7 @@ namespace cv { namespace gpu { namespace mathfunc
     {
         ThreshTrunc(T thresh_, T) : thresh(thresh_) {}

-        __device__ T operator()(const T& src) const
+        __device__ __forceinline__ T operator()(const T& src) const
         {
             return min(src, thresh);
         }
@@ -564,7 +564,7 @@ namespace cv { namespace gpu { namespace mathfunc
     {
         ThreshTrunc(float thresh_, float) : thresh(thresh_) {}

-        __device__ float operator()(const float& src) const
+        __device__ __forceinline__ float operator()(const float& src) const
         {
             return fmin(src, thresh);
         }
@@ -576,7 +576,7 @@ namespace cv { namespace gpu { namespace mathfunc
     {
         ThreshTrunc(double thresh_, double) : thresh(thresh_) {}

-        __device__ double operator()(const double& src) const
+        __device__ __forceinline__ double operator()(const double& src) const
         {
             return fmin(src, thresh);
         }
@@ -590,7 +590,7 @@ namespace cv { namespace gpu { namespace mathfunc
     public:
         ThreshToZero(T thresh_, T) : thresh(thresh_) {}

-        __device__ T operator()(const T& src) const
+        __device__ __forceinline__ T operator()(const T& src) const
         {
             return src > thresh ? src : 0;
         }
@@ -604,7 +604,7 @@ namespace cv { namespace gpu { namespace mathfunc
     public:
         ThreshToZeroInv(T thresh_, T) : thresh(thresh_) {}

-        __device__ T operator()(const T& src) const
+        __device__ __forceinline__ T operator()(const T& src) const
         {
             return src > thresh ? 0 : src;
         }
...
@@ -406,7 +406,7 @@ namespace bf_krnls
     template <int channels>
     struct DistRgbMax
     {
-        static __device__ uchar calc(const uchar* a, const uchar* b)
+        static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
         {
             uchar x = abs(a[0] - b[0]);
             uchar y = abs(a[1] - b[1]);
@@ -418,7 +418,7 @@ namespace bf_krnls
     template <>
     struct DistRgbMax<1>
     {
-        static __device__ uchar calc(const uchar* a, const uchar* b)
+        static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
         {
             return abs(a[0] - b[0]);
         }
...
@@ -48,35 +48,35 @@ using namespace cv::gpu::device;
 namespace cv { namespace gpu { namespace imgproc {

-    __device__ float sum(float v) { return v; }
-    __device__ float sum(float2 v) { return v.x + v.y; }
-    __device__ float sum(float3 v) { return v.x + v.y + v.z; }
-    __device__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }
+    __device__ __forceinline__ float sum(float v) { return v; }
+    __device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
+    __device__ __forceinline__ float sum(float3 v) { return v.x + v.y + v.z; }
+    __device__ __forceinline__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }

-    __device__ float first(float v) { return v; }
-    __device__ float first(float2 v) { return v.x; }
-    __device__ float first(float3 v) { return v.x; }
-    __device__ float first(float4 v) { return v.x; }
+    __device__ __forceinline__ float first(float v) { return v; }
+    __device__ __forceinline__ float first(float2 v) { return v.x; }
+    __device__ __forceinline__ float first(float3 v) { return v.x; }
+    __device__ __forceinline__ float first(float4 v) { return v.x; }

-    __device__ float mul(float a, float b) { return a * b; }
-    __device__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
-    __device__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
-    __device__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
+    __device__ __forceinline__ float mul(float a, float b) { return a * b; }
+    __device__ __forceinline__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
+    __device__ __forceinline__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
+    __device__ __forceinline__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }

-    __device__ float mul(uchar a, uchar b) { return a * b; }
-    __device__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
-    __device__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
-    __device__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
+    __device__ __forceinline__ float mul(uchar a, uchar b) { return a * b; }
+    __device__ __forceinline__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
+    __device__ __forceinline__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
+    __device__ __forceinline__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }

-    __device__ float sub(float a, float b) { return a - b; }
-    __device__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
-    __device__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
-    __device__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
+    __device__ __forceinline__ float sub(float a, float b) { return a - b; }
+    __device__ __forceinline__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
+    __device__ __forceinline__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
+    __device__ __forceinline__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }

-    __device__ float sub(uchar a, uchar b) { return a - b; }
-    __device__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
-    __device__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
-    __device__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
+    __device__ __forceinline__ float sub(uchar a, uchar b) { return a - b; }
+    __device__ __forceinline__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
+    __device__ __forceinline__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
+    __device__ __forceinline__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }

     template <typename T, int cn>
...
@@ -60,27 +60,27 @@ namespace cv { namespace gpu { namespace mathfunc
 {
     struct Nothing
     {
-        static __device__ void calc(int, int, float, float, float*, size_t, float)
+        static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
         {
         }
     };
     struct Magnitude
     {
-        static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
+        static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
         {
             dst[y * dst_step + x] = sqrtf(x_data * x_data + y_data * y_data);
         }
     };
     struct MagnitudeSqr
     {
-        static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
+        static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
         {
             dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
         }
     };
     struct Atan2
     {
-        static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
+        static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
         {
             dst[y * dst_step + x] = scale * atan2f(y_data, x_data);
         }
@@ -104,14 +104,14 @@ namespace cv { namespace gpu { namespace mathfunc
     struct NonEmptyMag
     {
-        static __device__ float get(const float* mag, size_t mag_step, int x, int y)
+        static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
         {
             return mag[y * mag_step + x];
         }
     };
     struct EmptyMag
     {
-        static __device__ float get(const float*, size_t, int, int)
+        static __device__ __forceinline__ float get(const float*, size_t, int, int)
         {
             return 1.0f;
         }
...
@@ -123,14 +123,14 @@ namespace cv { namespace gpu { namespace matrix_operations {
     __constant__ float scalar_32f[4];
     __constant__ double scalar_64f[4];

-    template <typename T> __device__ T readScalar(int i);
-    template <> __device__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
-    template <> __device__ schar readScalar<schar>(int i) {return scalar_8s[i];}
-    template <> __device__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
-    template <> __device__ short readScalar<short>(int i) {return scalar_16s[i];}
-    template <> __device__ int readScalar<int>(int i) {return scalar_32s[i];}
-    template <> __device__ float readScalar<float>(int i) {return scalar_32f[i];}
-    template <> __device__ double readScalar<double>(int i) {return scalar_64f[i];}
+    template <typename T> __device__ __forceinline__ T readScalar(int i);
+    template <> __device__ __forceinline__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
+    template <> __device__ __forceinline__ schar readScalar<schar>(int i) {return scalar_8s[i];}
+    template <> __device__ __forceinline__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
+    template <> __device__ __forceinline__ short readScalar<short>(int i) {return scalar_16s[i];}
+    template <> __device__ __forceinline__ int readScalar<int>(int i) {return scalar_32s[i];}
+    template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
+    template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}

     void writeScalar(const uchar* vals)
     {
@@ -243,7 +243,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     public:
         Convertor(double alpha_, double beta_) : alpha(alpha_), beta(beta_) {}

-        __device__ D operator()(const T& src)
+        __device__ __forceinline__ D operator()(const T& src)
         {
             return saturate_cast<D>(alpha * src + beta);
         }
...
...@@ -78,7 +78,7 @@ namespace cv { namespace gpu { namespace mathfunc ...@@ -78,7 +78,7 @@ namespace cv { namespace gpu { namespace mathfunc
{ {
explicit Mask8U(PtrStep mask): mask(mask) {} explicit Mask8U(PtrStep mask): mask(mask) {}
__device__ bool operator()(int y, int x) const __device__ __forceinline__ bool operator()(int y, int x) const
{ {
return mask.ptr(y)[x]; return mask.ptr(y)[x];
} }
...@@ -89,7 +89,7 @@ namespace cv { namespace gpu { namespace mathfunc ...@@ -89,7 +89,7 @@ namespace cv { namespace gpu { namespace mathfunc
struct MaskTrue struct MaskTrue
{ {
__device__ bool operator()(int y, int x) const __device__ __forceinline__ bool operator()(int y, int x) const
{ {
return true; return true;
} }
...@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace mathfunc ...@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace mathfunc
// Does min and max in shared memory // Does min and max in shared memory
template <typename T> template <typename T>
__device__ void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval) __device__ __forceinline__ void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval)
{ {
minval[tid] = min(minval[tid], minval[tid + offset]); minval[tid] = min(minval[tid], minval[tid + offset]);
maxval[tid] = max(maxval[tid], maxval[tid + offset]); maxval[tid] = max(maxval[tid], maxval[tid + offset]);
...@@ -976,16 +976,16 @@ namespace cv { namespace gpu { namespace mathfunc ...@@ -976,16 +976,16 @@ namespace cv { namespace gpu { namespace mathfunc
template <> struct SumType<double> { typedef double R; }; template <> struct SumType<double> { typedef double R; };
template <typename R> template <typename R>
struct IdentityOp { static __device__ R call(R x) { return x; } }; struct IdentityOp { static __device__ __forceinline__ R call(R x) { return x; } };
template <typename R> template <typename R>
struct AbsOp { static __device__ R call(R x) { return abs(x); } }; struct AbsOp { static __device__ __forceinline__ R call(R x) { return abs(x); } };
template <> template <>
struct AbsOp<uint> { static __device__ uint call(uint x) { return x; } }; struct AbsOp<uint> { static __device__ __forceinline__ uint call(uint x) { return x; } };
template <typename R> template <typename R>
struct SqrOp { static __device__ R call(R x) { return x * x; } }; struct SqrOp { static __device__ __forceinline__ R call(R x) { return x * x; } };
__constant__ int ctwidth; __constant__ int ctwidth;
__constant__ int ctheight; __constant__ int ctheight;
......
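merge() above is one rung of a shared-memory tree reduction. A hedged sketch of the enclosing loop it is written for; reduceMinMax and the nthreads parameter are illustrative:

template <int nthreads, typename T>
__device__ void reduceMinMax(volatile T* minval, volatile T* maxval, unsigned int tid)
{
    for (unsigned int offset = nthreads / 2; offset > 0; offset >>= 1)
    {
        if (tid < offset)
            merge(tid, offset, minval, maxval);  // fold the partner slot into ours
        __syncthreads();
    }
    // minval[0] and maxval[0] now hold the block-wide extrema
}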
...@@ -68,7 +68,7 @@ __constant__ size_t cminSSD_step; ...@@ -68,7 +68,7 @@ __constant__ size_t cminSSD_step;
__constant__ int cwidth; __constant__ int cwidth;
__constant__ int cheight; __constant__ int cheight;
__device__ int SQ(int a) __device__ __forceinline__ int SQ(int a)
{ {
return a * a; return a * a;
} }
...@@ -419,7 +419,7 @@ extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, ...@@ -419,7 +419,7 @@ extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output,
texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF; texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF;
__device__ float sobel(int x, int y) __device__ __forceinline__ float sobel(int x, int y)
{ {
float conv = tex2D(texForTF, x - 1, y - 1) * (-1) + tex2D(texForTF, x + 1, y - 1) * (1) + float conv = tex2D(texForTF, x - 1, y - 1) * (-1) + tex2D(texForTF, x + 1, y - 1) * (1) +
tex2D(texForTF, x - 1, y ) * (-2) + tex2D(texForTF, x + 1, y ) * (2) + tex2D(texForTF, x - 1, y ) * (-2) + tex2D(texForTF, x + 1, y ) * (2) +
......
...@@ -76,11 +76,11 @@ namespace cv { namespace gpu { namespace bp ...@@ -76,11 +76,11 @@ namespace cv { namespace gpu { namespace bp
template <int cn> struct PixDiff; template <int cn> struct PixDiff;
template <> struct PixDiff<1> template <> struct PixDiff<1>
{ {
__device__ PixDiff(const uchar* ls) __device__ __forceinline__ PixDiff(const uchar* ls)
{ {
l = *ls; l = *ls;
} }
__device__ float operator()(const uchar* rs) const __device__ __forceinline__ float operator()(const uchar* rs) const
{ {
return abs((int)l - *rs); return abs((int)l - *rs);
} }
...@@ -88,11 +88,11 @@ namespace cv { namespace gpu { namespace bp ...@@ -88,11 +88,11 @@ namespace cv { namespace gpu { namespace bp
}; };
template <> struct PixDiff<3> template <> struct PixDiff<3>
{ {
__device__ PixDiff(const uchar* ls) __device__ __forceinline__ PixDiff(const uchar* ls)
{ {
l = *((uchar3*)ls); l = *((uchar3*)ls);
} }
__device__ float operator()(const uchar* rs) const __device__ __forceinline__ float operator()(const uchar* rs) const
{ {
const float tr = 0.299f; const float tr = 0.299f;
const float tg = 0.587f; const float tg = 0.587f;
...@@ -108,11 +108,11 @@ namespace cv { namespace gpu { namespace bp ...@@ -108,11 +108,11 @@ namespace cv { namespace gpu { namespace bp
}; };
template <> struct PixDiff<4> template <> struct PixDiff<4>
{ {
__device__ PixDiff(const uchar* ls) __device__ __forceinline__ PixDiff(const uchar* ls)
{ {
l = *((uchar4*)ls); l = *((uchar4*)ls);
} }
__device__ float operator()(const uchar* rs) const __device__ __forceinline__ float operator()(const uchar* rs) const
{ {
const float tr = 0.299f; const float tr = 0.299f;
const float tg = 0.587f; const float tg = 0.587f;
......
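The PixDiff specializations above are built for one access pattern: read the left pixel once, then score many right-image candidates against it. A sketch assuming interleaved cn-channel rows; scanDisparities and the loop bounds are illustrative:

template <int cn>
__device__ void scanDisparities(const uchar* left_row, const uchar* right_row,
                                int x, int ndisp, float* cost)
{
    PixDiff<cn> diff(left_row + x * cn);           // left pixel cached once
    for (int d = 0; d < ndisp && x - d >= 0; ++d)
        cost[d] = diff(right_row + (x - d) * cn);  // distance to each candidate
}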
...@@ -102,14 +102,14 @@ namespace cv { namespace gpu { namespace csbp ...@@ -102,14 +102,14 @@ namespace cv { namespace gpu { namespace csbp
template <int channels> struct DataCostPerPixel; template <int channels> struct DataCostPerPixel;
template <> struct DataCostPerPixel<1> template <> struct DataCostPerPixel<1>
{ {
static __device__ float compute(const uchar* left, const uchar* right) static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{ {
return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term); return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term);
} }
}; };
template <> struct DataCostPerPixel<3> template <> struct DataCostPerPixel<3>
{ {
static __device__ float compute(const uchar* left, const uchar* right) static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{ {
float tb = 0.114f * abs((int)left[0] - right[0]); float tb = 0.114f * abs((int)left[0] - right[0]);
float tg = 0.587f * abs((int)left[1] - right[1]); float tg = 0.587f * abs((int)left[1] - right[1]);
...@@ -120,7 +120,7 @@ namespace cv { namespace gpu { namespace csbp ...@@ -120,7 +120,7 @@ namespace cv { namespace gpu { namespace csbp
}; };
template <> struct DataCostPerPixel<4> template <> struct DataCostPerPixel<4>
{ {
static __device__ float compute(const uchar* left, const uchar* right) static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{ {
uchar4 l = *((const uchar4*)left); uchar4 l = *((const uchar4*)left);
uchar4 r = *((const uchar4*)right); uchar4 r = *((const uchar4*)right);
......
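All three DataCostPerPixel specializations implement the same truncated-linear cost; written out for a single scalar difference (a sketch, with the __constant__ parameters cdata_weight and cmax_data_term passed explicitly):

__device__ __forceinline__ float truncatedLinearCost(float diff, float data_weight, float max_data_term)
{
    // linear in the color difference up to the truncation point, then flat
    return fmin(data_weight * diff, data_weight * max_data_term);
}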
...@@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace surf ...@@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace surf
__constant__ float c_DY [3][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} }; __constant__ float c_DY [3][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
__constant__ float c_DXY[4][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} }; __constant__ float c_DXY[4][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
__host__ __device__ int calcSize(int octave, int layer) __host__ __device__ __forceinline__ int calcSize(int octave, int layer)
{ {
/* Wavelet size at first layer of first octave. */ /* Wavelet size at first layer of first octave. */
const int HAAR_SIZE0 = 9; const int HAAR_SIZE0 = 9;
...@@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace surf ...@@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace surf
struct WithOutMask struct WithOutMask
{ {
static __device__ bool check(int, int, int) static __device__ __forceinline__ bool check(int, int, int)
{ {
return true; return true;
} }
...@@ -708,7 +708,7 @@ namespace cv { namespace gpu { namespace surf ...@@ -708,7 +708,7 @@ namespace cv { namespace gpu { namespace surf
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
}; };
__device__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir) __device__ __forceinline__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir)
{ {
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir; float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir; float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
......
...@@ -40,30 +40,29 @@ ...@@ -40,30 +40,29 @@
// //
//M*/ //M*/
#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
#include "opencv2/gpu/device/saturate_cast.hpp" #include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp" #include "opencv2/gpu/device/vecmath.hpp"
namespace cv namespace cv { namespace gpu { namespace device
{ {
namespace gpu
{
namespace device
{
struct BrdReflect101 struct BrdReflect101
{ {
explicit BrdReflect101(int len): last(len - 1) {} explicit BrdReflect101(int len): last(len - 1) {}
__device__ int idx_low(int i) const __device__ __forceinline__ int idx_low(int i) const
{ {
return abs(i); return abs(i);
} }
__device__ int idx_high(int i) const __device__ __forceinline__ int idx_high(int i) const
{ {
return last - abs(last - i); return last - abs(last - i);
} }
__device__ int idx(int i) const __device__ __forceinline__ int idx(int i) const
{ {
return idx_low(idx_high(i)); return idx_low(idx_high(i));
} }
...@@ -84,13 +83,13 @@ namespace cv ...@@ -84,13 +83,13 @@ namespace cv
explicit BrdRowReflect101(int len): BrdReflect101(len) {} explicit BrdRowReflect101(int len): BrdReflect101(len) {}
template <typename T> template <typename T>
__device__ D at_low(int i, const T* data) const __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
return saturate_cast<D>(data[idx_low(i)]); return saturate_cast<D>(data[idx_low(i)]);
} }
template <typename T> template <typename T>
__device__ D at_high(int i, const T* data) const __device__ __forceinline__ D at_high(int i, const T* data) const
{ {
return saturate_cast<D>(data[idx_high(i)]); return saturate_cast<D>(data[idx_high(i)]);
} }
...@@ -103,13 +102,13 @@ namespace cv ...@@ -103,13 +102,13 @@ namespace cv
BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {} BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {}
template <typename T> template <typename T>
__device__ D at_low(int i, const T* data) const __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
return saturate_cast<D>(data[idx_low(i) * step]); return saturate_cast<D>(data[idx_low(i) * step]);
} }
template <typename T> template <typename T>
__device__ D at_high(int i, const T* data) const __device__ __forceinline__ D at_high(int i, const T* data) const
{ {
return saturate_cast<D>(data[idx_high(i) * step]); return saturate_cast<D>(data[idx_high(i) * step]);
} }
...@@ -123,17 +122,17 @@ namespace cv ...@@ -123,17 +122,17 @@ namespace cv
{ {
explicit BrdReplicate(int len): last(len - 1) {} explicit BrdReplicate(int len): last(len - 1) {}
__device__ int idx_low(int i) const __device__ __forceinline__ int idx_low(int i) const
{ {
return max(i, 0); return max(i, 0);
} }
__device__ int idx_high(int i) const __device__ __forceinline__ int idx_high(int i) const
{ {
return min(i, last); return min(i, last);
} }
__device__ int idx(int i) const __device__ __forceinline__ int idx(int i) const
{ {
return idx_low(idx_high(i)); return idx_low(idx_high(i));
} }
...@@ -154,13 +153,13 @@ namespace cv ...@@ -154,13 +153,13 @@ namespace cv
explicit BrdRowReplicate(int len): BrdReplicate(len) {} explicit BrdRowReplicate(int len): BrdReplicate(len) {}
template <typename T> template <typename T>
__device__ D at_low(int i, const T* data) const __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
return saturate_cast<D>(data[idx_low(i)]); return saturate_cast<D>(data[idx_low(i)]);
} }
template <typename T> template <typename T>
__device__ D at_high(int i, const T* data) const __device__ __forceinline__ D at_high(int i, const T* data) const
{ {
return saturate_cast<D>(data[idx_high(i)]); return saturate_cast<D>(data[idx_high(i)]);
} }
...@@ -173,13 +172,13 @@ namespace cv ...@@ -173,13 +172,13 @@ namespace cv
BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {} BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {}
template <typename T> template <typename T>
__device__ D at_low(int i, const T* data) const __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
return saturate_cast<D>(data[idx_low(i) * step]); return saturate_cast<D>(data[idx_low(i) * step]);
} }
template <typename T> template <typename T>
__device__ D at_high(int i, const T* data) const __device__ __forceinline__ D at_high(int i, const T* data) const
{ {
return saturate_cast<D>(data[idx_high(i) * step]); return saturate_cast<D>(data[idx_high(i) * step]);
} }
...@@ -194,13 +193,13 @@ namespace cv ...@@ -194,13 +193,13 @@ namespace cv
explicit BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {} explicit BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {}
template <typename T> template <typename T>
__device__ D at_low(int i, const T* data) const __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
return i >= 0 ? saturate_cast<D>(data[i]) : val; return i >= 0 ? saturate_cast<D>(data[i]) : val;
} }
template <typename T> template <typename T>
__device__ D at_high(int i, const T* data) const __device__ __forceinline__ D at_high(int i, const T* data) const
{ {
return i < len ? saturate_cast<D>(data[i]) : val; return i < len ? saturate_cast<D>(data[i]) : val;
} }
...@@ -221,13 +220,13 @@ namespace cv ...@@ -221,13 +220,13 @@ namespace cv
BrdColConstant(int len_, int step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {} BrdColConstant(int len_, int step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}
template <typename T> template <typename T>
__device__ D at_low(int i, const T* data) const __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
return i >= 0 ? saturate_cast<D>(data[i * step]) : val; return i >= 0 ? saturate_cast<D>(data[i * step]) : val;
} }
template <typename T> template <typename T>
__device__ D at_high(int i, const T* data) const __device__ __forceinline__ D at_high(int i, const T* data) const
{ {
return i < len ? saturate_cast<D>(data[i * step]) : val; return i < len ? saturate_cast<D>(data[i * step]) : val;
} }
...@@ -242,6 +241,6 @@ namespace cv ...@@ -242,6 +241,6 @@ namespace cv
int step; int step;
D val; D val;
}; };
} }}}
}
} #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
\ No newline at end of file
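Concretely, BrdReflect101 mirrors indices about the edges without repeating the border pixel: for len = 5 (last = 4), idx(-2) == 2 and idx(6) == 2. A host-side sketch of the same arithmetic (reflect101 here is illustrative):

#include <cstdlib>

int reflect101(int i, int len)
{
    int last = len - 1;
    int high = last - abs(last - i);  // fold indices past the right edge
    return abs(high);                 // then fold indices past the left edge
}
// reflect101(-2, 5) == 2, reflect101(6, 5) == 2, reflect101(3, 5) == 3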
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if __CUDA_ARCH__ >= 200 #ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
#if __CUDA_ARCH__ >= 200
// for Fermi memory space is detected automatically // for Fermi memory space is detected automatically
template <typename T> struct ForceGlobLoad template <typename T> struct ForceGlob
{ {
__device__ __forceinline__ static void Ld(T* ptr, int offset, T& val) { val = d_ptr[offset]; } __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
}; };
#else #else // __CUDA_ARCH__ >= 200
#if defined(_WIN64) || defined(__LP64__) #if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm // 64-bit register modifier for inlined asm
...@@ -18,22 +65,41 @@ ...@@ -18,22 +65,41 @@
#define _OPENCV_ASM_PTR_ "r" #define _OPENCV_ASM_PTR_ "r"
#endif #endif
template<class T> struct ForceGlobLoad; template<class T> struct ForceGlob;
#define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
#define DEFINE_FORCE_GLOB_LOAD(base_type, ptx_type, reg_mod) \ template <> struct ForceGlob<base_type> \
template <> struct ForceGlobLoad<base_type> \
{ \ { \
__device__ __forceinline__ static void Ld(type* ptr, int offset, type& val) \ __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \ { \
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(d_ptr + offset)); \ asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
} \ } \
}; };
#define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
} \
};
DEFINE_FORCE_GLOB_B(uchar, u8)
DEFINE_FORCE_GLOB_B(schar, s8)
DEFINE_FORCE_GLOB_B(char, b8)
DEFINE_FORCE_GLOB (ushort, u16, h)
DEFINE_FORCE_GLOB (short, s16, h)
DEFINE_FORCE_GLOB (uint, u32, r)
DEFINE_FORCE_GLOB (int, s32, r)
DEFINE_FORCE_GLOB (float, f32, f)
DEFINE_FORCE_GLOB (double, f64, d)
DEFINE_FORCE_GLOB_LOAD(int, s32, r)
DEFINE_FORCE_GLOB_LOAD(float, f32, f)
#undef DEFINE_FORCE_GLOB
#undef DEFINE_FORCE_GLOB_B
#undef _OPENCV_ASM_PTR_
#undef DEFINE_FORCE_GLOB_LOAD #endif // __CUDA_ARCH__ >= 200
}}}
#endif #endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
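What the ForceGlob trait buys: on sm_1x targets the inline PTX pins the read to global memory even when the compiler cannot tell what the pointer points to, while on Fermi a plain dereference suffices. A hedged usage sketch (readMaskByte is illustrative):

__device__ bool readMaskByte(const uchar* row, int x)
{
    uchar val;
    ForceGlob<uchar>::Load(row, x, val);  // ld.global.u8 on sm_1x, plain load on sm_2x+
    return val != 0;
}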
...@@ -40,22 +40,20 @@ ...@@ -40,22 +40,20 @@
// //
//M*/ //M*/
#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
namespace cv namespace cv { namespace gpu { namespace device
{ {
namespace gpu
{
namespace device
{
template<class T> struct DynamicSharedMem template<class T> struct DynamicSharedMem
{ {
__device__ operator T*() __device__ __forceinline__ operator T*()
{ {
extern __shared__ int __smem[]; extern __shared__ int __smem[];
return (T*)__smem; return (T*)__smem;
} }
__device__ operator const T*() const __device__ __forceinline__ operator const T*() const
{ {
extern __shared__ int __smem[]; extern __shared__ int __smem[];
return (T*)__smem; return (T*)__smem;
...@@ -65,19 +63,18 @@ namespace cv ...@@ -65,19 +63,18 @@ namespace cv
// specialize for double to avoid unaligned memory access compile errors // specialize for double to avoid unaligned memory access compile errors
template<> struct DynamicSharedMem<double> template<> struct DynamicSharedMem<double>
{ {
__device__ operator double*() __device__ __forceinline__ operator double*()
{ {
extern __shared__ double __smem_d[]; extern __shared__ double __smem_d[];
return (double*)__smem_d; return (double*)__smem_d;
} }
__device__ operator const double*() const __device__ __forceinline__ operator const double*() const
{ {
extern __shared__ double __smem_d[]; extern __shared__ double __smem_d[];
return (double*)__smem_d; return (double*)__smem_d;
} }
}; };
} }}}
} #endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__
}
\ No newline at end of file
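DynamicSharedMem's conversion operators hand back the single extern shared buffer typed as T, so one declaration serves every instantiation. A usage sketch; blockScratch and its launch shape are illustrative:

template <typename T>
__global__ void blockScratch(const T* src, T* dst, int n)
{
    DynamicSharedMem<T> shared;
    T* smem = shared;                   // operator T*() returns the extern buffer
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) smem[threadIdx.x] = src[i];
    __syncthreads();
    if (i < n) dst[i] = smem[threadIdx.x];
}
// the byte size goes in the third launch-config argument:
// blockScratch<float><<<grid, 256, 256 * sizeof(float)>>>(d_src, d_dst, n);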
...@@ -40,108 +40,106 @@ ...@@ -40,108 +40,106 @@
// //
//M*/ //M*/
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__
namespace cv namespace cv { namespace gpu { namespace device
{ {
namespace gpu
{
namespace device
{
template<class T> struct numeric_limits_gpu template<class T> struct numeric_limits_gpu
{ {
typedef T type; typedef T type;
__device__ static type min() { return type(); }; __device__ __forceinline__ static type min() { return type(); };
__device__ static type max() { return type(); }; __device__ __forceinline__ static type max() { return type(); };
__device__ static type epsilon() { return type(); } __device__ __forceinline__ static type epsilon() { return type(); }
__device__ static type round_error() { return type(); } __device__ __forceinline__ static type round_error() { return type(); }
__device__ static type denorm_min() { return type(); } __device__ __forceinline__ static type denorm_min() { return type(); }
__device__ static type infinity() { return type(); } __device__ __forceinline__ static type infinity() { return type(); }
__device__ static type quiet_NaN() { return type(); } __device__ __forceinline__ static type quiet_NaN() { return type(); }
__device__ static type signaling_NaN() { return T(); } __device__ __forceinline__ static type signaling_NaN() { return T(); }
static const bool is_signed; static const bool is_signed;
}; };
template<> struct numeric_limits_gpu<bool> template<> struct numeric_limits_gpu<bool>
{ {
typedef bool type; typedef bool type;
__device__ static type min() { return false; }; __device__ __forceinline__ static type min() { return false; };
__device__ static type max() { return true; }; __device__ __forceinline__ static type max() { return true; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits_gpu<char> template<> struct numeric_limits_gpu<char>
{ {
typedef char type; typedef char type;
__device__ static type min() { return CHAR_MIN; }; __device__ __forceinline__ static type min() { return CHAR_MIN; };
__device__ static type max() { return CHAR_MAX; }; __device__ __forceinline__ static type max() { return CHAR_MAX; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (char)-1 == -1; static const bool is_signed = (char)-1 == -1;
}; };
template<> struct numeric_limits_gpu<unsigned char> template<> struct numeric_limits_gpu<unsigned char>
{ {
typedef unsigned char type; typedef unsigned char type;
__device__ static type min() { return 0; }; __device__ __forceinline__ static type min() { return 0; };
__device__ static type max() { return UCHAR_MAX; }; __device__ __forceinline__ static type max() { return UCHAR_MAX; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits_gpu<short> template<> struct numeric_limits_gpu<short>
{ {
typedef short type; typedef short type;
__device__ static type min() { return SHRT_MIN; }; __device__ __forceinline__ static type min() { return SHRT_MIN; };
__device__ static type max() { return SHRT_MAX; }; __device__ __forceinline__ static type max() { return SHRT_MAX; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
template<> struct numeric_limits_gpu<unsigned short> template<> struct numeric_limits_gpu<unsigned short>
{ {
typedef unsigned short type; typedef unsigned short type;
__device__ static type min() { return 0; }; __device__ __forceinline__ static type min() { return 0; };
__device__ static type max() { return USHRT_MAX; }; __device__ __forceinline__ static type max() { return USHRT_MAX; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits_gpu<int> template<> struct numeric_limits_gpu<int>
{ {
typedef int type; typedef int type;
__device__ static type min() { return INT_MIN; }; __device__ __forceinline__ static type min() { return INT_MIN; };
__device__ static type max() { return INT_MAX; }; __device__ __forceinline__ static type max() { return INT_MAX; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
...@@ -149,72 +147,72 @@ namespace cv ...@@ -149,72 +147,72 @@ namespace cv
template<> struct numeric_limits_gpu<unsigned int> template<> struct numeric_limits_gpu<unsigned int>
{ {
typedef unsigned int type; typedef unsigned int type;
__device__ static type min() { return 0; }; __device__ __forceinline__ static type min() { return 0; };
__device__ static type max() { return UINT_MAX; }; __device__ __forceinline__ static type max() { return UINT_MAX; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits_gpu<long> template<> struct numeric_limits_gpu<long>
{ {
typedef long type; typedef long type;
__device__ static type min() { return LONG_MIN; }; __device__ __forceinline__ static type min() { return LONG_MIN; };
__device__ static type max() { return LONG_MAX; }; __device__ __forceinline__ static type max() { return LONG_MAX; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
template<> struct numeric_limits_gpu<unsigned long> template<> struct numeric_limits_gpu<unsigned long>
{ {
typedef unsigned long type; typedef unsigned long type;
__device__ static type min() { return 0; }; __device__ __forceinline__ static type min() { return 0; };
__device__ static type max() { return ULONG_MAX; }; __device__ __forceinline__ static type max() { return ULONG_MAX; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits_gpu<float> template<> struct numeric_limits_gpu<float>
{ {
typedef float type; typedef float type;
__device__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; }; __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
__device__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; }; __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
__device__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; }; __device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
template<> struct numeric_limits_gpu<double> template<> struct numeric_limits_gpu<double>
{ {
typedef double type; typedef double type;
__device__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; }; __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
__device__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; }; __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
__device__ static type epsilon(); __device__ __forceinline__ static type epsilon();
__device__ static type round_error(); __device__ __forceinline__ static type round_error();
__device__ static type denorm_min(); __device__ __forceinline__ static type denorm_min();
__device__ static type infinity(); __device__ __forceinline__ static type infinity();
__device__ static type quiet_NaN(); __device__ __forceinline__ static type quiet_NaN();
__device__ static type signaling_NaN(); __device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
} }}}
}
} #endif // __OPENCV_GPU_LIMITS_GPU_HPP__
\ No newline at end of file
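The traits mirror std::numeric_limits for device code. One concrete use of a defined member: a relative float comparison built on epsilon() (nearlyEqual is illustrative):

__device__ __forceinline__ bool nearlyEqual(float a, float b)
{
    return fabsf(a - b) <= numeric_limits_gpu<float>::epsilon() * fmaxf(fabsf(a), fabsf(b));
}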
...@@ -51,29 +51,29 @@ namespace cv ...@@ -51,29 +51,29 @@ namespace cv
{ {
namespace device namespace device
{ {
template<typename _Tp> static __device__ _Tp saturate_cast(uchar v) { return _Tp(v); } template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
template<typename _Tp> static __device__ _Tp saturate_cast(schar v) { return _Tp(v); } template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
template<typename _Tp> static __device__ _Tp saturate_cast(ushort v) { return _Tp(v); } template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
template<typename _Tp> static __device__ _Tp saturate_cast(short v) { return _Tp(v); } template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
template<typename _Tp> static __device__ _Tp saturate_cast(uint v) { return _Tp(v); } template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
template<typename _Tp> static __device__ _Tp saturate_cast(int v) { return _Tp(v); } template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
template<typename _Tp> static __device__ _Tp saturate_cast(float v) { return _Tp(v); } template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
template<typename _Tp> static __device__ _Tp saturate_cast(double v) { return _Tp(v); } template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
template<> static __device__ uchar saturate_cast<uchar>(schar v) template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
{ return (uchar)max((int)v, 0); } { return (uchar)max((int)v, 0); }
template<> static __device__ uchar saturate_cast<uchar>(ushort v) template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
{ return (uchar)min((uint)v, (uint)UCHAR_MAX); } { return (uchar)min((uint)v, (uint)UCHAR_MAX); }
template<> static __device__ uchar saturate_cast<uchar>(int v) template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
{ return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
template<> static __device__ uchar saturate_cast<uchar>(uint v) template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
{ return (uchar)min(v, (uint)UCHAR_MAX); } { return (uchar)min(v, (uint)UCHAR_MAX); }
template<> static __device__ uchar saturate_cast<uchar>(short v) template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
{ return saturate_cast<uchar>((uint)v); } { return saturate_cast<uchar>((uint)v); }
template<> static __device__ uchar saturate_cast<uchar>(float v) template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
{ int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); } { int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); }
template<> static __device__ uchar saturate_cast<uchar>(double v) template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
{ {
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v); return saturate_cast<uchar>(iv); int iv = __double2int_rn(v); return saturate_cast<uchar>(iv);
...@@ -82,23 +82,23 @@ namespace cv ...@@ -82,23 +82,23 @@ namespace cv
#endif #endif
} }
template<> static __device__ schar saturate_cast<schar>(uchar v) template<> static __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
{ return (schar)min((int)v, SCHAR_MAX); } { return (schar)min((int)v, SCHAR_MAX); }
template<> static __device__ schar saturate_cast<schar>(ushort v) template<> static __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
{ return (schar)min((uint)v, (uint)SCHAR_MAX); } { return (schar)min((uint)v, (uint)SCHAR_MAX); }
template<> static __device__ schar saturate_cast<schar>(int v) template<> static __device__ __forceinline__ schar saturate_cast<schar>(int v)
{ {
return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ? return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ?
v : v > 0 ? SCHAR_MAX : SCHAR_MIN); v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
} }
template<> static __device__ schar saturate_cast<schar>(short v) template<> static __device__ __forceinline__ schar saturate_cast<schar>(short v)
{ return saturate_cast<schar>((int)v); } { return saturate_cast<schar>((int)v); }
template<> static __device__ schar saturate_cast<schar>(uint v) template<> static __device__ __forceinline__ schar saturate_cast<schar>(uint v)
{ return (schar)min(v, (uint)SCHAR_MAX); } { return (schar)min(v, (uint)SCHAR_MAX); }
template<> static __device__ schar saturate_cast<schar>(float v) template<> static __device__ __forceinline__ schar saturate_cast<schar>(float v)
{ int iv = __float2int_rn(v); return saturate_cast<schar>(iv); } { int iv = __float2int_rn(v); return saturate_cast<schar>(iv); }
template<> static __device__ schar saturate_cast<schar>(double v) template<> static __device__ __forceinline__ schar saturate_cast<schar>(double v)
{ {
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v); return saturate_cast<schar>(iv); int iv = __double2int_rn(v); return saturate_cast<schar>(iv);
...@@ -107,17 +107,17 @@ namespace cv ...@@ -107,17 +107,17 @@ namespace cv
#endif #endif
} }
template<> static __device__ ushort saturate_cast<ushort>(schar v) template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
{ return (ushort)max((int)v, 0); } { return (ushort)max((int)v, 0); }
template<> static __device__ ushort saturate_cast<ushort>(short v) template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
{ return (ushort)max((int)v, 0); } { return (ushort)max((int)v, 0); }
template<> static __device__ ushort saturate_cast<ushort>(int v) template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
{ return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
template<> static __device__ ushort saturate_cast<ushort>(uint v) template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
{ return (ushort)min(v, (uint)USHRT_MAX); } { return (ushort)min(v, (uint)USHRT_MAX); }
template<> static __device__ ushort saturate_cast<ushort>(float v) template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
{ int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); } { int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); }
template<> static __device__ ushort saturate_cast<ushort>(double v) template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
{ {
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v); return saturate_cast<ushort>(iv); int iv = __double2int_rn(v); return saturate_cast<ushort>(iv);
...@@ -126,18 +126,18 @@ namespace cv ...@@ -126,18 +126,18 @@ namespace cv
#endif #endif
} }
template<> static __device__ short saturate_cast<short>(ushort v) template<> static __device__ __forceinline__ short saturate_cast<short>(ushort v)
{ return (short)min((int)v, SHRT_MAX); } { return (short)min((int)v, SHRT_MAX); }
template<> static __device__ short saturate_cast<short>(int v) template<> static __device__ __forceinline__ short saturate_cast<short>(int v)
{ {
return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ? return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ?
v : v > 0 ? SHRT_MAX : SHRT_MIN); v : v > 0 ? SHRT_MAX : SHRT_MIN);
} }
template<> static __device__ short saturate_cast<short>(uint v) template<> static __device__ __forceinline__ short saturate_cast<short>(uint v)
{ return (short)min(v, (uint)SHRT_MAX); } { return (short)min(v, (uint)SHRT_MAX); }
template<> static __device__ short saturate_cast<short>(float v) template<> static __device__ __forceinline__ short saturate_cast<short>(float v)
{ int iv = __float2int_rn(v); return saturate_cast<short>(iv); } { int iv = __float2int_rn(v); return saturate_cast<short>(iv); }
template<> static __device__ short saturate_cast<short>(double v) template<> static __device__ __forceinline__ short saturate_cast<short>(double v)
{ {
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v); return saturate_cast<short>(iv); int iv = __double2int_rn(v); return saturate_cast<short>(iv);
...@@ -146,8 +146,8 @@ namespace cv ...@@ -146,8 +146,8 @@ namespace cv
#endif #endif
} }
template<> static __device__ int saturate_cast<int>(float v) { return __float2int_rn(v); } template<> static __device__ __forceinline__ int saturate_cast<int>(float v) { return __float2int_rn(v); }
template<> static __device__ int saturate_cast<int>(double v) template<> static __device__ __forceinline__ int saturate_cast<int>(double v)
{ {
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
return __double2int_rn(v); return __double2int_rn(v);
...@@ -156,8 +156,8 @@ namespace cv ...@@ -156,8 +156,8 @@ namespace cv
#endif #endif
} }
template<> static __device__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); } template<> static __device__ __forceinline__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); }
template<> static __device__ uint saturate_cast<uint>(double v) template<> static __device__ __forceinline__ uint saturate_cast<uint>(double v)
{ {
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
return __double2uint_rn(v); return __double2uint_rn(v);
......
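In concrete terms, the specializations above clamp instead of wrapping, and float-to-int conversion rounds to nearest even via __float2int_rn. A small illustrative kernel:

__global__ void demoSaturate(uchar* out)
{
    out[0] = saturate_cast<uchar>(300);      // 255, not 300 % 256 == 44
    out[1] = saturate_cast<uchar>(-5);       // 0
    out[2] = saturate_cast<uchar>(127.5f);   // 128 (round to nearest even)
}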
...@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device ...@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
public: public:
explicit MaskReader(const PtrStep& mask_): mask(mask_) {} explicit MaskReader(const PtrStep& mask_): mask(mask_) {}
__device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; } __device__ __forceinline__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
private: private:
PtrStep mask; PtrStep mask;
...@@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace device ...@@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace device
struct NoMask struct NoMask
{ {
__device__ bool operator()(int y, int x) const { return true; } __device__ __forceinline__ bool operator()(int y, int x) const { return true; }
}; };
//! Read Write Traits //! Read Write Traits
...@@ -121,14 +121,14 @@ namespace cv { namespace gpu { namespace device ...@@ -121,14 +121,14 @@ namespace cv { namespace gpu { namespace device
template <> struct OpUnroller<1> template <> struct OpUnroller<1>
{ {
template <typename T, typename D, typename UnOp, typename Mask> template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{ {
if (mask(y, x_shifted)) if (mask(y, x_shifted))
dst.x = op(src.x); dst.x = op(src.x);
} }
template <typename T1, typename T2, typename D, typename BinOp, typename Mask> template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{ {
if (mask(y, x_shifted)) if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x); dst.x = op(src1.x, src2.x);
...@@ -137,7 +137,7 @@ namespace cv { namespace gpu { namespace device ...@@ -137,7 +137,7 @@ namespace cv { namespace gpu { namespace device
template <> struct OpUnroller<2> template <> struct OpUnroller<2>
{ {
template <typename T, typename D, typename UnOp, typename Mask> template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{ {
if (mask(y, x_shifted)) if (mask(y, x_shifted))
dst.x = op(src.x); dst.x = op(src.x);
...@@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace device ...@@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace device
} }
template <typename T1, typename T2, typename D, typename BinOp, typename Mask> template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{ {
if (mask(y, x_shifted)) if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x); dst.x = op(src1.x, src2.x);
...@@ -157,7 +157,7 @@ namespace cv { namespace gpu { namespace device ...@@ -157,7 +157,7 @@ namespace cv { namespace gpu { namespace device
template <> struct OpUnroller<3> template <> struct OpUnroller<3>
{ {
template <typename T, typename D, typename UnOp, typename Mask> template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{ {
if (mask(y, x_shifted)) if (mask(y, x_shifted))
dst.x = op(src.x); dst.x = op(src.x);
...@@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device ...@@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device
} }
template <typename T1, typename T2, typename D, typename BinOp, typename Mask> template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{ {
if (mask(y, x_shifted)) if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x); dst.x = op(src1.x, src2.x);
...@@ -181,7 +181,7 @@ namespace cv { namespace gpu { namespace device ...@@ -181,7 +181,7 @@ namespace cv { namespace gpu { namespace device
template <> struct OpUnroller<4> template <> struct OpUnroller<4>
{ {
template <typename T, typename D, typename UnOp, typename Mask> template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{ {
if (mask(y, x_shifted)) if (mask(y, x_shifted))
dst.x = op(src.x); dst.x = op(src.x);
...@@ -194,7 +194,7 @@ namespace cv { namespace gpu { namespace device ...@@ -194,7 +194,7 @@ namespace cv { namespace gpu { namespace device
} }
template <typename T1, typename T2, typename D, typename BinOp, typename Mask> template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{ {
if (mask(y, x_shifted)) if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x); dst.x = op(src1.x, src2.x);
......
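The truncated bodies above all follow their first lane; a hedged reconstruction of what a 4-channel unroller expands to, with the mask re-checked at each shifted x coordinate (the presumed continuation of the visible dst.x line):

template <typename T, typename D, typename UnOp, typename Mask>
__device__ void unrollQuad(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
    if (mask(y, x_shifted    )) dst.x = op(src.x);
    if (mask(y, x_shifted + 1)) dst.y = op(src.y);
    if (mask(y, x_shifted + 2)) dst.z = op(src.z);
    if (mask(y, x_shifted + 3)) dst.w = op(src.w);
}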