Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
1c1a61dd
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1c1a61dd
编写于
6月 14, 2011
作者:
V
Vladislav Vinogradov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
added __forceinline__ to device functions
fixed BFM warning ("cannot tell what pointer points to")
上级
79f3260b
变更
21
展开全部
隐藏空白更改
内联
并排
Showing
21 changed file
with
982 addition
and
912 deletion
+982
-912
modules/gpu/include/opencv2/gpu/devmem2d.hpp
modules/gpu/include/opencv2/gpu/devmem2d.hpp
+1
-1
modules/gpu/src/cuda/brute_force_matcher.cu
modules/gpu/src/cuda/brute_force_matcher.cu
+48
-38
modules/gpu/src/cuda/calib3d.cu
modules/gpu/src/cuda/calib3d.cu
+3
-3
modules/gpu/src/cuda/color.cu
modules/gpu/src/cuda/color.cu
+44
-44
modules/gpu/src/cuda/element_operations.cu
modules/gpu/src/cuda/element_operations.cu
+24
-24
modules/gpu/src/cuda/filters.cu
modules/gpu/src/cuda/filters.cu
+2
-2
modules/gpu/src/cuda/match_template.cu
modules/gpu/src/cuda/match_template.cu
+29
-29
modules/gpu/src/cuda/mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+6
-6
modules/gpu/src/cuda/matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+9
-9
modules/gpu/src/cuda/matrix_reductions.cu
modules/gpu/src/cuda/matrix_reductions.cu
+7
-7
modules/gpu/src/cuda/stereobm.cu
modules/gpu/src/cuda/stereobm.cu
+2
-2
modules/gpu/src/cuda/stereobp.cu
modules/gpu/src/cuda/stereobp.cu
+6
-6
modules/gpu/src/cuda/stereocsbp.cu
modules/gpu/src/cuda/stereocsbp.cu
+3
-3
modules/gpu/src/cuda/surf.cu
modules/gpu/src/cuda/surf.cu
+3
-3
modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
+200
-201
modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp
modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp
+98
-32
modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp
modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp
+31
-34
modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
+164
-166
modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp
modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp
+37
-37
modules/gpu/src/opencv2/gpu/device/transform.hpp
modules/gpu/src/opencv2/gpu/device/transform.hpp
+10
-10
modules/gpu/src/opencv2/gpu/device/vecmath.hpp
modules/gpu/src/opencv2/gpu/device/vecmath.hpp
+255
-255
未找到文件。
modules/gpu/include/opencv2/gpu/devmem2d.hpp
浏览文件 @
1c1a61dd
...
...
@@ -56,7 +56,7 @@ namespace cv
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
#if defined(__CUDACC__)
#define __CV_GPU_HOST_DEVICE__ __host__ __device__
#define __CV_GPU_HOST_DEVICE__ __host__ __device__
__forceinline__
#else
#define __CV_GPU_HOST_DEVICE__
#endif
...
...
modules/gpu/src/cuda/brute_force_matcher.cu
浏览文件 @
1c1a61dd
...
...
@@ -42,6 +42,7 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"
using
namespace
cv
::
gpu
;
using
namespace
cv
::
gpu
::
device
;
...
...
@@ -60,7 +61,7 @@ namespace cv { namespace gpu { namespace bfmatcher
public:
explicit
SingleMask
(
const
PtrStep
&
mask_
)
:
mask
(
mask_
)
{}
__device__
bool
operator
()(
int
queryIdx
,
int
trainIdx
)
const
__device__
__forceinline__
bool
operator
()(
int
queryIdx
,
int
trainIdx
)
const
{
return
mask
.
ptr
(
queryIdx
)[
trainIdx
]
!=
0
;
}
...
...
@@ -74,14 +75,15 @@ namespace cv { namespace gpu { namespace bfmatcher
public:
explicit
MaskCollection
(
PtrStep
*
maskCollection_
)
:
maskCollection
(
maskCollection_
)
{}
__device__
void
nextMask
()
__device__
__forceinline__
void
nextMask
()
{
curMask
=
*
maskCollection
++
;
}
__device__
bool
operator
()(
int
queryIdx
,
int
trainIdx
)
const
{
return
curMask
.
data
==
0
||
curMask
.
ptr
(
queryIdx
)[
trainIdx
]
!=
0
;
__device__
__forceinline__
bool
operator
()(
int
queryIdx
,
int
trainIdx
)
const
{
uchar
val
;
return
curMask
.
data
==
0
||
(
ForceGlob
<
uchar
>::
Load
(
curMask
.
ptr
(
queryIdx
),
trainIdx
,
val
),
(
val
!=
0
));
}
private:
...
...
@@ -92,10 +94,10 @@ namespace cv { namespace gpu { namespace bfmatcher
class
WithOutMask
{
public:
__device__
void
nextMask
()
__device__
__forceinline__
void
nextMask
()
{
}
__device__
bool
operator
()(
int
queryIdx
,
int
trainIdx
)
const
__device__
__forceinline__
bool
operator
()(
int
queryIdx
,
int
trainIdx
)
const
{
return
true
;
}
...
...
@@ -132,19 +134,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef
int
ResultType
;
typedef
int
ValueType
;
__device__
L1Dist
()
:
mySum
(
0
)
{}
__device__
__forceinline__
L1Dist
()
:
mySum
(
0
)
{}
__device__
void
reduceIter
(
int
val1
,
int
val2
)
__device__
__forceinline__
void
reduceIter
(
int
val1
,
int
val2
)
{
mySum
=
__sad
(
val1
,
val2
,
mySum
);
}
template
<
int
BLOCK_DIM_X
>
__device__
void
reduceAll
(
int
*
sdiff_row
)
template
<
int
BLOCK_DIM_X
>
__device__
__forceinline__
void
reduceAll
(
int
*
sdiff_row
)
{
SumReductor
<
BLOCK_DIM_X
>::
reduce
(
sdiff_row
,
mySum
);
}
__device__
operator
int
()
const
__device__
__forceinline__
operator
int
()
const
{
return
mySum
;
}
...
...
@@ -158,19 +160,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef
float
ResultType
;
typedef
float
ValueType
;
__device__
L1Dist
()
:
mySum
(
0.0
f
)
{}
__device__
__forceinline__
L1Dist
()
:
mySum
(
0.0
f
)
{}
__device__
void
reduceIter
(
float
val1
,
float
val2
)
__device__
__forceinline__
void
reduceIter
(
float
val1
,
float
val2
)
{
mySum
+=
fabs
(
val1
-
val2
);
}
template
<
int
BLOCK_DIM_X
>
__device__
void
reduceAll
(
float
*
sdiff_row
)
template
<
int
BLOCK_DIM_X
>
__device__
__forceinline__
void
reduceAll
(
float
*
sdiff_row
)
{
SumReductor
<
BLOCK_DIM_X
>::
reduce
(
sdiff_row
,
mySum
);
}
__device__
operator
float
()
const
__device__
__forceinline__
operator
float
()
const
{
return
mySum
;
}
...
...
@@ -185,20 +187,20 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef
float
ResultType
;
typedef
float
ValueType
;
__device__
L2Dist
()
:
mySum
(
0.0
f
)
{}
__device__
__forceinline__
L2Dist
()
:
mySum
(
0.0
f
)
{}
__device__
void
reduceIter
(
float
val1
,
float
val2
)
__device__
__forceinline__
void
reduceIter
(
float
val1
,
float
val2
)
{
float
reg
=
val1
-
val2
;
mySum
+=
reg
*
reg
;
}
template
<
int
BLOCK_DIM_X
>
__device__
void
reduceAll
(
float
*
sdiff_row
)
template
<
int
BLOCK_DIM_X
>
__device__
__forceinline__
void
reduceAll
(
float
*
sdiff_row
)
{
SumReductor
<
BLOCK_DIM_X
>::
reduce
(
sdiff_row
,
mySum
);
}
__device__
operator
float
()
const
__device__
__forceinline__
operator
float
()
const
{
return
sqrtf
(
mySum
);
}
...
...
@@ -213,19 +215,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef
int
ResultType
;
typedef
int
ValueType
;
__device__
HammingDist
()
:
mySum
(
0
)
{}
__device__
__forceinline__
HammingDist
()
:
mySum
(
0
)
{}
__device__
void
reduceIter
(
int
val1
,
int
val2
)
__device__
__forceinline__
void
reduceIter
(
int
val1
,
int
val2
)
{
mySum
+=
__popc
(
val1
^
val2
);
}
template
<
int
BLOCK_DIM_X
>
__device__
void
reduceAll
(
int
*
sdiff_row
)
template
<
int
BLOCK_DIM_X
>
__device__
__forceinline__
void
reduceAll
(
int
*
sdiff_row
)
{
SumReductor
<
BLOCK_DIM_X
>::
reduce
(
sdiff_row
,
mySum
);
}
__device__
operator
int
()
const
__device__
__forceinline__
operator
int
()
const
{
return
mySum
;
}
...
...
@@ -241,7 +243,11 @@ namespace cv { namespace gpu { namespace bfmatcher
__device__
void
reduceDescDiff
(
const
T
*
queryDescs
,
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
,
typename
Dist
::
ResultType
*
sdiff_row
)
{
for
(
int
i
=
threadIdx
.
x
;
i
<
desc_len
;
i
+=
BLOCK_DIM_X
)
dist
.
reduceIter
(
queryDescs
[
i
],
trainDescs
[
i
]);
{
T
trainVal
;
ForceGlob
<
T
>::
Load
(
trainDescs
,
i
,
trainVal
);
dist
.
reduceIter
(
queryDescs
[
i
],
trainVal
);
}
dist
.
reduceAll
<
BLOCK_DIM_X
>
(
sdiff_row
);
}
...
...
@@ -282,7 +288,9 @@ namespace cv { namespace gpu { namespace bfmatcher
{
if
(
ind
<
desc_len
)
{
dist
.
reduceIter
(
*
queryVals
,
trainDescs
[
ind
]);
T
trainVal
;
ForceGlob
<
T
>::
Load
(
trainDescs
,
ind
,
trainVal
);
dist
.
reduceIter
(
*
queryVals
,
trainVal
);
++
queryVals
;
...
...
@@ -293,7 +301,9 @@ namespace cv { namespace gpu { namespace bfmatcher
template
<
typename
Dist
,
typename
T
>
static
__device__
void
calcWithoutCheck
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
Dist
&
dist
)
{
dist
.
reduceIter
(
*
queryVals
,
*
trainDescs
);
T
trainVal
;
ForceGlob
<
T
>::
Load
(
trainDescs
,
0
,
trainVal
);
dist
.
reduceIter
(
*
queryVals
,
trainVal
);
++
queryVals
;
trainDescs
+=
blockDim
.
x
;
...
...
@@ -304,13 +314,13 @@ namespace cv { namespace gpu { namespace bfmatcher
template
<
>
struct
UnrollDescDiff
<
0
>
{
template
<
typename
Dist
,
typename
T
>
static
__device__
void
calcCheck
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
int
desc_len
,
static
__device__
__forceinline__
void
calcCheck
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
,
int
ind
)
{
}
template
<
typename
Dist
,
typename
T
>
static
__device__
void
calcWithoutCheck
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
Dist
&
dist
)
static
__device__
__forceinline__
void
calcWithoutCheck
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
Dist
&
dist
)
{
}
};
...
...
@@ -320,7 +330,7 @@ namespace cv { namespace gpu { namespace bfmatcher
struct
DescDiffCalculator
<
BLOCK_DIM_X
,
MAX_DESCRIPTORS_LEN
,
false
>
{
template
<
typename
Dist
,
typename
T
>
static
__device__
void
calc
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
)
static
__device__
__forceinline__
void
calc
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
)
{
UnrollDescDiff
<
MAX_DESCRIPTORS_LEN
/
BLOCK_DIM_X
>::
calcCheck
(
queryVals
,
trainDescs
,
desc_len
,
dist
,
threadIdx
.
x
);
}
...
...
@@ -329,14 +339,14 @@ namespace cv { namespace gpu { namespace bfmatcher
struct
DescDiffCalculator
<
BLOCK_DIM_X
,
MAX_DESCRIPTORS_LEN
,
true
>
{
template
<
typename
Dist
,
typename
T
>
static
__device__
void
calc
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
)
static
__device__
__forceinline__
void
calc
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
)
{
UnrollDescDiff
<
MAX_DESCRIPTORS_LEN
/
BLOCK_DIM_X
>::
calcWithoutCheck
(
queryVals
,
trainDescs
+
threadIdx
.
x
,
dist
);
}
};
template
<
int
BLOCK_DIM_X
,
int
MAX_DESCRIPTORS_LEN
,
bool
DESC_LEN_EQ_MAX_LEN
,
typename
Dist
,
typename
T
>
__device__
void
reduceDescDiffCached
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
,
typename
Dist
::
ResultType
*
sdiff_row
)
__device__
__forceinline__
void
reduceDescDiffCached
(
const
typename
Dist
::
ValueType
*
queryVals
,
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
,
typename
Dist
::
ResultType
*
sdiff_row
)
{
DescDiffCalculator
<
BLOCK_DIM_X
,
MAX_DESCRIPTORS_LEN
,
DESC_LEN_EQ_MAX_LEN
>::
calc
(
queryVals
,
trainDescs
,
desc_len
,
dist
);
...
...
@@ -419,13 +429,13 @@ namespace cv { namespace gpu { namespace bfmatcher
class
ReduceDescCalculatorSimple
{
public:
__device__
void
prepare
(
const
T
*
queryDescs_
,
int
,
void
*
)
__device__
__forceinline__
void
prepare
(
const
T
*
queryDescs_
,
int
,
void
*
)
{
queryDescs
=
queryDescs_
;
}
template
<
typename
Dist
>
__device__
void
calc
(
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
,
typename
Dist
::
ResultType
*
sdiff_row
)
const
__device__
__forceinline__
void
calc
(
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
,
typename
Dist
::
ResultType
*
sdiff_row
)
const
{
reduceDescDiff
<
BLOCK_DIM_X
>
(
queryDescs
,
trainDescs
,
desc_len
,
dist
,
sdiff_row
);
}
...
...
@@ -438,13 +448,13 @@ namespace cv { namespace gpu { namespace bfmatcher
class
ReduceDescCalculatorCached
{
public:
__device__
void
prepare
(
const
T
*
queryDescs
,
int
desc_len
,
U
*
smem
)
__device__
__forceinline__
void
prepare
(
const
T
*
queryDescs
,
int
desc_len
,
U
*
smem
)
{
loadDescsVals
<
BLOCK_DIM_X
,
MAX_DESCRIPTORS_LEN
>
(
queryDescs
,
desc_len
,
queryVals
,
smem
);
}
template
<
typename
Dist
>
__device__
void
calc
(
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
,
typename
Dist
::
ResultType
*
sdiff_row
)
const
__device__
__forceinline__
void
calc
(
const
T
*
trainDescs
,
int
desc_len
,
Dist
&
dist
,
typename
Dist
::
ResultType
*
sdiff_row
)
const
{
reduceDescDiffCached
<
BLOCK_DIM_X
,
MAX_DESCRIPTORS_LEN
,
DESC_LEN_EQ_MAX_LEN
>
(
queryVals
,
trainDescs
,
desc_len
,
dist
,
sdiff_row
);
}
...
...
@@ -496,13 +506,13 @@ namespace cv { namespace gpu { namespace bfmatcher
}
template
<
typename
Dist
,
typename
ReduceDescCalculator
,
typename
Mask
>
__device__
void
loop
(
int
queryIdx
,
Mask
&
m
,
const
ReduceDescCalculator
&
reduceDescCalc
,
__device__
__forceinline__
void
loop
(
int
queryIdx
,
Mask
&
m
,
const
ReduceDescCalculator
&
reduceDescCalc
,
typename
Dist
::
ResultType
&
myMin
,
int
&
myBestTrainIdx
,
int
&
myBestImgIdx
,
typename
Dist
::
ResultType
*
sdiff_row
)
const
{
matchDescs
<
Dist
>
(
queryIdx
,
0
,
trainDescs
,
m
,
reduceDescCalc
,
myMin
,
myBestTrainIdx
,
myBestImgIdx
,
sdiff_row
);
}
__device__
int
desc_len
()
const
__device__
__forceinline__
int
desc_len
()
const
{
return
trainDescs
.
cols
;
}
...
...
@@ -532,7 +542,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
__device__
int
desc_len
()
const
__device__
__forceinline__
int
desc_len
()
const
{
return
desclen
;
}
...
...
modules/gpu/src/cuda/calib3d.cu
浏览文件 @
1c1a61dd
...
...
@@ -56,7 +56,7 @@ namespace cv { namespace gpu
struct
TransformOp
{
__device__
float3
operator
()(
float3
p
)
const
__device__
__forceinline__
float3
operator
()(
float3
p
)
const
{
return
make_float3
(
crot0
.
x
*
p
.
x
+
crot0
.
y
*
p
.
y
+
crot0
.
z
*
p
.
z
+
ctransl
.
x
,
...
...
@@ -89,7 +89,7 @@ namespace cv { namespace gpu
struct
ProjectOp
{
__device__
float2
operator
()(
float3
p
)
const
__device__
__forceinline__
float2
operator
()(
float3
p
)
const
{
// Rotate and translate in 3D
float3
t
=
make_float3
(
...
...
@@ -128,7 +128,7 @@ namespace cv { namespace gpu
return
SOLVE_PNP_RANSAC_MAX_NUM_ITERS
;
}
__device__
float
sqr
(
float
x
)
__device__
__forceinline__
float
sqr
(
float
x
)
{
return
x
*
x
;
}
...
...
modules/gpu/src/cuda/color.cu
浏览文件 @
1c1a61dd
...
...
@@ -59,38 +59,38 @@ namespace cv { namespace gpu { namespace color
template
<
>
struct
ColorChannel
<
uchar
>
{
typedef
float
worktype_f
;
static
__device__
uchar
max
()
{
return
UCHAR_MAX
;
}
static
__device__
uchar
half
()
{
return
(
uchar
)(
max
()
/
2
+
1
);
}
static
__device__
__forceinline__
uchar
max
()
{
return
UCHAR_MAX
;
}
static
__device__
__forceinline__
uchar
half
()
{
return
(
uchar
)(
max
()
/
2
+
1
);
}
};
template
<
>
struct
ColorChannel
<
ushort
>
{
typedef
float
worktype_f
;
static
__device__
ushort
max
()
{
return
USHRT_MAX
;
}
static
__device__
ushort
half
()
{
return
(
ushort
)(
max
()
/
2
+
1
);
}
static
__device__
__forceinline__
ushort
max
()
{
return
USHRT_MAX
;
}
static
__device__
__forceinline__
ushort
half
()
{
return
(
ushort
)(
max
()
/
2
+
1
);
}
};
template
<
>
struct
ColorChannel
<
float
>
{
typedef
float
worktype_f
;
static
__device__
float
max
()
{
return
1.
f
;
}
static
__device__
float
half
()
{
return
0.5
f
;
}
static
__device__
__forceinline__
float
max
()
{
return
1.
f
;
}
static
__device__
__forceinline__
float
half
()
{
return
0.5
f
;
}
};
template
<
typename
T
>
__device__
void
setAlpha
(
typename
TypeVec
<
T
,
3
>::
vec_t
&
vec
,
T
val
)
__device__
__forceinline__
void
setAlpha
(
typename
TypeVec
<
T
,
3
>::
vec_t
&
vec
,
T
val
)
{
}
template
<
typename
T
>
__device__
void
setAlpha
(
typename
TypeVec
<
T
,
4
>::
vec_t
&
vec
,
T
val
)
__device__
__forceinline__
void
setAlpha
(
typename
TypeVec
<
T
,
4
>::
vec_t
&
vec
,
T
val
)
{
vec
.
w
=
val
;
}
template
<
typename
T
>
__device__
T
getAlpha
(
const
typename
TypeVec
<
T
,
3
>::
vec_t
&
vec
)
__device__
__forceinline__
T
getAlpha
(
const
typename
TypeVec
<
T
,
3
>::
vec_t
&
vec
)
{
return
ColorChannel
<
T
>::
max
();
}
template
<
typename
T
>
__device__
T
getAlpha
(
const
typename
TypeVec
<
T
,
4
>::
vec_t
&
vec
)
__device__
__forceinline__
T
getAlpha
(
const
typename
TypeVec
<
T
,
4
>::
vec_t
&
vec
)
{
return
vec
.
w
;
}
...
...
@@ -114,7 +114,7 @@ namespace cv { namespace gpu { namespace color
explicit
RGB2RGB
(
int
bidx
)
:
bidx
(
bidx
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
...
...
@@ -179,7 +179,7 @@ namespace cv { namespace gpu { namespace color
template
<
>
struct
RGB5x52RGBConverter
<
5
>
{
template
<
typename
D
>
static
__device__
void
cvt
(
uint
src
,
D
&
dst
,
int
bidx
)
static
__device__
__forceinline__
void
cvt
(
uint
src
,
D
&
dst
,
int
bidx
)
{
(
&
dst
.
x
)[
bidx
]
=
(
uchar
)(
src
<<
3
);
dst
.
y
=
(
uchar
)((
src
>>
2
)
&
~
7
);
...
...
@@ -190,7 +190,7 @@ namespace cv { namespace gpu { namespace color
template
<
>
struct
RGB5x52RGBConverter
<
6
>
{
template
<
typename
D
>
static
__device__
void
cvt
(
uint
src
,
D
&
dst
,
int
bidx
)
static
__device__
__forceinline__
void
cvt
(
uint
src
,
D
&
dst
,
int
bidx
)
{
(
&
dst
.
x
)[
bidx
]
=
(
uchar
)(
src
<<
3
);
dst
.
y
=
(
uchar
)((
src
>>
3
)
&
~
3
);
...
...
@@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace color
explicit
RGB5x52RGB
(
int
bidx
)
:
bidx
(
bidx
)
{}
__device__
dst_t
operator
()(
ushort
src
)
const
__device__
__forceinline__
dst_t
operator
()(
ushort
src
)
const
{
dst_t
dst
;
RGB5x52RGBConverter
<
GREEN_BITS
>::
cvt
((
uint
)
src
,
dst
,
bidx
);
...
...
@@ -221,18 +221,18 @@ namespace cv { namespace gpu { namespace color
template
<
>
struct
RGB2RGB5x5Converter
<
6
>
{
template
<
typename
T
>
static
__device__
ushort
cvt
(
const
T
&
src
,
int
bidx
)
static
__device__
__forceinline__
ushort
cvt
(
const
T
&
src
,
int
bidx
)
{
return
(
ushort
)(((
&
src
.
x
)[
bidx
]
>>
3
)
|
((
src
.
y
&
~
3
)
<<
3
)
|
(((
&
src
.
x
)[
bidx
^
2
]
&
~
7
)
<<
8
));
}
};
template
<
>
struct
RGB2RGB5x5Converter
<
5
>
{
static
__device__
ushort
cvt
(
const
uchar3
&
src
,
int
bidx
)
static
__device__
__forceinline__
ushort
cvt
(
const
uchar3
&
src
,
int
bidx
)
{
return
(
ushort
)(((
&
src
.
x
)[
bidx
]
>>
3
)
|
((
src
.
y
&
~
7
)
<<
2
)
|
(((
&
src
.
x
)[
bidx
^
2
]
&
~
7
)
<<
7
));
}
static
__device__
ushort
cvt
(
const
uchar4
&
src
,
int
bidx
)
static
__device__
__forceinline__
ushort
cvt
(
const
uchar4
&
src
,
int
bidx
)
{
return
(
ushort
)(((
&
src
.
x
)[
bidx
]
>>
3
)
|
((
src
.
y
&
~
7
)
<<
2
)
|
(((
&
src
.
x
)[
bidx
^
2
]
&
~
7
)
<<
7
)
|
(
src
.
w
?
0x8000
:
0
));
}
...
...
@@ -245,7 +245,7 @@ namespace cv { namespace gpu { namespace color
explicit
RGB2RGB5x5
(
int
bidx
)
:
bidx
(
bidx
)
{}
__device__
ushort
operator
()(
const
src_t
&
src
)
__device__
__forceinline__
ushort
operator
()(
const
src_t
&
src
)
{
return
RGB2RGB5x5Converter
<
GREEN_BITS
>::
cvt
(
src
,
bidx
);
}
...
...
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace color
typedef
T
src_t
;
typedef
typename
TypeVec
<
T
,
DSTCN
>::
vec_t
dst_t
;
__device__
dst_t
operator
()(
const
T
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
T
&
src
)
const
{
dst_t
dst
;
...
...
@@ -313,14 +313,14 @@ namespace cv { namespace gpu { namespace color
template
<
int
GREEN_BITS
>
struct
Gray2RGB5x5Converter
;
template
<
>
struct
Gray2RGB5x5Converter
<
6
>
{
static
__device__
ushort
cvt
(
uint
t
)
static
__device__
__forceinline__
ushort
cvt
(
uint
t
)
{
return
(
ushort
)((
t
>>
3
)
|
((
t
&
~
3
)
<<
3
)
|
((
t
&
~
7
)
<<
8
));
}
};
template
<
>
struct
Gray2RGB5x5Converter
<
5
>
{
static
__device__
ushort
cvt
(
uint
t
)
static
__device__
__forceinline__
ushort
cvt
(
uint
t
)
{
t
>>=
3
;
return
(
ushort
)(
t
|
(
t
<<
5
)
|
(
t
<<
10
));
...
...
@@ -332,7 +332,7 @@ namespace cv { namespace gpu { namespace color
typedef
uchar
src_t
;
typedef
ushort
dst_t
;
__device__
ushort
operator
()(
uchar
src
)
const
__device__
__forceinline__
ushort
operator
()(
uchar
src
)
const
{
return
Gray2RGB5x5Converter
<
GREEN_BITS
>::
cvt
((
uint
)
src
);
}
...
...
@@ -406,14 +406,14 @@ namespace cv { namespace gpu { namespace color
template
<
int
GREEN_BITS
>
struct
RGB5x52GrayConverter
;
template
<
>
struct
RGB5x52GrayConverter
<
6
>
{
static
__device__
uchar
cvt
(
uint
t
)
static
__device__
__forceinline__
uchar
cvt
(
uint
t
)
{
return
(
uchar
)
CV_DESCALE
(((
t
<<
3
)
&
0xf8
)
*
B2Y
+
((
t
>>
3
)
&
0xfc
)
*
G2Y
+
((
t
>>
8
)
&
0xf8
)
*
R2Y
,
yuv_shift
);
}
};
template
<
>
struct
RGB5x52GrayConverter
<
5
>
{
static
__device__
uchar
cvt
(
uint
t
)
static
__device__
__forceinline__
uchar
cvt
(
uint
t
)
{
return
(
uchar
)
CV_DESCALE
(((
t
<<
3
)
&
0xf8
)
*
B2Y
+
((
t
>>
2
)
&
0xf8
)
*
G2Y
+
((
t
>>
7
)
&
0xf8
)
*
R2Y
,
yuv_shift
);
}
...
...
@@ -424,18 +424,18 @@ namespace cv { namespace gpu { namespace color
typedef
ushort
src_t
;
typedef
uchar
dst_t
;
__device__
uchar
operator
()(
ushort
src
)
const
__device__
__forceinline__
uchar
operator
()(
ushort
src
)
const
{
return
RGB5x52GrayConverter
<
GREEN_BITS
>::
cvt
((
uint
)
src
);
}
};
template
<
typename
T
>
__device__
T
RGB2GrayConvert
(
const
T
*
src
,
int
bidx
)
__device__
__forceinline__
T
RGB2GrayConvert
(
const
T
*
src
,
int
bidx
)
{
return
(
T
)
CV_DESCALE
((
unsigned
)(
src
[
bidx
]
*
B2Y
+
src
[
1
]
*
G2Y
+
src
[
bidx
^
2
]
*
R2Y
),
yuv_shift
);
}
__device__
float
RGB2GrayConvert
(
const
float
*
src
,
int
bidx
)
__device__
__forceinline__
float
RGB2GrayConvert
(
const
float
*
src
,
int
bidx
)
{
const
float
cr
=
0.299
f
;
const
float
cg
=
0.587
f
;
...
...
@@ -451,7 +451,7 @@ namespace cv { namespace gpu { namespace color
explicit
RGB2Gray
(
int
bidx
)
:
bidx
(
bidx
)
{}
__device__
T
operator
()(
const
src_t
&
src
)
__device__
__forceinline__
T
operator
()(
const
src_t
&
src
)
{
return
RGB2GrayConvert
(
&
src
.
x
,
bidx
);
}
...
...
@@ -515,7 +515,7 @@ namespace cv { namespace gpu { namespace color
__constant__
float
cYCrCbCoeffs_f
[
5
];
template
<
typename
T
,
typename
D
>
__device__
void
RGB2YCrCbConvert
(
const
T
*
src
,
D
&
dst
,
int
bidx
)
__device__
__forceinline__
void
RGB2YCrCbConvert
(
const
T
*
src
,
D
&
dst
,
int
bidx
)
{
const
int
delta
=
ColorChannel
<
T
>::
half
()
*
(
1
<<
yuv_shift
);
...
...
@@ -528,7 +528,7 @@ namespace cv { namespace gpu { namespace color
dst
.
z
=
saturate_cast
<
T
>
(
Cb
);
}
template
<
typename
D
>
static
__device__
void
RGB2YCrCbConvert
(
const
float
*
src
,
D
&
dst
,
int
bidx
)
static
__device__
__forceinline__
void
RGB2YCrCbConvert
(
const
float
*
src
,
D
&
dst
,
int
bidx
)
{
dst
.
x
=
src
[
0
]
*
cYCrCbCoeffs_f
[
0
]
+
src
[
1
]
*
cYCrCbCoeffs_f
[
1
]
+
src
[
2
]
*
cYCrCbCoeffs_f
[
2
];
dst
.
y
=
(
src
[
bidx
^
2
]
-
dst
.
x
)
*
cYCrCbCoeffs_f
[
3
]
+
ColorChannel
<
float
>::
half
();
...
...
@@ -561,7 +561,7 @@ namespace cv { namespace gpu { namespace color
RGB2YCrCb
(
int
bidx
,
const
coeff_t
coeffs
[
5
])
:
RGB2YCrCbBase
<
T
>
(
coeffs
),
bidx
(
bidx
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
RGB2YCrCbConvert
(
&
src
.
x
,
dst
,
bidx
);
...
...
@@ -573,7 +573,7 @@ namespace cv { namespace gpu { namespace color
};
template
<
typename
T
,
typename
D
>
__device__
void
YCrCb2RGBConvert
(
const
T
&
src
,
D
*
dst
,
int
bidx
)
__device__
__forceinline__
void
YCrCb2RGBConvert
(
const
T
&
src
,
D
*
dst
,
int
bidx
)
{
const
int
b
=
src
.
x
+
CV_DESCALE
((
src
.
z
-
ColorChannel
<
D
>::
half
())
*
cYCrCbCoeffs_i
[
3
],
yuv_shift
);
const
int
g
=
src
.
x
+
CV_DESCALE
((
src
.
z
-
ColorChannel
<
D
>::
half
())
*
cYCrCbCoeffs_i
[
2
]
+
(
src
.
y
-
ColorChannel
<
D
>::
half
())
*
cYCrCbCoeffs_i
[
1
],
yuv_shift
);
...
...
@@ -584,7 +584,7 @@ namespace cv { namespace gpu { namespace color
dst
[
bidx
^
2
]
=
saturate_cast
<
D
>
(
r
);
}
template
<
typename
T
>
__device__
void
YCrCb2RGBConvert
(
const
T
&
src
,
float
*
dst
,
int
bidx
)
__device__
__forceinline__
void
YCrCb2RGBConvert
(
const
T
&
src
,
float
*
dst
,
int
bidx
)
{
dst
[
bidx
]
=
src
.
x
+
(
src
.
z
-
ColorChannel
<
float
>::
half
())
*
cYCrCbCoeffs_f
[
3
];
dst
[
1
]
=
src
.
x
+
(
src
.
z
-
ColorChannel
<
float
>::
half
())
*
cYCrCbCoeffs_f
[
2
]
+
(
src
.
y
-
ColorChannel
<
float
>::
half
())
*
cYCrCbCoeffs_f
[
1
];
...
...
@@ -617,7 +617,7 @@ namespace cv { namespace gpu { namespace color
YCrCb2RGB
(
int
bidx
,
const
coeff_t
coeffs
[
4
])
:
YCrCb2RGBBase
<
T
>
(
coeffs
),
bidx
(
bidx
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
...
...
@@ -725,14 +725,14 @@ namespace cv { namespace gpu { namespace color
__constant__
float
cXYZ_D65f
[
9
];
template
<
typename
T
,
typename
D
>
__device__
void
RGB2XYZConvert
(
const
T
*
src
,
D
&
dst
)
__device__
__forceinline__
void
RGB2XYZConvert
(
const
T
*
src
,
D
&
dst
)
{
dst
.
x
=
saturate_cast
<
T
>
(
CV_DESCALE
(
src
[
0
]
*
cXYZ_D65i
[
0
]
+
src
[
1
]
*
cXYZ_D65i
[
1
]
+
src
[
2
]
*
cXYZ_D65i
[
2
],
xyz_shift
));
dst
.
y
=
saturate_cast
<
T
>
(
CV_DESCALE
(
src
[
0
]
*
cXYZ_D65i
[
3
]
+
src
[
1
]
*
cXYZ_D65i
[
4
]
+
src
[
2
]
*
cXYZ_D65i
[
5
],
xyz_shift
));
dst
.
z
=
saturate_cast
<
T
>
(
CV_DESCALE
(
src
[
0
]
*
cXYZ_D65i
[
6
]
+
src
[
1
]
*
cXYZ_D65i
[
7
]
+
src
[
2
]
*
cXYZ_D65i
[
8
],
xyz_shift
));
}
template
<
typename
D
>
__device__
void
RGB2XYZConvert
(
const
float
*
src
,
D
&
dst
)
__device__
__forceinline__
void
RGB2XYZConvert
(
const
float
*
src
,
D
&
dst
)
{
dst
.
x
=
src
[
0
]
*
cXYZ_D65f
[
0
]
+
src
[
1
]
*
cXYZ_D65f
[
1
]
+
src
[
2
]
*
cXYZ_D65f
[
2
];
dst
.
y
=
src
[
0
]
*
cXYZ_D65f
[
3
]
+
src
[
1
]
*
cXYZ_D65f
[
4
]
+
src
[
2
]
*
cXYZ_D65f
[
5
];
...
...
@@ -765,7 +765,7 @@ namespace cv { namespace gpu { namespace color
explicit
RGB2XYZ
(
const
coeff_t
coeffs
[
9
])
:
RGB2XYZBase
<
T
>
(
coeffs
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
RGB2XYZConvert
(
&
src
.
x
,
dst
);
...
...
@@ -774,14 +774,14 @@ namespace cv { namespace gpu { namespace color
};
template
<
typename
T
,
typename
D
>
__device__
void
XYZ2RGBConvert
(
const
T
&
src
,
D
*
dst
)
__device__
__forceinline__
void
XYZ2RGBConvert
(
const
T
&
src
,
D
*
dst
)
{
dst
[
0
]
=
saturate_cast
<
D
>
(
CV_DESCALE
(
src
.
x
*
cXYZ_D65i
[
0
]
+
src
.
y
*
cXYZ_D65i
[
1
]
+
src
.
z
*
cXYZ_D65i
[
2
],
xyz_shift
));
dst
[
1
]
=
saturate_cast
<
D
>
(
CV_DESCALE
(
src
.
x
*
cXYZ_D65i
[
3
]
+
src
.
y
*
cXYZ_D65i
[
4
]
+
src
.
z
*
cXYZ_D65i
[
5
],
xyz_shift
));
dst
[
2
]
=
saturate_cast
<
D
>
(
CV_DESCALE
(
src
.
x
*
cXYZ_D65i
[
6
]
+
src
.
y
*
cXYZ_D65i
[
7
]
+
src
.
z
*
cXYZ_D65i
[
8
],
xyz_shift
));
}
template
<
typename
T
>
__device__
void
XYZ2RGBConvert
(
const
T
&
src
,
float
*
dst
)
__device__
__forceinline__
void
XYZ2RGBConvert
(
const
T
&
src
,
float
*
dst
)
{
dst
[
0
]
=
src
.
x
*
cXYZ_D65f
[
0
]
+
src
.
y
*
cXYZ_D65f
[
1
]
+
src
.
z
*
cXYZ_D65f
[
2
];
dst
[
1
]
=
src
.
x
*
cXYZ_D65f
[
3
]
+
src
.
y
*
cXYZ_D65f
[
4
]
+
src
.
z
*
cXYZ_D65f
[
5
];
...
...
@@ -814,7 +814,7 @@ namespace cv { namespace gpu { namespace color
explicit
XYZ2RGB
(
const
coeff_t
coeffs
[
9
])
:
XYZ2RGBBase
<
T
>
(
coeffs
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
XYZ2RGBConvert
(
src
,
&
dst
.
x
);
...
...
@@ -987,7 +987,7 @@ namespace cv { namespace gpu { namespace color
explicit
RGB2HSV
(
int
bidx
)
:
bidx
(
bidx
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
RGB2HSVConvert
<
HR
>
(
&
src
.
x
,
dst
,
bidx
);
...
...
@@ -1062,7 +1062,7 @@ namespace cv { namespace gpu { namespace color
explicit
HSV2RGB
(
int
bidx
)
:
bidx
(
bidx
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
HSV2RGBConvert
<
HR
>
(
src
,
&
dst
.
x
,
bidx
);
...
...
@@ -1214,7 +1214,7 @@ namespace cv { namespace gpu { namespace color
explicit
RGB2HLS
(
int
bidx
)
:
bidx
(
bidx
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
RGB2HLSConvert
<
HR
>
(
&
src
.
x
,
dst
,
bidx
);
...
...
@@ -1295,7 +1295,7 @@ namespace cv { namespace gpu { namespace color
explicit
HLS2RGB
(
int
bidx
)
:
bidx
(
bidx
)
{}
__device__
dst_t
operator
()(
const
src_t
&
src
)
const
__device__
__forceinline__
dst_t
operator
()(
const
src_t
&
src
)
const
{
dst_t
dst
;
HLS2RGBConvert
<
HR
>
(
src
,
&
dst
.
x
,
bidx
);
...
...
modules/gpu/src/cuda/element_operations.cu
浏览文件 @
1c1a61dd
...
...
@@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace mathfunc
template
<
typename
T1
,
typename
T2
>
struct
NotEqual
{
__device__
uchar
operator
()(
const
T1
&
src1
,
const
T2
&
src2
)
__device__
__forceinline__
uchar
operator
()(
const
T1
&
src1
,
const
T2
&
src2
)
{
return
static_cast
<
uchar
>
(
static_cast
<
int
>
(
src1
!=
src2
)
*
255
);
}
...
...
@@ -91,7 +91,7 @@ namespace cv { namespace gpu { namespace mathfunc
template
<
typename
T
>
struct
UnOp
<
T
,
UN_OP_NOT
>
{
static
__device__
T
call
(
T
v
)
{
return
~
v
;
}
static
__device__
__forceinline__
T
call
(
T
v
)
{
return
~
v
;
}
};
...
...
@@ -199,20 +199,20 @@ namespace cv { namespace gpu { namespace mathfunc
template
<
typename
T
>
struct
BinOp
<
T
,
BIN_OP_OR
>
{
static
__device__
T
call
(
T
a
,
T
b
)
{
return
a
|
b
;
}
static
__device__
__forceinline__
T
call
(
T
a
,
T
b
)
{
return
a
|
b
;
}
};
template
<
typename
T
>
struct
BinOp
<
T
,
BIN_OP_AND
>
{
static
__device__
T
call
(
T
a
,
T
b
)
{
return
a
&
b
;
}
static
__device__
__forceinline__
T
call
(
T
a
,
T
b
)
{
return
a
&
b
;
}
};
template
<
typename
T
>
struct
BinOp
<
T
,
BIN_OP_XOR
>
{
static
__device__
T
call
(
T
a
,
T
b
)
{
return
a
^
b
;
}
static
__device__
__forceinline__
T
call
(
T
a
,
T
b
)
{
return
a
^
b
;
}
};
...
...
@@ -357,15 +357,15 @@ namespace cv { namespace gpu { namespace mathfunc
struct
MinOp
{
template
<
typename
T
>
__device__
T
operator
()(
T
a
,
T
b
)
__device__
__forceinline__
T
operator
()(
T
a
,
T
b
)
{
return
min
(
a
,
b
);
}
__device__
float
operator
()(
float
a
,
float
b
)
__device__
__forceinline__
float
operator
()(
float
a
,
float
b
)
{
return
fmin
(
a
,
b
);
}
__device__
double
operator
()(
double
a
,
double
b
)
__device__
__forceinline__
double
operator
()(
double
a
,
double
b
)
{
return
fmin
(
a
,
b
);
}
...
...
@@ -374,15 +374,15 @@ namespace cv { namespace gpu { namespace mathfunc
struct
MaxOp
{
template
<
typename
T
>
__device__
T
operator
()(
T
a
,
T
b
)
__device__
__forceinline__
T
operator
()(
T
a
,
T
b
)
{
return
max
(
a
,
b
);
}
__device__
float
operator
()(
float
a
,
float
b
)
__device__
__forceinline__
float
operator
()(
float
a
,
float
b
)
{
return
fmax
(
a
,
b
);
}
__device__
double
operator
()(
double
a
,
double
b
)
__device__
__forceinline__
double
operator
()(
double
a
,
double
b
)
{
return
fmax
(
a
,
b
);
}
...
...
@@ -394,7 +394,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit
ScalarMinOp
(
T
s_
)
:
s
(
s_
)
{}
__device__
T
operator
()(
T
a
)
__device__
__forceinline__
T
operator
()(
T
a
)
{
return
min
(
a
,
s
);
}
...
...
@@ -405,7 +405,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit
ScalarMinOp
(
float
s_
)
:
s
(
s_
)
{}
__device__
float
operator
()(
float
a
)
__device__
__forceinline__
float
operator
()(
float
a
)
{
return
fmin
(
a
,
s
);
}
...
...
@@ -416,7 +416,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit
ScalarMinOp
(
double
s_
)
:
s
(
s_
)
{}
__device__
double
operator
()(
double
a
)
__device__
__forceinline__
double
operator
()(
double
a
)
{
return
fmin
(
a
,
s
);
}
...
...
@@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit
ScalarMaxOp
(
T
s_
)
:
s
(
s_
)
{}
__device__
T
operator
()(
T
a
)
__device__
__forceinline__
T
operator
()(
T
a
)
{
return
max
(
a
,
s
);
}
...
...
@@ -439,7 +439,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit
ScalarMaxOp
(
float
s_
)
:
s
(
s_
)
{}
__device__
float
operator
()(
float
a
)
__device__
__forceinline__
float
operator
()(
float
a
)
{
return
fmax
(
a
,
s
);
}
...
...
@@ -450,7 +450,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit
ScalarMaxOp
(
double
s_
)
:
s
(
s_
)
{}
__device__
double
operator
()(
double
a
)
__device__
__forceinline__
double
operator
()(
double
a
)
{
return
fmax
(
a
,
s
);
}
...
...
@@ -524,7 +524,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshBinary
(
T
thresh_
,
T
maxVal_
)
:
thresh
(
thresh_
),
maxVal
(
maxVal_
)
{}
__device__
T
operator
()(
const
T
&
src
)
const
__device__
__forceinline__
T
operator
()(
const
T
&
src
)
const
{
return
src
>
thresh
?
maxVal
:
0
;
}
...
...
@@ -538,7 +538,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshBinaryInv
(
T
thresh_
,
T
maxVal_
)
:
thresh
(
thresh_
),
maxVal
(
maxVal_
)
{}
__device__
T
operator
()(
const
T
&
src
)
const
__device__
__forceinline__
T
operator
()(
const
T
&
src
)
const
{
return
src
>
thresh
?
0
:
maxVal
;
}
...
...
@@ -552,7 +552,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshTrunc
(
T
thresh_
,
T
)
:
thresh
(
thresh_
)
{}
__device__
T
operator
()(
const
T
&
src
)
const
__device__
__forceinline__
T
operator
()(
const
T
&
src
)
const
{
return
min
(
src
,
thresh
);
}
...
...
@@ -564,7 +564,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshTrunc
(
float
thresh_
,
float
)
:
thresh
(
thresh_
)
{}
__device__
float
operator
()(
const
float
&
src
)
const
__device__
__forceinline__
float
operator
()(
const
float
&
src
)
const
{
return
fmin
(
src
,
thresh
);
}
...
...
@@ -576,7 +576,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshTrunc
(
double
thresh_
,
double
)
:
thresh
(
thresh_
)
{}
__device__
double
operator
()(
const
double
&
src
)
const
__device__
__forceinline__
double
operator
()(
const
double
&
src
)
const
{
return
fmin
(
src
,
thresh
);
}
...
...
@@ -590,7 +590,7 @@ namespace cv { namespace gpu { namespace mathfunc
public:
ThreshToZero
(
T
thresh_
,
T
)
:
thresh
(
thresh_
)
{}
__device__
T
operator
()(
const
T
&
src
)
const
__device__
__forceinline__
T
operator
()(
const
T
&
src
)
const
{
return
src
>
thresh
?
src
:
0
;
}
...
...
@@ -604,7 +604,7 @@ namespace cv { namespace gpu { namespace mathfunc
public:
ThreshToZeroInv
(
T
thresh_
,
T
)
:
thresh
(
thresh_
)
{}
__device__
T
operator
()(
const
T
&
src
)
const
__device__
__forceinline__
T
operator
()(
const
T
&
src
)
const
{
return
src
>
thresh
?
0
:
src
;
}
...
...
modules/gpu/src/cuda/filters.cu
浏览文件 @
1c1a61dd
...
...
@@ -406,7 +406,7 @@ namespace bf_krnls
template
<
int
channels
>
struct
DistRgbMax
{
static
__device__
uchar
calc
(
const
uchar
*
a
,
const
uchar
*
b
)
static
__device__
__forceinline__
uchar
calc
(
const
uchar
*
a
,
const
uchar
*
b
)
{
uchar
x
=
abs
(
a
[
0
]
-
b
[
0
]);
uchar
y
=
abs
(
a
[
1
]
-
b
[
1
]);
...
...
@@ -418,7 +418,7 @@ namespace bf_krnls
template
<
>
struct
DistRgbMax
<
1
>
{
static
__device__
uchar
calc
(
const
uchar
*
a
,
const
uchar
*
b
)
static
__device__
__forceinline__
uchar
calc
(
const
uchar
*
a
,
const
uchar
*
b
)
{
return
abs
(
a
[
0
]
-
b
[
0
]);
}
...
...
modules/gpu/src/cuda/match_template.cu
浏览文件 @
1c1a61dd
...
...
@@ -48,35 +48,35 @@ using namespace cv::gpu::device;
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
__device__
float
sum
(
float
v
)
{
return
v
;
}
__device__
float
sum
(
float2
v
)
{
return
v
.
x
+
v
.
y
;
}
__device__
float
sum
(
float3
v
)
{
return
v
.
x
+
v
.
y
+
v
.
z
;
}
__device__
float
sum
(
float4
v
)
{
return
v
.
x
+
v
.
y
+
v
.
z
+
v
.
w
;
}
__device__
float
first
(
float
v
)
{
return
v
;
}
__device__
float
first
(
float2
v
)
{
return
v
.
x
;
}
__device__
float
first
(
float3
v
)
{
return
v
.
x
;
}
__device__
float
first
(
float4
v
)
{
return
v
.
x
;
}
__device__
float
mul
(
float
a
,
float
b
)
{
return
a
*
b
;
}
__device__
float2
mul
(
float2
a
,
float2
b
)
{
return
make_float2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
__device__
float3
mul
(
float3
a
,
float3
b
)
{
return
make_float3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
__device__
float4
mul
(
float4
a
,
float4
b
)
{
return
make_float4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
__device__
float
mul
(
uchar
a
,
uchar
b
)
{
return
a
*
b
;
}
__device__
float2
mul
(
uchar2
a
,
uchar2
b
)
{
return
make_float2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
__device__
float3
mul
(
uchar3
a
,
uchar3
b
)
{
return
make_float3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
__device__
float4
mul
(
uchar4
a
,
uchar4
b
)
{
return
make_float4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
__device__
float
sub
(
float
a
,
float
b
)
{
return
a
-
b
;
}
__device__
float2
sub
(
float2
a
,
float2
b
)
{
return
make_float2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
__device__
float3
sub
(
float3
a
,
float3
b
)
{
return
make_float3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
__device__
float4
sub
(
float4
a
,
float4
b
)
{
return
make_float4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
__device__
float
sub
(
uchar
a
,
uchar
b
)
{
return
a
-
b
;
}
__device__
float2
sub
(
uchar2
a
,
uchar2
b
)
{
return
make_float2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
__device__
float3
sub
(
uchar3
a
,
uchar3
b
)
{
return
make_float3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
__device__
float4
sub
(
uchar4
a
,
uchar4
b
)
{
return
make_float4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
__device__
__forceinline__
float
sum
(
float
v
)
{
return
v
;
}
__device__
__forceinline__
float
sum
(
float2
v
)
{
return
v
.
x
+
v
.
y
;
}
__device__
__forceinline__
float
sum
(
float3
v
)
{
return
v
.
x
+
v
.
y
+
v
.
z
;
}
__device__
__forceinline__
float
sum
(
float4
v
)
{
return
v
.
x
+
v
.
y
+
v
.
z
+
v
.
w
;
}
__device__
__forceinline__
float
first
(
float
v
)
{
return
v
;
}
__device__
__forceinline__
float
first
(
float2
v
)
{
return
v
.
x
;
}
__device__
__forceinline__
float
first
(
float3
v
)
{
return
v
.
x
;
}
__device__
__forceinline__
float
first
(
float4
v
)
{
return
v
.
x
;
}
__device__
__forceinline__
float
mul
(
float
a
,
float
b
)
{
return
a
*
b
;
}
__device__
__forceinline__
float2
mul
(
float2
a
,
float2
b
)
{
return
make_float2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
__device__
__forceinline__
float3
mul
(
float3
a
,
float3
b
)
{
return
make_float3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
__device__
__forceinline__
float4
mul
(
float4
a
,
float4
b
)
{
return
make_float4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
__device__
__forceinline__
float
mul
(
uchar
a
,
uchar
b
)
{
return
a
*
b
;
}
__device__
__forceinline__
float2
mul
(
uchar2
a
,
uchar2
b
)
{
return
make_float2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
__device__
__forceinline__
float3
mul
(
uchar3
a
,
uchar3
b
)
{
return
make_float3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
__device__
__forceinline__
float4
mul
(
uchar4
a
,
uchar4
b
)
{
return
make_float4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
__device__
__forceinline__
float
sub
(
float
a
,
float
b
)
{
return
a
-
b
;
}
__device__
__forceinline__
float2
sub
(
float2
a
,
float2
b
)
{
return
make_float2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
__device__
__forceinline__
float3
sub
(
float3
a
,
float3
b
)
{
return
make_float3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
__device__
__forceinline__
float4
sub
(
float4
a
,
float4
b
)
{
return
make_float4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
__device__
__forceinline__
float
sub
(
uchar
a
,
uchar
b
)
{
return
a
-
b
;
}
__device__
__forceinline__
float2
sub
(
uchar2
a
,
uchar2
b
)
{
return
make_float2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
__device__
__forceinline__
float3
sub
(
uchar3
a
,
uchar3
b
)
{
return
make_float3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
__device__
__forceinline__
float4
sub
(
uchar4
a
,
uchar4
b
)
{
return
make_float4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
template
<
typename
T
,
int
cn
>
...
...
modules/gpu/src/cuda/mathfunc.cu
浏览文件 @
1c1a61dd
...
...
@@ -60,27 +60,27 @@ namespace cv { namespace gpu { namespace mathfunc
{
struct
Nothing
{
static
__device__
void
calc
(
int
,
int
,
float
,
float
,
float
*
,
size_t
,
float
)
static
__device__
__forceinline__
void
calc
(
int
,
int
,
float
,
float
,
float
*
,
size_t
,
float
)
{
}
};
struct
Magnitude
{
static
__device__
void
calc
(
int
x
,
int
y
,
float
x_data
,
float
y_data
,
float
*
dst
,
size_t
dst_step
,
float
)
static
__device__
__forceinline__
void
calc
(
int
x
,
int
y
,
float
x_data
,
float
y_data
,
float
*
dst
,
size_t
dst_step
,
float
)
{
dst
[
y
*
dst_step
+
x
]
=
sqrtf
(
x_data
*
x_data
+
y_data
*
y_data
);
}
};
struct
MagnitudeSqr
{
static
__device__
void
calc
(
int
x
,
int
y
,
float
x_data
,
float
y_data
,
float
*
dst
,
size_t
dst_step
,
float
)
static
__device__
__forceinline__
void
calc
(
int
x
,
int
y
,
float
x_data
,
float
y_data
,
float
*
dst
,
size_t
dst_step
,
float
)
{
dst
[
y
*
dst_step
+
x
]
=
x_data
*
x_data
+
y_data
*
y_data
;
}
};
struct
Atan2
{
static
__device__
void
calc
(
int
x
,
int
y
,
float
x_data
,
float
y_data
,
float
*
dst
,
size_t
dst_step
,
float
scale
)
static
__device__
__forceinline__
void
calc
(
int
x
,
int
y
,
float
x_data
,
float
y_data
,
float
*
dst
,
size_t
dst_step
,
float
scale
)
{
dst
[
y
*
dst_step
+
x
]
=
scale
*
atan2f
(
y_data
,
x_data
);
}
...
...
@@ -104,14 +104,14 @@ namespace cv { namespace gpu { namespace mathfunc
struct
NonEmptyMag
{
static
__device__
float
get
(
const
float
*
mag
,
size_t
mag_step
,
int
x
,
int
y
)
static
__device__
__forceinline__
float
get
(
const
float
*
mag
,
size_t
mag_step
,
int
x
,
int
y
)
{
return
mag
[
y
*
mag_step
+
x
];
}
};
struct
EmptyMag
{
static
__device__
float
get
(
const
float
*
,
size_t
,
int
,
int
)
static
__device__
__forceinline__
float
get
(
const
float
*
,
size_t
,
int
,
int
)
{
return
1.0
f
;
}
...
...
modules/gpu/src/cuda/matrix_operations.cu
浏览文件 @
1c1a61dd
...
...
@@ -123,14 +123,14 @@ namespace cv { namespace gpu { namespace matrix_operations {
__constant__
float
scalar_32f
[
4
];
__constant__
double
scalar_64f
[
4
];
template
<
typename
T
>
__device__
T
readScalar
(
int
i
);
template
<
>
__device__
uchar
readScalar
<
uchar
>
(
int
i
)
{
return
scalar_8u
[
i
];}
template
<
>
__device__
schar
readScalar
<
schar
>
(
int
i
)
{
return
scalar_8s
[
i
];}
template
<
>
__device__
ushort
readScalar
<
ushort
>
(
int
i
)
{
return
scalar_16u
[
i
];}
template
<
>
__device__
short
readScalar
<
short
>
(
int
i
)
{
return
scalar_16s
[
i
];}
template
<
>
__device__
int
readScalar
<
int
>
(
int
i
)
{
return
scalar_32s
[
i
];}
template
<
>
__device__
float
readScalar
<
float
>
(
int
i
)
{
return
scalar_32f
[
i
];}
template
<
>
__device__
double
readScalar
<
double
>
(
int
i
)
{
return
scalar_64f
[
i
];}
template
<
typename
T
>
__device__
__forceinline__
T
readScalar
(
int
i
);
template
<
>
__device__
__forceinline__
uchar
readScalar
<
uchar
>
(
int
i
)
{
return
scalar_8u
[
i
];}
template
<
>
__device__
__forceinline__
schar
readScalar
<
schar
>
(
int
i
)
{
return
scalar_8s
[
i
];}
template
<
>
__device__
__forceinline__
ushort
readScalar
<
ushort
>
(
int
i
)
{
return
scalar_16u
[
i
];}
template
<
>
__device__
__forceinline__
short
readScalar
<
short
>
(
int
i
)
{
return
scalar_16s
[
i
];}
template
<
>
__device__
__forceinline__
int
readScalar
<
int
>
(
int
i
)
{
return
scalar_32s
[
i
];}
template
<
>
__device__
__forceinline__
float
readScalar
<
float
>
(
int
i
)
{
return
scalar_32f
[
i
];}
template
<
>
__device__
__forceinline__
double
readScalar
<
double
>
(
int
i
)
{
return
scalar_64f
[
i
];}
void
writeScalar
(
const
uchar
*
vals
)
{
...
...
@@ -243,7 +243,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
public:
Convertor
(
double
alpha_
,
double
beta_
)
:
alpha
(
alpha_
),
beta
(
beta_
)
{}
__device__
D
operator
()(
const
T
&
src
)
__device__
__forceinline__
D
operator
()(
const
T
&
src
)
{
return
saturate_cast
<
D
>
(
alpha
*
src
+
beta
);
}
...
...
modules/gpu/src/cuda/matrix_reductions.cu
浏览文件 @
1c1a61dd
...
...
@@ -78,7 +78,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
explicit
Mask8U
(
PtrStep
mask
)
:
mask
(
mask
)
{}
__device__
bool
operator
()(
int
y
,
int
x
)
const
__device__
__forceinline__
bool
operator
()(
int
y
,
int
x
)
const
{
return
mask
.
ptr
(
y
)[
x
];
}
...
...
@@ -89,7 +89,7 @@ namespace cv { namespace gpu { namespace mathfunc
struct
MaskTrue
{
__device__
bool
operator
()(
int
y
,
int
x
)
const
__device__
__forceinline__
bool
operator
()(
int
y
,
int
x
)
const
{
return
true
;
}
...
...
@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace mathfunc
// Does min and max in shared memory
template
<
typename
T
>
__device__
void
merge
(
uint
tid
,
uint
offset
,
volatile
T
*
minval
,
volatile
T
*
maxval
)
__device__
__forceinline__
void
merge
(
uint
tid
,
uint
offset
,
volatile
T
*
minval
,
volatile
T
*
maxval
)
{
minval
[
tid
]
=
min
(
minval
[
tid
],
minval
[
tid
+
offset
]);
maxval
[
tid
]
=
max
(
maxval
[
tid
],
maxval
[
tid
+
offset
]);
...
...
@@ -976,16 +976,16 @@ namespace cv { namespace gpu { namespace mathfunc
template
<
>
struct
SumType
<
double
>
{
typedef
double
R
;
};
template
<
typename
R
>
struct
IdentityOp
{
static
__device__
R
call
(
R
x
)
{
return
x
;
}
};
struct
IdentityOp
{
static
__device__
__forceinline__
R
call
(
R
x
)
{
return
x
;
}
};
template
<
typename
R
>
struct
AbsOp
{
static
__device__
R
call
(
R
x
)
{
return
abs
(
x
);
}
};
struct
AbsOp
{
static
__device__
__forceinline__
R
call
(
R
x
)
{
return
abs
(
x
);
}
};
template
<
>
struct
AbsOp
<
uint
>
{
static
__device__
uint
call
(
uint
x
)
{
return
x
;
}
};
struct
AbsOp
<
uint
>
{
static
__device__
__forceinline__
uint
call
(
uint
x
)
{
return
x
;
}
};
template
<
typename
R
>
struct
SqrOp
{
static
__device__
R
call
(
R
x
)
{
return
x
*
x
;
}
};
struct
SqrOp
{
static
__device__
__forceinline__
R
call
(
R
x
)
{
return
x
*
x
;
}
};
__constant__
int
ctwidth
;
__constant__
int
ctheight
;
...
...
modules/gpu/src/cuda/stereobm.cu
浏览文件 @
1c1a61dd
...
...
@@ -68,7 +68,7 @@ __constant__ size_t cminSSD_step;
__constant__
int
cwidth
;
__constant__
int
cheight
;
__device__
int
SQ
(
int
a
)
__device__
__forceinline__
int
SQ
(
int
a
)
{
return
a
*
a
;
}
...
...
@@ -419,7 +419,7 @@ extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output,
texture
<
unsigned
char
,
2
,
cudaReadModeNormalizedFloat
>
texForTF
;
__device__
float
sobel
(
int
x
,
int
y
)
__device__
__forceinline__
float
sobel
(
int
x
,
int
y
)
{
float
conv
=
tex2D
(
texForTF
,
x
-
1
,
y
-
1
)
*
(
-
1
)
+
tex2D
(
texForTF
,
x
+
1
,
y
-
1
)
*
(
1
)
+
tex2D
(
texForTF
,
x
-
1
,
y
)
*
(
-
2
)
+
tex2D
(
texForTF
,
x
+
1
,
y
)
*
(
2
)
+
...
...
modules/gpu/src/cuda/stereobp.cu
浏览文件 @
1c1a61dd
...
...
@@ -76,11 +76,11 @@ namespace cv { namespace gpu { namespace bp
template
<
int
cn
>
struct
PixDiff
;
template
<
>
struct
PixDiff
<
1
>
{
__device__
PixDiff
(
const
uchar
*
ls
)
__device__
__forceinline__
PixDiff
(
const
uchar
*
ls
)
{
l
=
*
ls
;
}
__device__
float
operator
()(
const
uchar
*
rs
)
const
__device__
__forceinline__
float
operator
()(
const
uchar
*
rs
)
const
{
return
abs
((
int
)
l
-
*
rs
);
}
...
...
@@ -88,11 +88,11 @@ namespace cv { namespace gpu { namespace bp
};
template
<
>
struct
PixDiff
<
3
>
{
__device__
PixDiff
(
const
uchar
*
ls
)
__device__
__forceinline__
PixDiff
(
const
uchar
*
ls
)
{
l
=
*
((
uchar3
*
)
ls
);
}
__device__
float
operator
()(
const
uchar
*
rs
)
const
__device__
__forceinline__
float
operator
()(
const
uchar
*
rs
)
const
{
const
float
tr
=
0.299
f
;
const
float
tg
=
0.587
f
;
...
...
@@ -108,11 +108,11 @@ namespace cv { namespace gpu { namespace bp
};
template
<
>
struct
PixDiff
<
4
>
{
__device__
PixDiff
(
const
uchar
*
ls
)
__device__
__forceinline__
PixDiff
(
const
uchar
*
ls
)
{
l
=
*
((
uchar4
*
)
ls
);
}
__device__
float
operator
()(
const
uchar
*
rs
)
const
__device__
__forceinline__
float
operator
()(
const
uchar
*
rs
)
const
{
const
float
tr
=
0.299
f
;
const
float
tg
=
0.587
f
;
...
...
modules/gpu/src/cuda/stereocsbp.cu
浏览文件 @
1c1a61dd
...
...
@@ -102,14 +102,14 @@ namespace cv { namespace gpu { namespace csbp
template
<
int
channels
>
struct
DataCostPerPixel
;
template
<
>
struct
DataCostPerPixel
<
1
>
{
static
__device__
float
compute
(
const
uchar
*
left
,
const
uchar
*
right
)
static
__device__
__forceinline__
float
compute
(
const
uchar
*
left
,
const
uchar
*
right
)
{
return
fmin
(
cdata_weight
*
abs
((
int
)
*
left
-
*
right
),
cdata_weight
*
cmax_data_term
);
}
};
template
<
>
struct
DataCostPerPixel
<
3
>
{
static
__device__
float
compute
(
const
uchar
*
left
,
const
uchar
*
right
)
static
__device__
__forceinline__
float
compute
(
const
uchar
*
left
,
const
uchar
*
right
)
{
float
tb
=
0.114
f
*
abs
((
int
)
left
[
0
]
-
right
[
0
]);
float
tg
=
0.587
f
*
abs
((
int
)
left
[
1
]
-
right
[
1
]);
...
...
@@ -120,7 +120,7 @@ namespace cv { namespace gpu { namespace csbp
};
template
<
>
struct
DataCostPerPixel
<
4
>
{
static
__device__
float
compute
(
const
uchar
*
left
,
const
uchar
*
right
)
static
__device__
__forceinline__
float
compute
(
const
uchar
*
left
,
const
uchar
*
right
)
{
uchar4
l
=
*
((
const
uchar4
*
)
left
);
uchar4
r
=
*
((
const
uchar4
*
)
right
);
...
...
modules/gpu/src/cuda/surf.cu
浏览文件 @
1c1a61dd
...
...
@@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace surf
__constant__
float
c_DY
[
3
][
5
]
=
{
{
2
,
0
,
7
,
3
,
1
},
{
2
,
3
,
7
,
6
,
-
2
},
{
2
,
6
,
7
,
9
,
1
}
};
__constant__
float
c_DXY
[
4
][
5
]
=
{
{
1
,
1
,
4
,
4
,
1
},
{
5
,
1
,
8
,
4
,
-
1
},
{
1
,
5
,
4
,
8
,
-
1
},
{
5
,
5
,
8
,
8
,
1
}
};
__host__
__device__
int
calcSize
(
int
octave
,
int
layer
)
__host__
__device__
__forceinline__
int
calcSize
(
int
octave
,
int
layer
)
{
/* Wavelet size at first layer of first octave. */
const
int
HAAR_SIZE0
=
9
;
...
...
@@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace surf
struct
WithOutMask
{
static
__device__
bool
check
(
int
,
int
,
int
)
static
__device__
__forceinline__
bool
check
(
int
,
int
,
int
)
{
return
true
;
}
...
...
@@ -708,7 +708,7 @@ namespace cv { namespace gpu { namespace surf
3.695352233989979e-006
f
,
8.444558261544444e-006
f
,
1.760426494001877e-005
f
,
3.34794785885606e-005
f
,
5.808438800158911e-005
f
,
9.193058212986216e-005
f
,
0.0001327334757661447
f
,
0.0001748319627949968
f
,
0.0002100782439811155
f
,
0.0002302826324012131
f
,
0.0002302826324012131
f
,
0.0002100782439811155
f
,
0.0001748319627949968
f
,
0.0001327334757661447
f
,
9.193058212986216e-005
f
,
5.808438800158911e-005
f
,
3.34794785885606e-005
f
,
1.760426494001877e-005
f
,
8.444558261544444e-006
f
,
3.695352233989979e-006
f
};
__device__
unsigned
char
calcWin
(
int
i
,
int
j
,
float
centerX
,
float
centerY
,
float
win_offset
,
float
cos_dir
,
float
sin_dir
)
__device__
__forceinline__
unsigned
char
calcWin
(
int
i
,
int
j
,
float
centerX
,
float
centerY
,
float
win_offset
,
float
cos_dir
,
float
sin_dir
)
{
float
pixel_x
=
centerX
+
(
win_offset
+
j
)
*
cos_dir
+
(
win_offset
+
i
)
*
sin_dir
;
float
pixel_y
=
centerY
-
(
win_offset
+
j
)
*
sin_dir
+
(
win_offset
+
i
)
*
cos_dir
;
...
...
modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
浏览文件 @
1c1a61dd
...
...
@@ -40,208 +40,207 @@
//
//M*/
#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
namespace
cv
{
namespace
gpu
namespace
cv
{
namespace
gpu
{
namespace
device
{
struct
BrdReflect101
{
explicit
BrdReflect101
(
int
len
)
:
last
(
len
-
1
)
{}
__device__
__forceinline__
int
idx_low
(
int
i
)
const
{
return
abs
(
i
);
}
__device__
__forceinline__
int
idx_high
(
int
i
)
const
{
return
last
-
abs
(
last
-
i
);
}
__device__
__forceinline__
int
idx
(
int
i
)
const
{
return
idx_low
(
idx_high
(
i
));
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
-
last
<=
mini
&&
maxi
<=
2
*
last
;
}
private:
int
last
;
};
template
<
typename
D
>
struct
BrdRowReflect101
:
BrdReflect101
{
namespace
device
{
struct
BrdReflect101
{
explicit
BrdReflect101
(
int
len
)
:
last
(
len
-
1
)
{}
__device__
int
idx_low
(
int
i
)
const
{
return
abs
(
i
);
}
__device__
int
idx_high
(
int
i
)
const
{
return
last
-
abs
(
last
-
i
);
}
__device__
int
idx
(
int
i
)
const
{
return
idx_low
(
idx_high
(
i
));
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
-
last
<=
mini
&&
maxi
<=
2
*
last
;
}
private:
int
last
;
};
template
<
typename
D
>
struct
BrdRowReflect101
:
BrdReflect101
{
explicit
BrdRowReflect101
(
int
len
)
:
BrdReflect101
(
len
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]);
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
}
};
template
<
typename
D
>
struct
BrdColReflect101
:
BrdReflect101
{
BrdColReflect101
(
int
len
,
int
step
)
:
BrdReflect101
(
len
),
step
(
step
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)
*
step
]);
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)
*
step
]);
}
private:
int
step
;
};
struct
BrdReplicate
{
explicit
BrdReplicate
(
int
len
)
:
last
(
len
-
1
)
{}
__device__
int
idx_low
(
int
i
)
const
{
return
max
(
i
,
0
);
}
__device__
int
idx_high
(
int
i
)
const
{
return
min
(
i
,
last
);
}
__device__
int
idx
(
int
i
)
const
{
return
idx_low
(
idx_high
(
i
));
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private:
int
last
;
};
template
<
typename
D
>
struct
BrdRowReplicate
:
BrdReplicate
{
explicit
BrdRowReplicate
(
int
len
)
:
BrdReplicate
(
len
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]);
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
}
};
template
<
typename
D
>
struct
BrdColReplicate
:
BrdReplicate
{
BrdColReplicate
(
int
len
,
int
step
)
:
BrdReplicate
(
len
),
step
(
step
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)
*
step
]);
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)
*
step
]);
}
private:
int
step
;
};
template
<
typename
D
>
struct
BrdRowConstant
{
explicit
BrdRowConstant
(
int
len_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private:
int
len
;
D
val
;
};
template
<
typename
D
>
struct
BrdColConstant
{
BrdColConstant
(
int
len_
,
int
step_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
step
(
step_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
*
step
])
:
val
;
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
*
step
])
:
val
;
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private:
int
len
;
int
step
;
D
val
;
};
}
}
}
\ No newline at end of file
explicit
BrdRowReflect101
(
int
len
)
:
BrdReflect101
(
len
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
}
};
template
<
typename
D
>
struct
BrdColReflect101
:
BrdReflect101
{
BrdColReflect101
(
int
len
,
int
step
)
:
BrdReflect101
(
len
),
step
(
step
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)
*
step
]);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)
*
step
]);
}
private:
int
step
;
};
struct
BrdReplicate
{
explicit
BrdReplicate
(
int
len
)
:
last
(
len
-
1
)
{}
__device__
__forceinline__
int
idx_low
(
int
i
)
const
{
return
max
(
i
,
0
);
}
__device__
__forceinline__
int
idx_high
(
int
i
)
const
{
return
min
(
i
,
last
);
}
__device__
__forceinline__
int
idx
(
int
i
)
const
{
return
idx_low
(
idx_high
(
i
));
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private:
int
last
;
};
template
<
typename
D
>
struct
BrdRowReplicate
:
BrdReplicate
{
explicit
BrdRowReplicate
(
int
len
)
:
BrdReplicate
(
len
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
}
};
template
<
typename
D
>
struct
BrdColReplicate
:
BrdReplicate
{
BrdColReplicate
(
int
len
,
int
step
)
:
BrdReplicate
(
len
),
step
(
step
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)
*
step
]);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)
*
step
]);
}
private:
int
step
;
};
template
<
typename
D
>
struct
BrdRowConstant
{
explicit
BrdRowConstant
(
int
len_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private:
int
len
;
D
val
;
};
template
<
typename
D
>
struct
BrdColConstant
{
BrdColConstant
(
int
len_
,
int
step_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
step
(
step_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
*
step
])
:
val
;
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
*
step
])
:
val
;
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private:
int
len
;
int
step
;
D
val
;
};
}}}
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp
浏览文件 @
1c1a61dd
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or bpied warranties, including, but not limited to, the bpied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__
#include "internal_shared.hpp"
#if __CUDA_ARCH__ >= 200
namespace
cv
{
namespace
gpu
{
namespace
device
{
#if __CUDA_ARCH__ >= 200
// for Fermi memory space is detected automatically
template
<
typename
T
>
struct
ForceGlobLoad
{
__device__
__forceinline__
static
void
Ld
(
T
*
ptr
,
int
offset
,
T
&
val
)
{
val
=
d_ptr
[
offset
];
}
};
#else
// for Fermi memory space is detected automatically
template
<
typename
T
>
struct
ForceGlob
{
__device__
__forceinline__
static
void
Load
(
const
T
*
ptr
,
int
offset
,
T
&
val
)
{
val
=
d_ptr
[
offset
];
}
};
#else // __CUDA_ARCH__ >= 200
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define _OPENCV_ASM_PTR_ "l"
#else
// 32-bit register modifier for inlined asm
#define _OPENCV_ASM_PTR_ "r"
#endif
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define _OPENCV_ASM_PTR_ "l"
#else
// 32-bit register modifier for inlined asm
#define _OPENCV_ASM_PTR_ "r"
#endif
template
<
class
T
>
struct
ForceGlob
;
template
<
class
T
>
struct
ForceGlobLoad
;
#define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
} \
};
#define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
} \
};
DEFINE_FORCE_GLOB_B
(
uchar
,
u8
)
DEFINE_FORCE_GLOB_B
(
schar
,
s8
)
DEFINE_FORCE_GLOB_B
(
char
,
b8
)
DEFINE_FORCE_GLOB
(
ushort
,
u16
,
h
)
DEFINE_FORCE_GLOB
(
short
,
s16
,
h
)
DEFINE_FORCE_GLOB
(
uint
,
u32
,
r
)
DEFINE_FORCE_GLOB
(
int
,
s32
,
r
)
DEFINE_FORCE_GLOB
(
float
,
f32
,
f
)
DEFINE_FORCE_GLOB
(
double
,
f64
,
d
)
#undef DEFINE_FORCE_GLOB
#undef DEFINE_FORCE_GLOB_B
#undef _OPENCV_ASM_PTR_
#endif // __CUDA_ARCH__ >= 200
}}}
#define DEFINE_FORCE_GLOB_LOAD(base_type, ptx_type, reg_mod) \
template <> struct ForceGlobLoad<base_type> \
{ \
__device__ __forceinline__ static void Ld(type* ptr, int offset, type& val) \
{
\
asm
(
"ld.global."
#
ptx_type
" %0, [%1];"
:
"="
#
reg_mod
(
val
)
:
_OPENCV_ASM_PTR_
(
d_ptr
+
offset
));
\
}
\
}
;
DEFINE_FORCE_GLOB_LOAD
(
int
,
s32
,
r
)
DEFINE_FORCE_GLOB_LOAD
(
float
,
f32
,
f
)
#undef DEFINE_FORCE_GLOB_LOAD
#endif
#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp
浏览文件 @
1c1a61dd
...
...
@@ -40,44 +40,41 @@
//
//M*/
#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
namespace
cv
{
namespace
gpu
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
class
T
>
struct
DynamicSharedMem
{
namespace
device
{
template
<
class
T
>
struct
DynamicSharedMem
{
__device__
operator
T
*
()
{
extern
__shared__
int
__smem
[];
return
(
T
*
)
__smem
;
}
__device__
__forceinline__
operator
T
*
()
{
extern
__shared__
int
__smem
[];
return
(
T
*
)
__smem
;
}
__devic
e__
operator
const
T
*
()
const
{
extern
__shared__
int
__smem
[];
return
(
T
*
)
__smem
;
}
};
__device__
__forceinlin
e__
operator
const
T
*
()
const
{
extern
__shared__
int
__smem
[];
return
(
T
*
)
__smem
;
}
};
// specialize for double to avoid unaligned memory access compile errors
template
<
>
struct
DynamicSharedMem
<
double
>
{
__devic
e__
operator
double
*
()
{
extern
__shared__
double
__smem_d
[];
return
(
double
*
)
__smem_d
;
}
// specialize for double to avoid unaligned memory access compile errors
template
<
>
struct
DynamicSharedMem
<
double
>
{
__device__
__forceinlin
e__
operator
double
*
()
{
extern
__shared__
double
__smem_d
[];
return
(
double
*
)
__smem_d
;
}
__device__
operator
const
double
*
()
const
{
extern
__shared__
double
__smem_d
[];
return
(
double
*
)
__smem_d
;
}
};
__device__
__forceinline__
operator
const
double
*
()
const
{
extern
__shared__
double
__smem_d
[];
return
(
double
*
)
__smem_d
;
}
};
}}}
}
}
\ No newline at end of file
#endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__
modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
浏览文件 @
1c1a61dd
...
...
@@ -40,181 +40,179 @@
//
//M*/
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
class
T
>
struct
numeric_limits_gpu
{
typedef
T
type
;
__device__
static
type
min
()
{
return
type
();
};
__device__
static
type
max
()
{
return
type
();
};
__device__
static
type
epsilon
()
{
return
type
();
}
__device__
static
type
round_error
()
{
return
type
();
}
__device__
static
type
denorm_min
()
{
return
type
();
}
__device__
static
type
infinity
()
{
return
type
();
}
__device__
static
type
quiet_NaN
()
{
return
type
();
}
__device__
static
type
signaling_NaN
()
{
return
T
();
}
static
const
bool
is_signed
;
};
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
class
T
>
struct
numeric_limits_gpu
{
typedef
T
type
;
__device__
__forceinline__
static
type
min
()
{
return
type
();
};
__device__
__forceinline__
static
type
max
()
{
return
type
();
};
__device__
__forceinline__
static
type
epsilon
()
{
return
type
();
}
__device__
__forceinline__
static
type
round_error
()
{
return
type
();
}
__device__
__forceinline__
static
type
denorm_min
()
{
return
type
();
}
__device__
__forceinline__
static
type
infinity
()
{
return
type
();
}
__device__
__forceinline__
static
type
quiet_NaN
()
{
return
type
();
}
__device__
__forceinline__
static
type
signaling_NaN
()
{
return
T
();
}
static
const
bool
is_signed
;
};
template
<
>
struct
numeric_limits_gpu
<
bool
>
{
typedef
bool
type
;
__devic
e__
static
type
min
()
{
return
false
;
};
__devic
e__
static
type
max
()
{
return
true
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
bool
>
{
typedef
bool
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
false
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
true
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
char
>
{
typedef
char
type
;
__devic
e__
static
type
min
()
{
return
CHAR_MIN
;
};
__devic
e__
static
type
max
()
{
return
CHAR_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
(
char
)
-
1
==
-
1
;
};
template
<
>
struct
numeric_limits_gpu
<
char
>
{
typedef
char
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
CHAR_MIN
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
CHAR_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
(
char
)
-
1
==
-
1
;
};
template
<
>
struct
numeric_limits_gpu
<
unsigned
char
>
{
typedef
unsigned
char
type
;
__devic
e__
static
type
min
()
{
return
0
;
};
__devic
e__
static
type
max
()
{
return
UCHAR_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
unsigned
char
>
{
typedef
unsigned
char
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
0
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
UCHAR_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
short
>
{
typedef
short
type
;
__devic
e__
static
type
min
()
{
return
SHRT_MIN
;
};
__devic
e__
static
type
max
()
{
return
SHRT_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<
>
struct
numeric_limits_gpu
<
short
>
{
typedef
short
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
SHRT_MIN
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
SHRT_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<
>
struct
numeric_limits_gpu
<
unsigned
short
>
{
typedef
unsigned
short
type
;
__devic
e__
static
type
min
()
{
return
0
;
};
__devic
e__
static
type
max
()
{
return
USHRT_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
unsigned
short
>
{
typedef
unsigned
short
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
0
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
USHRT_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
int
>
{
typedef
int
type
;
__devic
e__
static
type
min
()
{
return
INT_MIN
;
};
__devic
e__
static
type
max
()
{
return
INT_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<
>
struct
numeric_limits_gpu
<
int
>
{
typedef
int
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
INT_MIN
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
INT_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<
>
struct
numeric_limits_gpu
<
unsigned
int
>
{
typedef
unsigned
int
type
;
__devic
e__
static
type
min
()
{
return
0
;
};
__devic
e__
static
type
max
()
{
return
UINT_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
unsigned
int
>
{
typedef
unsigned
int
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
0
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
UINT_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
long
>
{
typedef
long
type
;
__devic
e__
static
type
min
()
{
return
LONG_MIN
;
};
__devic
e__
static
type
max
()
{
return
LONG_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<
>
struct
numeric_limits_gpu
<
long
>
{
typedef
long
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
LONG_MIN
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
LONG_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<
>
struct
numeric_limits_gpu
<
unsigned
long
>
{
typedef
unsigned
long
type
;
__devic
e__
static
type
min
()
{
return
0
;
};
__devic
e__
static
type
max
()
{
return
ULONG_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
float
>
{
typedef
float
type
;
__devic
e__
static
type
min
()
{
return
1.175494351e-38
f
/*FLT_MIN*/
;
};
__devic
e__
static
type
max
()
{
return
3.402823466e+38
f
/*FLT_MAX*/
;
};
__devic
e__
static
type
epsilon
()
{
return
1.192092896e-07
f
/*FLT_EPSILON*/
;
};
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<
>
struct
numeric_limits_gpu
<
unsigned
long
>
{
typedef
unsigned
long
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
0
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
ULONG_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<
>
struct
numeric_limits_gpu
<
float
>
{
typedef
float
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
1.175494351e-38
f
/*FLT_MIN*/
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
3.402823466e+38
f
/*FLT_MAX*/
;
};
__device__
__forceinlin
e__
static
type
epsilon
()
{
return
1.192092896e-07
f
/*FLT_EPSILON*/
;
};
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<
>
struct
numeric_limits_gpu
<
double
>
{
typedef
double
type
;
__device__
static
type
min
()
{
return
2.2250738585072014e-308
/*DBL_MIN*/
;
};
__device__
static
type
max
()
{
return
1.7976931348623158e+308
/*DBL_MAX*/
;
};
__device__
static
type
epsilon
();
__device__
static
type
round_error
();
__device__
static
type
denorm_min
();
__device__
static
type
infinity
();
__device__
static
type
quiet_NaN
();
__device__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
}
}
}
\ No newline at end of file
template
<
>
struct
numeric_limits_gpu
<
double
>
{
typedef
double
type
;
__device__
__forceinline__
static
type
min
()
{
return
2.2250738585072014e-308
/*DBL_MIN*/
;
};
__device__
__forceinline__
static
type
max
()
{
return
1.7976931348623158e+308
/*DBL_MAX*/
;
};
__device__
__forceinline__
static
type
epsilon
();
__device__
__forceinline__
static
type
round_error
();
__device__
__forceinline__
static
type
denorm_min
();
__device__
__forceinline__
static
type
infinity
();
__device__
__forceinline__
static
type
quiet_NaN
();
__device__
__forceinline__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
}}}
#endif // __OPENCV_GPU_LIMITS_GPU_HPP__
modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp
浏览文件 @
1c1a61dd
...
...
@@ -51,29 +51,29 @@ namespace cv
{
namespace
device
{
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uchar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
schar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
ushort
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
short
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uint
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
int
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
float
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
double
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
uchar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
schar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
ushort
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
short
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
uint
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
int
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
float
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
double
v
)
{
return
_Tp
(
v
);
}
template
<
>
static
__device__
uchar
saturate_cast
<
uchar
>
(
schar
v
)
template
<
>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
schar
v
)
{
return
(
uchar
)
max
((
int
)
v
,
0
);
}
template
<
>
static
__device__
uchar
saturate_cast
<
uchar
>
(
ushort
v
)
template
<
>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
ushort
v
)
{
return
(
uchar
)
min
((
uint
)
v
,
(
uint
)
UCHAR_MAX
);
}
template
<
>
static
__device__
uchar
saturate_cast
<
uchar
>
(
int
v
)
template
<
>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
int
v
)
{
return
(
uchar
)((
uint
)
v
<=
UCHAR_MAX
?
v
:
v
>
0
?
UCHAR_MAX
:
0
);
}
template
<
>
static
__device__
uchar
saturate_cast
<
uchar
>
(
uint
v
)
template
<
>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
uint
v
)
{
return
(
uchar
)
min
(
v
,
(
uint
)
UCHAR_MAX
);
}
template
<
>
static
__device__
uchar
saturate_cast
<
uchar
>
(
short
v
)
template
<
>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
short
v
)
{
return
saturate_cast
<
uchar
>
((
uint
)
v
);
}
template
<
>
static
__device__
uchar
saturate_cast
<
uchar
>
(
float
v
)
template
<
>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
uchar
>
(
iv
);
}
template
<
>
static
__device__
uchar
saturate_cast
<
uchar
>
(
double
v
)
template
<
>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
uchar
>
(
iv
);
...
...
@@ -82,23 +82,23 @@ namespace cv
#endif
}
template
<
>
static
__device__
schar
saturate_cast
<
schar
>
(
uchar
v
)
template
<
>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
uchar
v
)
{
return
(
schar
)
min
((
int
)
v
,
SCHAR_MAX
);
}
template
<
>
static
__device__
schar
saturate_cast
<
schar
>
(
ushort
v
)
template
<
>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
ushort
v
)
{
return
(
schar
)
min
((
uint
)
v
,
(
uint
)
SCHAR_MAX
);
}
template
<
>
static
__device__
schar
saturate_cast
<
schar
>
(
int
v
)
template
<
>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
int
v
)
{
return
(
schar
)((
uint
)(
v
-
SCHAR_MIN
)
<=
(
uint
)
UCHAR_MAX
?
v
:
v
>
0
?
SCHAR_MAX
:
SCHAR_MIN
);
}
template
<
>
static
__device__
schar
saturate_cast
<
schar
>
(
short
v
)
template
<
>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
short
v
)
{
return
saturate_cast
<
schar
>
((
int
)
v
);
}
template
<
>
static
__device__
schar
saturate_cast
<
schar
>
(
uint
v
)
template
<
>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
uint
v
)
{
return
(
schar
)
min
(
v
,
(
uint
)
SCHAR_MAX
);
}
template
<
>
static
__device__
schar
saturate_cast
<
schar
>
(
float
v
)
template
<
>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
schar
>
(
iv
);
}
template
<
>
static
__device__
schar
saturate_cast
<
schar
>
(
double
v
)
template
<
>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
schar
>
(
iv
);
...
...
@@ -107,17 +107,17 @@ namespace cv
#endif
}
template
<
>
static
__device__
ushort
saturate_cast
<
ushort
>
(
schar
v
)
template
<
>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
schar
v
)
{
return
(
ushort
)
max
((
int
)
v
,
0
);
}
template
<
>
static
__device__
ushort
saturate_cast
<
ushort
>
(
short
v
)
template
<
>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
short
v
)
{
return
(
ushort
)
max
((
int
)
v
,
0
);
}
template
<
>
static
__device__
ushort
saturate_cast
<
ushort
>
(
int
v
)
template
<
>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
int
v
)
{
return
(
ushort
)((
uint
)
v
<=
(
uint
)
USHRT_MAX
?
v
:
v
>
0
?
USHRT_MAX
:
0
);
}
template
<
>
static
__device__
ushort
saturate_cast
<
ushort
>
(
uint
v
)
template
<
>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
uint
v
)
{
return
(
ushort
)
min
(
v
,
(
uint
)
USHRT_MAX
);
}
template
<
>
static
__device__
ushort
saturate_cast
<
ushort
>
(
float
v
)
template
<
>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
ushort
>
(
iv
);
}
template
<
>
static
__device__
ushort
saturate_cast
<
ushort
>
(
double
v
)
template
<
>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
ushort
>
(
iv
);
...
...
@@ -126,18 +126,18 @@ namespace cv
#endif
}
template
<
>
static
__device__
short
saturate_cast
<
short
>
(
ushort
v
)
template
<
>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
ushort
v
)
{
return
(
short
)
min
((
int
)
v
,
SHRT_MAX
);
}
template
<
>
static
__device__
short
saturate_cast
<
short
>
(
int
v
)
template
<
>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
int
v
)
{
return
(
short
)((
uint
)(
v
-
SHRT_MIN
)
<=
(
uint
)
USHRT_MAX
?
v
:
v
>
0
?
SHRT_MAX
:
SHRT_MIN
);
}
template
<
>
static
__device__
short
saturate_cast
<
short
>
(
uint
v
)
template
<
>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
uint
v
)
{
return
(
short
)
min
(
v
,
(
uint
)
SHRT_MAX
);
}
template
<
>
static
__device__
short
saturate_cast
<
short
>
(
float
v
)
template
<
>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
short
>
(
iv
);
}
template
<
>
static
__device__
short
saturate_cast
<
short
>
(
double
v
)
template
<
>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
short
>
(
iv
);
...
...
@@ -146,8 +146,8 @@ namespace cv
#endif
}
template
<
>
static
__device__
int
saturate_cast
<
int
>
(
float
v
)
{
return
__float2int_rn
(
v
);
}
template
<
>
static
__device__
int
saturate_cast
<
int
>
(
double
v
)
template
<
>
static
__device__
__forceinline__
int
saturate_cast
<
int
>
(
float
v
)
{
return
__float2int_rn
(
v
);
}
template
<
>
static
__device__
__forceinline__
int
saturate_cast
<
int
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
return
__double2int_rn
(
v
);
...
...
@@ -156,8 +156,8 @@ namespace cv
#endif
}
template
<
>
static
__device__
uint
saturate_cast
<
uint
>
(
float
v
){
return
__float2uint_rn
(
v
);
}
template
<
>
static
__device__
uint
saturate_cast
<
uint
>
(
double
v
)
template
<
>
static
__device__
__forceinline__
uint
saturate_cast
<
uint
>
(
float
v
){
return
__float2uint_rn
(
v
);
}
template
<
>
static
__device__
__forceinline__
uint
saturate_cast
<
uint
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
return
__double2uint_rn
(
v
);
...
...
modules/gpu/src/opencv2/gpu/device/transform.hpp
浏览文件 @
1c1a61dd
...
...
@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
public:
explicit
MaskReader
(
const
PtrStep
&
mask_
)
:
mask
(
mask_
)
{}
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
mask
.
ptr
(
y
)[
x
];
}
__device__
__forceinline__
bool
operator
()(
int
y
,
int
x
)
const
{
return
mask
.
ptr
(
y
)[
x
];
}
private:
PtrStep
mask
;
...
...
@@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace device
struct
NoMask
{
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
true
;
}
__device__
__forceinline__
bool
operator
()(
int
y
,
int
x
)
const
{
return
true
;
}
};
//! Read Write Traits
...
...
@@ -121,14 +121,14 @@ namespace cv { namespace gpu { namespace device
template
<
>
struct
OpUnroller
<
1
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
...
...
@@ -137,7 +137,7 @@ namespace cv { namespace gpu { namespace device
template
<
>
struct
OpUnroller
<
2
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
...
...
@@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace device
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
...
...
@@ -157,7 +157,7 @@ namespace cv { namespace gpu { namespace device
template
<
>
struct
OpUnroller
<
3
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
...
...
@@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
...
...
@@ -181,7 +181,7 @@ namespace cv { namespace gpu { namespace device
template
<
>
struct
OpUnroller
<
4
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
...
...
@@ -194,7 +194,7 @@ namespace cv { namespace gpu { namespace device
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
...
...
modules/gpu/src/opencv2/gpu/device/vecmath.hpp
浏览文件 @
1c1a61dd
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录