Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
cc34a8ac
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
cc34a8ac
编写于
4月 16, 2013
作者:
V
Vladislav Vinogradov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
new implementation for GpuMat::setTo (without constant memory)
上级
69be49ba
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
31 addition
and
99 deletion
+31
-99
modules/core/src/cuda/matrix_operations.cu
modules/core/src/cuda/matrix_operations.cu
+31
-99
未找到文件。
modules/core/src/cuda/matrix_operations.cu
浏览文件 @
cc34a8ac
...
...
@@ -44,42 +44,33 @@
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/type_traits.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "matrix_operations.hpp"
namespace
cv
{
namespace
gpu
{
namespace
cudev
{
template
<
typename
T
>
struct
shift_and_sizeof
;
template
<
>
struct
shift_and_sizeof
<
signed
char
>
{
enum
{
shift
=
0
};
};
template
<
>
struct
shift_and_sizeof
<
unsigned
char
>
{
enum
{
shift
=
0
};
};
template
<
>
struct
shift_and_sizeof
<
short
>
{
enum
{
shift
=
1
};
};
template
<
>
struct
shift_and_sizeof
<
unsigned
short
>
{
enum
{
shift
=
1
};
};
template
<
>
struct
shift_and_sizeof
<
int
>
{
enum
{
shift
=
2
};
};
template
<
>
struct
shift_and_sizeof
<
float
>
{
enum
{
shift
=
2
};
};
template
<
>
struct
shift_and_sizeof
<
double
>
{
enum
{
shift
=
3
};
};
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// copyWithMask
template
<
typename
T
>
void
copyWithMask
(
PtrStepSzb
src
,
PtrStepSzb
dst
,
int
cn
,
PtrStepSzb
mask
,
bool
multiChannelMask
,
cudaStream_t
stream
)
{
if
(
multiChannelMask
)
cv
::
gpu
::
cudev
::
transform
((
PtrStepSz
<
T
>
)
src
,
(
PtrStepSz
<
T
>
)
dst
,
identity
<
T
>
(),
SingleMask
(
mask
),
stream
);
cv
::
gpu
::
cudev
::
transform
((
PtrStepSz
<
T
>
)
src
,
(
PtrStepSz
<
T
>
)
dst
,
identity
<
T
>
(),
SingleMask
(
mask
),
stream
);
else
cv
::
gpu
::
cudev
::
transform
((
PtrStepSz
<
T
>
)
src
,
(
PtrStepSz
<
T
>
)
dst
,
identity
<
T
>
(),
SingleMaskChannels
(
mask
,
cn
),
stream
);
cv
::
gpu
::
cudev
::
transform
((
PtrStepSz
<
T
>
)
src
,
(
PtrStepSz
<
T
>
)
dst
,
identity
<
T
>
(),
SingleMaskChannels
(
mask
,
cn
),
stream
);
}
void
copyWithMask
(
PtrStepSzb
src
,
PtrStepSzb
dst
,
size_t
elemSize1
,
int
cn
,
PtrStepSzb
mask
,
bool
multiChannelMask
,
cudaStream_t
stream
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
PtrStepSzb
dst
,
int
cn
,
PtrStepSzb
mask
,
bool
multiChannelMask
,
cudaStream_t
stream
);
static
func_t
tab
[]
=
static
const
func_t
tab
[]
=
{
0
,
copyWithMask
<
u
nsigned
char
>
,
copyWithMask
<
u
nsigned
short
>
,
copyWithMask
<
uchar
>
,
copyWithMask
<
ushort
>
,
0
,
copyWithMask
<
int
>
,
0
,
...
...
@@ -88,81 +79,39 @@ namespace cv { namespace gpu { namespace cudev
copyWithMask
<
double
>
};
tab
[
elemSize1
](
src
,
dst
,
cn
,
mask
,
multiChannelMask
,
stream
);
const
func_t
func
=
tab
[
elemSize1
];
CV_DbgAssert
(
func
!=
0
);
func
(
src
,
dst
,
cn
,
mask
,
multiChannelMask
,
stream
);
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// set
__constant__
uchar
scalar_8u
[
4
];
__constant__
schar
scalar_8s
[
4
];
__constant__
ushort
scalar_16u
[
4
];
__constant__
short
scalar_16s
[
4
];
__constant__
int
scalar_32s
[
4
];
__constant__
float
scalar_32f
[
4
];
__constant__
double
scalar_64f
[
4
];
template
<
typename
T
>
__device__
__forceinline__
T
readScalar
(
int
i
);
template
<
>
__device__
__forceinline__
uchar
readScalar
<
uchar
>
(
int
i
)
{
return
scalar_8u
[
i
];}
template
<
>
__device__
__forceinline__
schar
readScalar
<
schar
>
(
int
i
)
{
return
scalar_8s
[
i
];}
template
<
>
__device__
__forceinline__
ushort
readScalar
<
ushort
>
(
int
i
)
{
return
scalar_16u
[
i
];}
template
<
>
__device__
__forceinline__
short
readScalar
<
short
>
(
int
i
)
{
return
scalar_16s
[
i
];}
template
<
>
__device__
__forceinline__
int
readScalar
<
int
>
(
int
i
)
{
return
scalar_32s
[
i
];}
template
<
>
__device__
__forceinline__
float
readScalar
<
float
>
(
int
i
)
{
return
scalar_32f
[
i
];}
template
<
>
__device__
__forceinline__
double
readScalar
<
double
>
(
int
i
)
{
return
scalar_64f
[
i
];}
static
inline
void
writeScalar
(
const
uchar
*
vals
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
scalar_8u
,
vals
,
sizeof
(
uchar
)
*
4
)
);
}
static
inline
void
writeScalar
(
const
schar
*
vals
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
scalar_8s
,
vals
,
sizeof
(
schar
)
*
4
)
);
}
static
inline
void
writeScalar
(
const
ushort
*
vals
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
scalar_16u
,
vals
,
sizeof
(
ushort
)
*
4
)
);
}
static
inline
void
writeScalar
(
const
short
*
vals
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
scalar_16s
,
vals
,
sizeof
(
short
)
*
4
)
);
}
static
inline
void
writeScalar
(
const
int
*
vals
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
scalar_32s
,
vals
,
sizeof
(
int
)
*
4
)
);
}
static
inline
void
writeScalar
(
const
float
*
vals
)
template
<
typename
T
,
class
Mask
>
__global__
void
set
(
PtrStepSz
<
T
>
mat
,
const
Mask
mask
,
const
int
channels
,
const
typename
TypeVec
<
T
,
4
>::
vec_type
value
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
scalar_32f
,
vals
,
sizeof
(
float
)
*
4
)
);
}
static
inline
void
writeScalar
(
const
double
*
vals
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
scalar_64f
,
vals
,
sizeof
(
double
)
*
4
)
);
}
const
int
x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
const
int
y
=
blockIdx
.
y
*
blockDim
.
y
+
threadIdx
.
y
;
template
<
typename
T
>
__global__
void
set
(
T
*
mat
,
int
cols
,
int
rows
,
size_t
step
,
int
channels
)
{
size_t
x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
size_t
y
=
blockIdx
.
y
*
blockDim
.
y
+
threadIdx
.
y
;
if
(
x
>=
mat
.
cols
*
channels
||
y
>=
mat
.
rows
)
return
;
if
((
x
<
cols
*
channels
)
&&
(
y
<
rows
))
{
size_t
idx
=
y
*
(
step
>>
shift_and_sizeof
<
T
>::
shift
)
+
x
;
mat
[
idx
]
=
readScalar
<
T
>
(
x
%
channels
);
}
const
T
scalar
[
4
]
=
{
value
.
x
,
value
.
y
,
value
.
z
,
value
.
w
};
if
(
mask
(
y
,
x
/
channels
))
mat
(
y
,
x
)
=
scalar
[
x
%
channels
];
}
template
<
typename
T
>
void
set
(
PtrStepSz
<
T
>
mat
,
const
T
*
scalar
,
int
channels
,
cudaStream_t
stream
)
{
writeScalar
(
scalar
)
;
typedef
typename
TypeVec
<
T
,
4
>::
vec_type
scalar_t
;
dim3
threadsPerBlock
(
32
,
8
,
1
);
dim3
numBlocks
(
mat
.
cols
*
channels
/
threadsPerBlock
.
x
+
1
,
mat
.
rows
/
threadsPerBlock
.
y
+
1
,
1
);
dim3
block
(
32
,
8
);
dim3
grid
(
divUp
(
mat
.
cols
*
channels
,
block
.
x
),
divUp
(
mat
.
rows
,
block
.
y
)
);
set
<
T
><<<
numBlocks
,
threadsPerBlock
,
0
,
stream
>>>
(
mat
.
data
,
mat
.
cols
,
mat
.
rows
,
mat
.
step
,
channels
);
set
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
mat
,
WithOutMask
(),
channels
,
VecTraits
<
scalar_t
>::
make
(
scalar
)
);
cudaSafeCall
(
cudaGetLastError
()
);
if
(
stream
==
0
)
...
...
@@ -177,29 +126,15 @@ namespace cv { namespace gpu { namespace cudev
template
void
set
<
float
>(
PtrStepSz
<
float
>
mat
,
const
float
*
scalar
,
int
channels
,
cudaStream_t
stream
);
template
void
set
<
double
>(
PtrStepSz
<
double
>
mat
,
const
double
*
scalar
,
int
channels
,
cudaStream_t
stream
);
template
<
typename
T
>
__global__
void
set
(
T
*
mat
,
const
uchar
*
mask
,
int
cols
,
int
rows
,
size_t
step
,
int
channels
,
size_t
step_mask
)
{
size_t
x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
size_t
y
=
blockIdx
.
y
*
blockDim
.
y
+
threadIdx
.
y
;
if
((
x
<
cols
*
channels
)
&&
(
y
<
rows
))
if
(
mask
[
y
*
step_mask
+
x
/
channels
]
!=
0
)
{
size_t
idx
=
y
*
(
step
>>
shift_and_sizeof
<
T
>::
shift
)
+
x
;
mat
[
idx
]
=
readScalar
<
T
>
(
x
%
channels
);
}
}
template
<
typename
T
>
void
set
(
PtrStepSz
<
T
>
mat
,
const
T
*
scalar
,
PtrStepSzb
mask
,
int
channels
,
cudaStream_t
stream
)
{
writeScalar
(
scalar
)
;
typedef
typename
TypeVec
<
T
,
4
>::
vec_type
scalar_t
;
dim3
threadsPerBlock
(
32
,
8
,
1
);
dim3
numBlocks
(
mat
.
cols
*
channels
/
threadsPerBlock
.
x
+
1
,
mat
.
rows
/
threadsPerBlock
.
y
+
1
,
1
);
dim3
block
(
32
,
8
);
dim3
grid
(
divUp
(
mat
.
cols
*
channels
,
block
.
x
),
divUp
(
mat
.
rows
,
block
.
y
)
);
set
<
T
><<<
numBlocks
,
threadsPerBlock
,
0
,
stream
>>>
(
mat
.
data
,
mask
.
data
,
mat
.
cols
,
mat
.
rows
,
mat
.
step
,
channels
,
mask
.
step
);
set
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
mat
,
SingleMask
(
mask
),
channels
,
VecTraits
<
scalar_t
>::
make
(
scalar
)
);
cudaSafeCall
(
cudaGetLastError
()
);
if
(
stream
==
0
)
...
...
@@ -215,8 +150,7 @@ namespace cv { namespace gpu { namespace cudev
template
void
set
<
double
>(
PtrStepSz
<
double
>
mat
,
const
double
*
scalar
,
PtrStepSzb
mask
,
int
channels
,
cudaStream_t
stream
);
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// convert
template
<
typename
T
,
typename
D
,
typename
S
>
struct
Convertor
:
unary_function
<
T
,
D
>
{
...
...
@@ -281,8 +215,6 @@ namespace cv { namespace gpu { namespace cudev
template
<
typename
T
,
typename
D
,
typename
S
>
void
cvt_
(
PtrStepSzb
src
,
PtrStepSzb
dst
,
double
alpha
,
double
beta
,
cudaStream_t
stream
)
{
cudaSafeCall
(
cudaSetDoubleForDevice
(
&
alpha
)
);
cudaSafeCall
(
cudaSetDoubleForDevice
(
&
beta
)
);
Convertor
<
T
,
D
,
S
>
op
(
static_cast
<
S
>
(
alpha
),
static_cast
<
S
>
(
beta
));
cv
::
gpu
::
cudev
::
transform
((
PtrStepSz
<
T
>
)
src
,
(
PtrStepSz
<
D
>
)
dst
,
op
,
WithOutMask
(),
stream
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录