Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
6c8b6bd0
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6c8b6bd0
编写于
7月 17, 2014
作者:
A
Alexander Karsakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added packing to CCS format
上级
ed07241f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
82 addition
and
101 deletion
+82
-101
modules/core/src/dxt.cpp
modules/core/src/dxt.cpp
+27
-90
modules/core/src/opencl/fft.cl
modules/core/src/opencl/fft.cl
+42
-6
modules/core/test/ocl/test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+13
-5
未找到文件。
modules/core/src/dxt.cpp
浏览文件 @
6c8b6bd0
...
...
@@ -2083,20 +2083,19 @@ struct OCL_FftPlan
int
dft_size
;
int
flags
;
OCL_FftPlan
(
int
_size
,
int
_flags
)
:
dft_size
(
_size
),
flags
(
_flags
)
bool
status
;
OCL_FftPlan
(
int
_size
,
int
_flags
)
:
dft_size
(
_size
),
flags
(
_flags
)
,
status
(
true
)
{
int
min_radix
=
INT_MAX
;
std
::
vector
<
int
>
radixes
,
blocks
;
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
thread_count
=
(
dft_size
+
min_radix
-
1
)
/
min_radix
;
printf
(
"cols: %d - "
,
dft_size
);
for
(
int
i
=
0
;
i
<
radixes
.
size
();
i
++
)
if
(
thread_count
>
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
())
{
printf
(
"%d "
,
radixes
[
i
]);
status
=
false
;
return
;
}
printf
(
"min radix - %d
\n
"
,
min_radix
);
// generate string with radix calls
String
radix_processing
;
...
...
@@ -2142,6 +2141,9 @@ struct OCL_FftPlan
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
dft_size
,
int
flags
,
bool
rows
=
true
)
const
{
if
(
!
status
)
return
false
;
UMat
src
=
_src
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
...
...
@@ -2162,11 +2164,14 @@ struct OCL_FftPlan
kernel_name
=
"fft_multi_radix_cols"
;
}
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
dft_size
==
1
;
String
options
=
buildOptions
;
if
(
src
.
channels
()
==
1
)
options
+=
" -D REAL_INPUT"
;
if
(
dst
.
channels
()
==
1
)
options
+=
" -D CCS_OUTPUT"
;
if
((
is1d
&&
src
.
channels
()
==
1
)
||
(
rows
&&
(
flags
&
DFT_REAL_OUTPUT
)))
options
+=
" -D NO_CONJUGATE"
;
ocl
::
Kernel
k
(
kernel_name
.
c_str
(),
ocl
::
core
::
fft_oclsrc
,
options
);
if
(
k
.
empty
())
...
...
@@ -2219,61 +2224,16 @@ protected:
std
::
vector
<
OCL_FftPlan
*>
planStorage
;
};
static
bool
ocl_packToCCS
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
src
.
size
(),
CV_32F
);
UMat
dst
=
_dst
.
getUMat
();
src
=
src
.
reshape
(
1
);
if
((
flags
&
DFT_ROWS
)
==
0
&&
src
.
rows
>
1
)
{
// pack to CCS by rows
if
(
dst
.
cols
>
2
)
src
.
colRange
(
2
,
dst
.
cols
+
(
dst
.
cols
%
2
)).
copyTo
(
dst
.
colRange
(
1
,
dst
.
cols
-
1
+
(
dst
.
cols
%
2
)));
Mat
dst_mat
=
dst
.
getMat
(
ACCESS_WRITE
);
Mat
buffer_mat
=
src
.
getMat
(
ACCESS_READ
);
dst_mat
.
at
<
float
>
(
0
,
0
)
=
buffer_mat
.
at
<
float
>
(
0
,
0
);
dst_mat
.
at
<
float
>
(
dst_mat
.
rows
-
1
,
0
)
=
buffer_mat
.
at
<
float
>
(
src
.
rows
/
2
,
0
);
for
(
int
i
=
1
;
i
<
dst_mat
.
rows
-
1
;
i
+=
2
)
{
dst_mat
.
at
<
float
>
(
i
,
0
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
0
);
dst_mat
.
at
<
float
>
(
i
+
1
,
0
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
1
);
}
if
(
dst_mat
.
cols
%
2
==
0
)
{
dst_mat
.
at
<
float
>
(
0
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
(
0
,
src
.
cols
/
2
);
dst_mat
.
at
<
float
>
(
dst_mat
.
rows
-
1
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
(
src
.
rows
/
2
,
src
.
cols
/
2
);
for
(
int
i
=
1
;
i
<
dst_mat
.
rows
-
1
;
i
+=
2
)
{
dst_mat
.
at
<
float
>
(
i
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
src
.
cols
/
2
);
dst_mat
.
at
<
float
>
(
i
+
1
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
src
.
cols
/
2
+
1
);
}
}
}
else
{
// pack to CCS each row
src
.
colRange
(
0
,
1
).
copyTo
(
dst
.
colRange
(
0
,
1
));
src
.
colRange
(
2
,
(
dst
.
cols
+
1
)).
copyTo
(
dst
.
colRange
(
1
,
dst
.
cols
));
}
return
true
;
}
static
bool
ocl_dft_C2C_rows
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
)
{
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
(),
flags
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
,
flags
,
true
);
}
static
bool
ocl_dft_C2C_cols
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
static
bool
ocl_dft_C2C_cols
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_cols
,
int
flags
)
{
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
rows
(),
flags
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
_src
.
cols
()
,
flags
,
false
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_cols
,
flags
,
false
);
}
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
...
...
@@ -2315,6 +2275,8 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
// Forward Complex to CCS not supported
if
(
complex_input
&&
real_output
&&
!
inv
)
{
flags
^=
DFT_REAL_OUTPUT
;
flags
|=
DFT_COMPLEX_OUTPUT
;
real_output
=
0
;
complex_output
=
1
;
}
...
...
@@ -2344,23 +2306,21 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
if
(
complex_output
)
{
if
(
real_input
&&
is1d
&&
!
inv
)
output
.
create
(
src
.
size
(),
CV_32FC2
);
else
{
_dst
.
create
(
src
.
size
(),
CV_32FC2
);
output
=
_dst
.
getUMat
();
}
}
else
_dst
.
create
(
src
.
size
(),
CV_32FC2
);
output
=
_dst
.
getUMat
();
}
else
{
// CCS
if
(
is1d
)
{
_dst
.
create
(
src
.
size
(),
CV_32FC1
);
output
=
_dst
.
getUMat
();
}
else
{
_dst
.
create
(
src
.
size
(),
CV_32FC1
);
output
.
create
(
src
.
size
(),
CV_32FC2
);
}
}
if
(
!
ocl_dft_C2C_rows
(
input
,
output
,
nonzero_rows
,
flags
))
...
...
@@ -2368,32 +2328,13 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
if
(
!
is1d
)
{
if
(
!
ocl_dft_C2C_cols
(
output
,
output
,
flags
))
int
nonzero_cols
=
real_input
&&
real_output
?
output
.
cols
/
2
+
1
:
output
.
cols
;
if
(
!
ocl_dft_C2C_cols
(
output
,
_dst
,
nonzero_cols
,
flags
))
return
false
;
}
if
(
complex_output
)
}
else
{
if
(
real_input
&&
is1d
&&
!
inv
)
_dst
.
assign
(
output
.
colRange
(
0
,
output
.
cols
/
2
+
1
));
else
_dst
.
assign
(
output
);
_dst
.
assign
(
output
);
}
else
{
if
(
!
inv
)
{
if
(
!
is1d
)
ocl_packToCCS
(
output
,
_dst
,
flags
);
else
_dst
.
assign
(
output
);
}
else
{
// copy real part to dst
}
}
//printf("OCL!\n");
return
true
;
}
...
...
@@ -2435,7 +2376,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
int
elem_size
=
(
int
)
src
.
elemSize1
(),
complex_elem_size
=
elem_size
*
2
;
int
factors
[
34
];
bool
inplace_transform
=
false
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
src
.
rows
==
1
;
#ifdef USE_IPP_DFT
AutoBuffer
<
uchar
>
ippbuf
;
int
ipp_norm_flag
=
!
(
flags
&
DFT_SCALE
)
?
8
:
inv
?
2
:
1
;
...
...
@@ -2444,10 +2384,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
CV_Assert
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
);
if
(
!
inv
&&
src
.
channels
()
==
1
&&
(
flags
&
DFT_COMPLEX_OUTPUT
)
)
if
(
!
is1d
)
_dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
depth
,
2
)
);
else
_dst
.
create
(
Size
(
src
.
cols
/
2
+
1
,
src
.
rows
),
CV_MAKETYPE
(
depth
,
2
)
);
_dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
depth
,
2
)
);
else
if
(
inv
&&
src
.
channels
()
==
2
&&
(
flags
&
DFT_REAL_OUTPUT
)
)
_dst
.
create
(
src
.
size
(),
depth
);
else
...
...
modules/core/src/opencl/fft.cl
浏览文件 @
6c8b6bd0
...
...
@@ -331,10 +331,17 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
RADIX_PROCESS
;
#
ifndef
CCS_OUTPUT
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
)))
;
#
ifdef
NO_CONJUGATE
//
copy
result
without
complex
conjugate
const
int
cols
=
dst_cols/2
+
1
;
#
else
const
int
cols
=
dst_cols
;
#
endif
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
pragma
unroll
for
(
int
i=
0
; i<kercn; i++
)
dst[i
*block_size]
=
smem[x
+
i*block_size
]
;
for
(
int
i=
x
; i<cols; i+=block_size
)
dst[i
]
=
smem[i
]
;
#
else
//
pack
row
to
CCS
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
...
...
@@ -358,7 +365,6 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
{
__local
float2
smem[LOCAL_SIZE]
;
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
))
;
__constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
...
...
@@ -370,9 +376,39 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
RADIX_PROCESS
;
//
copy
data
to
dst
#
ifndef
CCS_OUTPUT
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
*
((
__global
float2*
)(
dst
+
i*block_size*src_step
))
=
smem[y
+
i*block_size]
;
*
((
__global
float2*
)(
dst
+
i*block_size*dst_step
))
=
smem[y
+
i*block_size]
;
#
else
if
(
x
==
0
)
{
//
pack
first
column
to
CCS
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y+1,
dst_step,
dst_offset
)
;
for
(
int
i=y
; i<dst_rows-1; i+=block_size, dst+=dst_step*block_size)
*
((
__global
float*
)
dst
)
=
smem_1cn[i+2]
;
if
(
y
==
0
)
*
((
__global
float*
)
(
dst_ptr
+
dst_offset
))
=
smem_1cn[0]
;
}
else
if
(
x
==
(
dst_cols+1
)
/2
)
{
//
pack
last
column
to
CCS
(
if
needed
)
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
mad24
(
y+1,
dst_step,
dst_offset
))
;
for
(
int
i=y
; i<dst_rows-1; i+=block_size, dst+=dst_step*block_size)
*
((
__global
float*
)
dst
)
=
smem_1cn[i+2]
;
if
(
y
==
0
)
*
((
__global
float*
)
(
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
dst_offset
)))
=
smem_1cn[0]
;
}
else
{
__global
uchar*
dst
=
dst_ptr
+
mad24
(
x,
(
int
)
sizeof
(
float
)
*2,
mad24
(
y,
dst_step,
dst_offset
-
(
int
)
sizeof
(
float
)))
;
#
pragma
unroll
for
(
int
i=y
; i<dst_rows; i+=block_size, dst+=block_size*dst_step)
vstore2
(
smem[i],
0
,
(
__global
float*
)
dst
)
;
}
#
endif
}
}
\ No newline at end of file
modules/core/test/ocl/test_dft.cpp
浏览文件 @
6c8b6bd0
...
...
@@ -67,6 +67,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool)
cv
::
Size
dft_size
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
bool
inplace
;
bool
is1d
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
...
...
@@ -96,6 +97,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool)
inplace
=
GET_PARAM
(
3
);
if
(
inplace
&&
dft_type
==
0
)
inplace
=
0
;
is1d
=
(
dft_flags
&
DFT_ROWS
)
!=
0
||
dft_size
.
height
==
1
;
}
void
generateTestData
()
...
...
@@ -114,17 +116,23 @@ OCL_TEST_P(Dft, Mat)
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
));
if
(
dft_type
==
R2C
&&
is1d
)
{
dst
=
dst
(
cv
::
Range
(
0
,
dst
.
rows
),
cv
::
Range
(
0
,
dst
.
cols
/
2
+
1
));
udst
=
udst
(
cv
::
Range
(
0
,
udst
.
rows
),
cv
::
Range
(
0
,
udst
.
cols
/
2
+
1
));
}
//
Mat gpu = udst.getMat(ACCESS_READ);
Mat
gpu
=
udst
.
getMat
(
ACCESS_READ
);
//std::cout << src << std::endl;
//std::cout << dst << std::endl;
//std::cout << gpu << std::endl;
//int cn = udst.channels();
//
//Mat df;
//absdiff(dst, gpu, df);
//std::cout <<
df
<< std::endl;
//std::cout <<
Mat_<int>(df)
<< std::endl;
double
eps
=
src
.
size
().
area
()
*
1e-4
;
EXPECT_MAT_NEAR
(
dst
,
udst
,
eps
);
...
...
@@ -181,9 +189,9 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
6
,
1
),
cv
::
Size
(
5
,
8
),
cv
::
Size
(
30
,
20
),
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
6
,
4
),
cv
::
Size
(
5
,
8
),
cv
::
Size
(
6
,
6
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
/*, (OCL_FFT_TYPE) C2R*/
),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
,
(
OCL_FFT_TYPE
)
C2R
),
Bool
(),
// DFT_ROWS
Bool
()
// inplace
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录