Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
962b5197
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
962b5197
编写于
7月 28, 2014
作者:
V
Vadim Pisarevsky
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2996 from akarsakov:ocl_dft_new_concept
上级
0c749fd7
fecfaf40
变更
3
展开全部
隐藏空白更改
内联
并排
Showing
3 changed files
with
1244 additions
and
17 deletions
+1244
-17
modules/core/include/opencv2/core/cvdef.h
modules/core/include/opencv2/core/cvdef.h
+1
-0
modules/core/src/dxt.cpp
modules/core/src/dxt.cpp
+379
-17
modules/core/src/opencl/fft.cl
modules/core/src/opencl/fft.cl
+864
-0
未找到文件。
modules/core/include/opencv2/core/cvdef.h
浏览文件 @
962b5197
...
...
@@ -244,6 +244,7 @@ typedef signed char schar;
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
#define CV_2PI 6.283185307179586476925286766559
#define CV_LOG2 0.69314718055994530941723212145818
/****************************************************************************************\
...
...
modules/core/src/dxt.cpp
浏览文件 @
962b5197
...
...
@@ -43,6 +43,7 @@
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include "opencl_kernels.hpp"
#include <map>
namespace
cv
{
...
...
@@ -1781,6 +1782,375 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
#endif
}
#ifdef HAVE_OPENCL
namespace
cv
{
// Kind of transform requested from the OpenCL DFT code.
// Bit 0 is set for complex input, bit 1 for complex output.
enum FftType
{
    R2R = 0,          // real to CCS in case forward transform, CCS to real otherwise
    C2R = 1 << 0,     // complex to real in case inverse transform
    R2C = 1 << 1,     // real to complex in case forward transform
    C2C = C2R | R2C   // complex to complex
};
struct
OCL_FftPlan
{
private:
UMat
twiddles
;
String
buildOptions
;
int
thread_count
;
bool
status
;
int
dft_size
;
public:
OCL_FftPlan
(
int
_size
)
:
dft_size
(
_size
),
status
(
true
)
{
int
min_radix
;
std
::
vector
<
int
>
radixes
,
blocks
;
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
thread_count
=
dft_size
/
min_radix
;
if
(
thread_count
>
(
int
)
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
())
{
status
=
false
;
return
;
}
// generate string with radix calls
String
radix_processing
;
int
n
=
1
,
twiddle_size
=
0
;
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
int
radix
=
radixes
[
i
],
block
=
blocks
[
i
];
if
(
block
>
1
)
radix_processing
+=
format
(
"fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);"
,
radix
,
block
,
twiddle_size
,
n
,
dft_size
/
radix
);
else
radix_processing
+=
format
(
"fft_radix%d(smem,twiddles+%d,ind,%d,%d);"
,
radix
,
twiddle_size
,
n
,
dft_size
/
radix
);
twiddle_size
+=
(
radix
-
1
)
*
n
;
n
*=
radix
;
}
Mat
tw
(
1
,
twiddle_size
,
CV_32FC2
);
float
*
ptr
=
tw
.
ptr
<
float
>
();
int
ptr_index
=
0
;
n
=
1
;
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
int
radix
=
radixes
[
i
];
n
*=
radix
;
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
{
double
theta
=
-
CV_2PI
*
j
/
n
;
for
(
int
k
=
0
;
k
<
(
n
/
radix
);
k
++
)
{
ptr
[
ptr_index
++
]
=
(
float
)
cos
(
k
*
theta
);
ptr
[
ptr_index
++
]
=
(
float
)
sin
(
k
*
theta
);
}
}
}
twiddles
=
tw
.
getUMat
(
ACCESS_READ
);
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s"
,
dft_size
,
min_radix
,
radix_processing
.
c_str
());
}
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
num_dfts
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
{
if
(
!
status
)
return
false
;
UMat
src
=
_src
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
size_t
globalsize
[
2
];
size_t
localsize
[
2
];
String
kernel_name
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
num_dfts
==
1
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
String
options
=
buildOptions
;
if
(
rows
)
{
globalsize
[
0
]
=
thread_count
;
globalsize
[
1
]
=
src
.
rows
;
localsize
[
0
]
=
thread_count
;
localsize
[
1
]
=
1
;
kernel_name
=
!
inv
?
"fft_multi_radix_rows"
:
"ifft_multi_radix_rows"
;
if
((
is1d
||
inv
)
&&
(
flags
&
DFT_SCALE
))
options
+=
" -D DFT_SCALE"
;
}
else
{
globalsize
[
0
]
=
num_dfts
;
globalsize
[
1
]
=
thread_count
;
localsize
[
0
]
=
1
;
localsize
[
1
]
=
thread_count
;
kernel_name
=
!
inv
?
"fft_multi_radix_cols"
:
"ifft_multi_radix_cols"
;
if
(
flags
&
DFT_SCALE
)
options
+=
" -D DFT_SCALE"
;
}
options
+=
src
.
channels
()
==
1
?
" -D REAL_INPUT"
:
" -D COMPLEX_INPUT"
;
options
+=
dst
.
channels
()
==
1
?
" -D REAL_OUTPUT"
:
" -D COMPLEX_OUTPUT"
;
options
+=
is1d
?
" -D IS_1D"
:
""
;
if
(
!
inv
)
{
if
((
is1d
&&
src
.
channels
()
==
1
)
||
(
rows
&&
(
fftType
==
R2R
)))
options
+=
" -D NO_CONJUGATE"
;
}
else
{
if
(
rows
&&
(
fftType
==
C2R
||
fftType
==
R2R
))
options
+=
" -D NO_CONJUGATE"
;
if
(
dst
.
cols
%
2
==
0
)
options
+=
" -D EVEN"
;
}
ocl
::
Kernel
k
(
kernel_name
.
c_str
(),
ocl
::
core
::
fft_oclsrc
,
options
);
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
num_dfts
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
private:
static
void
ocl_getRadixes
(
int
cols
,
std
::
vector
<
int
>&
radixes
,
std
::
vector
<
int
>&
blocks
,
int
&
min_radix
)
{
int
factors
[
34
];
int
nf
=
DFTFactorize
(
cols
,
factors
);
int
n
=
1
;
int
factor_index
=
0
;
min_radix
=
INT_MAX
;
// 2^n transforms
if
((
factors
[
factor_index
]
&
1
)
==
0
)
{
for
(
;
n
<
factors
[
factor_index
];)
{
int
radix
=
2
,
block
=
1
;
if
(
8
*
n
<=
factors
[
0
])
radix
=
8
;
else
if
(
4
*
n
<=
factors
[
0
])
{
radix
=
4
;
if
(
cols
%
12
==
0
)
block
=
3
;
else
if
(
cols
%
8
==
0
)
block
=
2
;
}
else
{
if
(
cols
%
10
==
0
)
block
=
5
;
else
if
(
cols
%
8
==
0
)
block
=
4
;
else
if
(
cols
%
6
==
0
)
block
=
3
;
else
if
(
cols
%
4
==
0
)
block
=
2
;
}
radixes
.
push_back
(
radix
);
blocks
.
push_back
(
block
);
min_radix
=
min
(
min_radix
,
block
*
radix
);
n
*=
radix
;
}
factor_index
++
;
}
// all the other transforms
for
(
;
factor_index
<
nf
;
factor_index
++
)
{
int
radix
=
factors
[
factor_index
],
block
=
1
;
if
(
radix
==
3
)
{
if
(
cols
%
12
==
0
)
block
=
4
;
else
if
(
cols
%
9
==
0
)
block
=
3
;
else
if
(
cols
%
6
==
0
)
block
=
2
;
}
else
if
(
radix
==
5
)
{
if
(
cols
%
10
==
0
)
block
=
2
;
}
radixes
.
push_back
(
radix
);
blocks
.
push_back
(
block
);
min_radix
=
min
(
min_radix
,
block
*
radix
);
}
}
};
// Cache of OCL_FftPlan objects keyed by transform length, so that a plan is
// built only the first time a given length is requested.
class OCL_FftPlanCache
{
public:
    // Gives access to the single shared cache object.
    static OCL_FftPlanCache& getInstance()
    {
        static OCL_FftPlanCache planCache;
        return planCache;
    }

    // Returns the plan stored for dft_size; creates and stores it if missing.
    Ptr<OCL_FftPlan> getFftPlan(int dft_size)
    {
        typedef std::map<int, Ptr<OCL_FftPlan> > PlanMap;

        PlanMap::iterator cached = planStorage.find(dft_size);
        if (cached != planStorage.end())
            return cached->second;

        Ptr<OCL_FftPlan> created(new OCL_FftPlan(dft_size));
        planStorage[dft_size] = created;
        return created;
    }

    ~OCL_FftPlanCache()
    {
        planStorage.clear();
    }

protected:
    // Construction is restricted; use getInstance().
    OCL_FftPlanCache() :
        planStorage()
    {
    }

    // length -> plan
    std::map<int, Ptr<OCL_FftPlan> > planStorage;
};
// Runs the row kernel of the cached plan whose length is the width of _src.
// Returns the result of OCL_FftPlan::enqueueTransform().
static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
{
    OCL_FftPlanCache& cache = OCL_FftPlanCache::getInstance();
    Ptr<OCL_FftPlan> rowPlan = cache.getFftPlan(_src.cols());

    const bool alongRows = true;
    return rowPlan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, alongRows);
}
// Runs the column kernel of the cached plan whose length is the height of _src.
// Returns the result of OCL_FftPlan::enqueueTransform().
static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
{
    OCL_FftPlanCache& cache = OCL_FftPlanCache::getInstance();
    Ptr<OCL_FftPlan> colPlan = cache.getFftPlan(_src.rows());

    const bool alongRows = false;
    return colPlan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, alongRows);
}
// OpenCL implementation of dft() for CV_32FC1 / CV_32FC2 input.
// Returns false when the input is not supported or a kernel could not be run;
// returns true after all required row/column passes were enqueued.
static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
{
    const int type = _src.type();
    const int cn = CV_MAT_CN(type);
    const Size ssize = _src.size();

    // only single-precision real or complex data is handled
    if (type != CV_32FC1 && type != CV_32FC2)
        return false;

    // if is not a multiplication of prime numbers { 2, 3, 5 }
    if (ssize.area() != getOptimalDFTSize(ssize.area()))
        return false;

    UMat src = _src.getUMat();

    const bool complexInput = (cn == 2);
    const bool realOutputFlag = (flags & DFT_REAL_OUTPUT) != 0;
    bool complexOutput = (flags & DFT_COMPLEX_OUTPUT) != 0;
    const bool inv = (flags & DFT_INVERSE) != 0;

    if (nonzero_rows <= 0 || nonzero_rows > _src.rows())
        nonzero_rows = _src.rows();
    const bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1;

    // if output format is not specified: real input gives real output,
    // anything else gives complex output
    if (!complexOutput && !realOutputFlag)
        complexOutput = (cn != 1);

    // bit 0: complex input, bit 1: complex output
    FftType fftType = (FftType)((complexInput ? 1 : 0) | ((complexOutput ? 1 : 0) << 1));

    if (!inv)
    {
        // Forward Complex to CCS not supported
        if (fftType == C2R)
            fftType = C2C;
    }
    else
    {
        // Inverse CCS to Complex not supported
        if (fftType == R2C)
            fftType = R2R;
    }

    // The destination is complex for C2C / R2C and real otherwise. A real 2D
    // result additionally needs a separate complex buffer between the passes.
    const bool complexResult = (fftType == C2C || fftType == R2C);
    UMat output;
    _dst.create(src.size(), complexResult ? CV_32FC2 : CV_32FC1);
    if (complexResult || is1d)
        output = _dst.getUMat();
    else
        output.create(src.size(), CV_32FC2);

    if (!inv)
    {
        // forward: rows first, then columns
        if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
            return false;
        if (is1d)
            return true;

        const int nonzero_cols = (fftType == R2R) ? output.cols/2 + 1 : output.cols;
        return ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType);
    }

    if (fftType == C2C)
    {
        // complex output: rows first, then columns on the same buffer
        if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
            return false;
        if (is1d)
            return true;

        return ocl_dft_cols(output, output, output.cols, flags, fftType);
    }

    if (is1d)
        return ocl_dft_rows(src, output, nonzero_rows, flags, fftType);

    // inverse with real output: columns first, then rows
    const int nonzero_cols = src.cols/2 + 1;
    if (!ocl_dft_cols(src, output, nonzero_cols, flags, fftType))
        return false;
    return ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType);
}
}
// namespace cv;
#endif
#ifdef HAVE_CLAMDFFT
namespace
cv
{
...
...
@@ -1791,14 +2161,6 @@ namespace cv {
CV_Assert(s == CLFFT_SUCCESS); \
}
// Transform kinds of the clAmdFft-based path; values run from 0 to 3.
enum FftType
{
    // real to real
    R2R = 0,
    // opencl HERMITIAN_INTERLEAVED to real
    C2R,
    // real to opencl HERMITIAN_INTERLEAVED
    R2C,
    // complex to complex
    C2C
};
class
PlanCache
{
struct
FftPlan
...
...
@@ -1923,7 +2285,7 @@ public:
}
// no baked plan is found, so let's create a new one
FftPlan
*
newPlan
=
new
FftPlan
(
dft_size
,
src_step
,
dst_step
,
doubleFP
,
inplace
,
flags
,
fftType
);
Ptr
<
FftPlan
>
newPlan
=
Ptr
<
FftPlan
>
(
new
FftPlan
(
dft_size
,
src_step
,
dst_step
,
doubleFP
,
inplace
,
flags
,
fftType
)
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
->
plHandle
;
...
...
@@ -1931,8 +2293,6 @@ public:
~
PlanCache
()
{
for
(
std
::
vector
<
FftPlan
*>::
iterator
i
=
planStorage
.
begin
(),
end
=
planStorage
.
end
();
i
!=
end
;
++
i
)
delete
(
*
i
);
planStorage
.
clear
();
}
...
...
@@ -1942,7 +2302,7 @@ protected:
{
}
std
::
vector
<
FftPlan
*
>
planStorage
;
std
::
vector
<
Ptr
<
FftPlan
>
>
planStorage
;
};
extern
"C"
{
...
...
@@ -1960,7 +2320,7 @@ static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
}
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
static
bool
ocl_dft
_amdfft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
...
...
@@ -2019,7 +2379,6 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags)
tmpBuffer
.
addref
();
clSetEventCallback
(
e
,
CL_COMPLETE
,
oclCleanupCallback
,
tmpBuffer
.
u
);
return
true
;
}
...
...
@@ -2034,7 +2393,12 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
#ifdef HAVE_CLAMDFFT
CV_OCL_RUN
(
ocl
::
haveAmdFft
()
&&
ocl
::
Device
::
getDefault
().
type
()
!=
ocl
::
Device
::
TYPE_CPU
&&
_dst
.
isUMat
()
&&
_src0
.
dims
()
<=
2
&&
nonzero_rows
==
0
,
ocl_dft
(
_src0
,
_dst
,
flags
))
ocl_dft_amdfft
(
_src0
,
_dst
,
flags
))
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src0
.
dims
()
<=
2
,
ocl_dft
(
_src0
,
_dst
,
flags
,
nonzero_rows
))
#endif
static
DFTFunc
dft_tbl
[
6
]
=
...
...
@@ -2046,10 +2410,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
(
DFTFunc
)
RealDFT_64f
,
(
DFTFunc
)
CCSIDFT_64f
};
AutoBuffer
<
uchar
>
buf
;
void
*
spec
=
0
;
Mat
src0
=
_src0
.
getMat
(),
src
=
src0
;
int
prev_len
=
0
,
stage
=
0
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
...
...
modules/core/src/opencl/fft.cl
0 → 100644
浏览文件 @
962b5197
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录