Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
b17bf031
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b17bf031
编写于
7月 17, 2014
作者:
A
Alexander Karsakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added DFT_SCALE for forward transforms
上级
6c8b6bd0
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
49 addition
and
86 deletion
+49
-86
modules/core/src/dxt.cpp
modules/core/src/dxt.cpp
+10
-3
modules/core/src/opencl/fft.cl
modules/core/src/opencl/fft.cl
+22
-9
modules/core/test/ocl/test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+11
-11
samples/cpp/dft.cpp
samples/cpp/dft.cpp
+6
-63
未找到文件。
modules/core/src/dxt.cpp
浏览文件 @
b17bf031
...
...
@@ -2151,27 +2151,34 @@ struct OCL_FftPlan
size_t
localsize
[
2
];
String
kernel_name
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
dft_size
==
1
;
String
options
=
buildOptions
;
if
(
rows
)
{
globalsize
[
0
]
=
thread_count
;
globalsize
[
1
]
=
dft_size
;
localsize
[
0
]
=
thread_count
;
localsize
[
1
]
=
1
;
kernel_name
=
"fft_multi_radix_rows"
;
if
(
is1d
&&
(
flags
&
DFT_SCALE
))
options
+=
" -D DFT_SCALE"
;
}
else
{
globalsize
[
0
]
=
dft_size
;
globalsize
[
1
]
=
thread_count
;
localsize
[
0
]
=
1
;
localsize
[
1
]
=
thread_count
;
kernel_name
=
"fft_multi_radix_cols"
;
if
(
flags
&
DFT_SCALE
)
options
+=
" -D DFT_SCALE"
;
}
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
dft_size
==
1
;
String
options
=
buildOptions
;
if
(
src
.
channels
()
==
1
)
options
+=
" -D REAL_INPUT"
;
if
(
dst
.
channels
()
==
1
)
options
+=
" -D CCS_OUTPUT"
;
if
((
is1d
&&
src
.
channels
()
==
1
)
||
(
rows
&&
(
flags
&
DFT_REAL_OUTPUT
)))
options
+=
" -D NO_CONJUGATE"
;
if
(
is1d
)
options
+=
" -D IS_1D"
;
ocl
::
Kernel
k
(
kernel_name
.
c_str
(),
ocl
::
core
::
fft_oclsrc
,
options
);
if
(
k
.
empty
())
...
...
modules/core/src/opencl/fft.cl
浏览文件 @
b17bf031
...
...
@@ -301,6 +301,12 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
#
ifdef
DFT_SCALE
#
define
VAL
(
x,
scale
)
x*scale
#
else
#
define
VAL
(
x,
scale
)
x
#
endif
__kernel
void
fft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
...
...
@@ -314,6 +320,11 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
__constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
const
int
ind
=
x
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
#
ifdef
IS_1D
float
scale
=
1.f/dst_cols
;
#
else
float
scale
=
1.f/
(
dst_cols*dst_rows
)
;
#
endif
#
ifndef
REAL_INPUT
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
)))
;
...
...
@@ -341,15 +352,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
pragma
unroll
for
(
int
i=x
; i<cols; i+=block_size)
dst[i]
=
smem[i]
;
dst[i]
=
VAL
(
smem[i],
scale
)
;
#
else
//
pack
row
to
CCS
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
for
(
int
i=x
; i<dst_cols-1; i+=block_size)
dst[i+1]
=
smem_1cn[i+2]
;
dst[i+1]
=
VAL
(
smem_1cn[i+2],
scale
)
;
if
(
x
==
0
)
dst[0]
=
smem_1cn[0]
;
dst[0]
=
VAL
(
smem_1cn[0],
scale
)
;
#
endif
}
}
...
...
@@ -368,6 +379,8 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
float
scale
=
1.f/
(
dst_rows*dst_cols
)
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
smem[y+i*block_size]
=
*
((
__global
const
float2*
)(
src
+
i*block_size*src_step
))
;
...
...
@@ -380,7 +393,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
*
((
__global
float2*
)(
dst
+
i*block_size*dst_step
))
=
smem[y
+
i*block_size]
;
*
((
__global
float2*
)(
dst
+
i*block_size*dst_step
))
=
VAL
(
smem[y
+
i*block_size],
scale
)
;
#
else
if
(
x
==
0
)
{
...
...
@@ -388,9 +401,9 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y+1,
dst_step,
dst_offset
)
;
for
(
int
i=y
; i<dst_rows-1; i+=block_size, dst+=dst_step*block_size)
*
((
__global
float*
)
dst
)
=
smem_1cn[i+2]
;
*
((
__global
float*
)
dst
)
=
VAL
(
smem_1cn[i+2],
scale
)
;
if
(
y
==
0
)
*
((
__global
float*
)
(
dst_ptr
+
dst_offset
))
=
smem_1cn[0]
;
*
((
__global
float*
)
(
dst_ptr
+
dst_offset
))
=
VAL
(
smem_1cn[0],
scale
)
;
}
else
if
(
x
==
(
dst_cols+1
)
/2
)
{
...
...
@@ -398,16 +411,16 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
mad24
(
y+1,
dst_step,
dst_offset
))
;
for
(
int
i=y
; i<dst_rows-1; i+=block_size, dst+=dst_step*block_size)
*
((
__global
float*
)
dst
)
=
smem_1cn[i+2]
;
*
((
__global
float*
)
dst
)
=
VAL
(
smem_1cn[i+2],
scale
)
;
if
(
y
==
0
)
*
((
__global
float*
)
(
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
dst_offset
)))
=
smem_1cn[0]
;
*
((
__global
float*
)
(
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
dst_offset
)))
=
VAL
(
smem_1cn[0],
scale
)
;
}
else
{
__global
uchar*
dst
=
dst_ptr
+
mad24
(
x,
(
int
)
sizeof
(
float
)
*2,
mad24
(
y,
dst_step,
dst_offset
-
(
int
)
sizeof
(
float
)))
;
#
pragma
unroll
for
(
int
i=y
; i<dst_rows; i+=block_size, dst+=block_size*dst_step)
vstore2
(
smem[i]
,
0
,
(
__global
float*
)
dst
)
;
vstore2
(
VAL
(
smem[i],
scale
)
,
0
,
(
__global
float*
)
dst
)
;
}
#
endif
}
...
...
modules/core/test/ocl/test_dft.cpp
浏览文件 @
b17bf031
...
...
@@ -62,7 +62,7 @@ namespace ocl {
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
bool
,
bool
)
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
bool
,
bool
,
bool
)
{
cv
::
Size
dft_size
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
...
...
@@ -88,15 +88,14 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool)
}
if
(
GET_PARAM
(
2
))
dft_flags
|=
cv
::
DFT_ROWS
;
//if (GET_PARAM(3))
// if (dft_type == C2C) dft_flags |= cv::DFT_INVERSE;
//if (GET_PARAM(3))
// dft_flags |= cv::DFT_SCALE;
inplace
=
GET_PARAM
(
3
);
if
(
inplace
&&
dft_type
==
0
)
inplace
=
0
;
dft_flags
|=
cv
::
DFT_ROWS
;
if
(
GET_PARAM
(
3
))
dft_flags
|=
cv
::
DFT_SCALE
;
//if (GET_PARAM(4))
// dft_flags |= cv::DFT_INVERSE;
inplace
=
GET_PARAM
(
4
);
is1d
=
(
dft_flags
&
DFT_ROWS
)
!=
0
||
dft_size
.
height
==
1
;
}
...
...
@@ -123,7 +122,7 @@ OCL_TEST_P(Dft, Mat)
udst
=
udst
(
cv
::
Range
(
0
,
udst
.
rows
),
cv
::
Range
(
0
,
udst
.
cols
/
2
+
1
));
}
Mat
gpu
=
udst
.
getMat
(
ACCESS_READ
);
//
Mat gpu = udst.getMat(ACCESS_READ);
//std::cout << src << std::endl;
//std::cout << dst << std::endl;
//std::cout << gpu << std::endl;
...
...
@@ -193,6 +192,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(6, 4), cv::Size(5
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
,
(
OCL_FFT_TYPE
)
C2R
),
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_SCALE
Bool
()
// inplace
)
);
...
...
samples/cpp/dft.cpp
浏览文件 @
b17bf031
...
...
@@ -5,8 +5,6 @@
#include "opencv2/highgui.hpp"
#include <stdio.h>
#include <iostream>
#include <chrono>
using
namespace
cv
;
using
namespace
std
;
...
...
@@ -26,31 +24,6 @@ const char* keys =
int
main
(
int
argc
,
const
char
**
argv
)
{
//int cols = 4;
//int rows = 768;
//srand(0);
//Mat input(Size(cols, rows), CV_32FC2);
//for (int i=0; i<cols; i++)
// for (int j=0; j<rows; j++)
// input.at<Vec2f>(j,i) = Vec2f((float) rand() / RAND_MAX, (float) rand() / RAND_MAX);
//Mat dst;
//
//UMat gpu_input, gpu_dst;
//input.copyTo(gpu_input);
//auto start = std::chrono::system_clock::now();
//dft(input, dst, DFT_ROWS);
//auto cpu_duration = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now() - start);
//
//start = std::chrono::system_clock::now();
//dft(gpu_input, gpu_dst, DFT_ROWS);
//auto gpu_duration = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now() - start);
//double n = norm(dst, gpu_dst);
//cout << "norm = " << n << endl;
//cout << "CPU time: " << cpu_duration.count() << "ms" << endl;
//cout << "GPU time: " << gpu_duration.count() << "ms" << endl;
help
();
CommandLineParser
parser
(
argc
,
argv
,
keys
);
string
filename
=
parser
.
get
<
string
>
(
0
);
...
...
@@ -62,46 +35,16 @@ int main(int argc, const char ** argv)
printf
(
"Cannot read image file: %s
\n
"
,
filename
.
c_str
());
return
-
1
;
}
Mat
small_img
=
img
(
Rect
(
0
,
0
,
6
,
6
));
int
M
=
getOptimalDFTSize
(
small_img
.
rows
);
int
N
=
getOptimalDFTSize
(
small_img
.
cols
);
int
M
=
getOptimalDFTSize
(
img
.
rows
);
int
N
=
getOptimalDFTSize
(
img
.
cols
);
Mat
padded
;
copyMakeBorder
(
small_img
,
padded
,
0
,
M
-
small_img
.
rows
,
0
,
N
-
small_
img
.
cols
,
BORDER_CONSTANT
,
Scalar
::
all
(
0
));
copyMakeBorder
(
img
,
padded
,
0
,
M
-
img
.
rows
,
0
,
N
-
img
.
cols
,
BORDER_CONSTANT
,
Scalar
::
all
(
0
));
Mat
planes
[]
=
{
Mat_
<
float
>
(
padded
),
Mat
::
one
s
(
padded
.
size
(),
CV_32F
)};
Mat
complexImg
,
complexImg1
,
complexInput
;
Mat
planes
[]
=
{
Mat_
<
float
>
(
padded
),
Mat
::
zero
s
(
padded
.
size
(),
CV_32F
)};
Mat
complexImg
;
merge
(
planes
,
2
,
complexImg
);
Mat
realInput
;
padded
.
convertTo
(
realInput
,
CV_32F
);
complexInput
=
complexImg
;
//cout << complexImg << endl;
//dft(complexImg, complexImg, DFT_REAL_OUTPUT);
//cout << "Complex to Complex" << endl;
//cout << complexImg << endl;
cout
<<
"Complex input"
<<
endl
<<
complexInput
<<
endl
;
cout
<<
"Real input"
<<
endl
<<
realInput
<<
endl
;
dft
(
complexInput
,
complexImg1
,
DFT_COMPLEX_OUTPUT
);
cout
<<
"Complex to Complex image: "
<<
endl
;
cout
<<
endl
<<
complexImg1
<<
endl
;
Mat
realImg1
;
dft
(
complexInput
,
realImg1
,
DFT_REAL_OUTPUT
);
cout
<<
"Complex to Real image: "
<<
endl
;
cout
<<
endl
<<
realImg1
<<
endl
;
Mat
realOut
;
dft
(
complexImg1
,
realOut
,
DFT_INVERSE
|
DFT_COMPLEX_OUTPUT
);
cout
<<
"Complex to Complex (inverse):"
<<
endl
;
cout
<<
realOut
<<
endl
;
Mat
complexOut
;
dft
(
realImg1
,
complexOut
,
DFT_INVERSE
|
DFT_REAL_OUTPUT
|
DFT_SCALE
);
cout
<<
"Complex to Real (inverse):"
<<
endl
;
cout
<<
complexOut
<<
endl
;
dft
(
complexImg
,
complexImg
);
// compute log(1 + sqrt(Re(DFT(img))**2 + Im(DFT(img))**2))
split
(
complexImg
,
planes
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录