Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
762a5c83
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
762a5c83
编写于
6月 26, 2020
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
imgproc: align GaussianBlur/sepFilter2D OpenCL with CPU version
上级
2fed41df
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
273 addition
and
108 deletion
+273
-108
modules/imgproc/src/filter.dispatch.cpp
modules/imgproc/src/filter.dispatch.cpp
+177
-56
modules/imgproc/src/filter.hpp
modules/imgproc/src/filter.hpp
+17
-5
modules/imgproc/src/opencl/filterSepCol.cl
modules/imgproc/src/opencl/filterSepCol.cl
+13
-11
modules/imgproc/src/opencl/filterSepRow.cl
modules/imgproc/src/opencl/filterSepRow.cl
+7
-3
modules/imgproc/src/opencl/filterSep_singlePass.cl
modules/imgproc/src/opencl/filterSep_singlePass.cl
+9
-7
modules/imgproc/src/smooth.dispatch.cpp
modules/imgproc/src/smooth.dispatch.cpp
+44
-24
modules/stitching/src/exposure_compensate.cpp
modules/stitching/src/exposure_compensate.cpp
+6
-2
未找到文件。
modules/imgproc/src/filter.dispatch.cpp
浏览文件 @
762a5c83
...
...
@@ -729,11 +729,12 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
const
int
shift_bits
=
8
;
static
bool
ocl_sepRowFilter2D
(
const
UMat
&
src
,
UMat
&
buf
,
const
Mat
&
kernelX
,
int
anchor
,
int
borderType
,
int
ddepth
,
bool
fast8uc1
,
bool
int_arithm
)
int
borderType
,
int
ddepth
,
bool
fast8uc1
,
bool
int_arithm
,
int
shift_bits
)
{
CV_Assert
(
shift_bits
==
0
||
int_arithm
);
int
type
=
src
.
type
(),
cn
=
CV_MAT_CN
(
type
),
sdepth
=
CV_MAT_DEPTH
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
Size
bufSize
=
buf
.
size
();
...
...
@@ -801,8 +802,11 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
static
bool
ocl_sepColFilter2D
(
const
UMat
&
buf
,
UMat
&
dst
,
const
Mat
&
kernelY
,
double
delta
,
int
anchor
,
bool
int_arithm
)
static
bool
ocl_sepColFilter2D
(
const
UMat
&
buf
,
UMat
&
dst
,
const
Mat
&
kernelY
,
double
delta
,
int
anchor
,
bool
int_arithm
,
int
shift_bits
)
{
CV_Assert
(
shift_bits
==
0
||
int_arithm
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
dst
.
depth
()
==
CV_64F
&&
!
doubleSupport
)
return
false
;
...
...
@@ -821,13 +825,16 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
globalsize
[
1
]
=
DIVUP
(
sz
.
height
,
localsize
[
1
])
*
localsize
[
1
];
globalsize
[
0
]
=
DIVUP
(
sz
.
width
,
localsize
[
0
])
*
localsize
[
0
];
char
cvt
[
40
];
char
cvt
[
2
][
40
];
int
floatT
=
std
::
max
(
CV_32F
,
bdepth
);
cv
::
String
build_options
=
cv
::
format
(
"-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d"
" -D srcT=%s -D dstT=%s -D convertToDstT=%s"
" -D srcT=%s -D dstT=%s -D convertTo
FloatT=%s -D floatT=%s -D convertTo
DstT=%s"
" -D srcT1=%s -D dstT1=%s -D SHIFT_BITS=%d%s%s"
,
anchor
,
(
int
)
localsize
[
0
],
(
int
)
localsize
[
1
],
cn
,
ocl
::
typeToStr
(
buf_type
),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
bdepth
,
ddepth
,
cn
,
cvt
),
ocl
::
convertTypeStr
(
bdepth
,
floatT
,
cn
,
cvt
[
0
]),
ocl
::
typeToStr
(
CV_MAKETYPE
(
floatT
,
cn
)),
ocl
::
convertTypeStr
(
shift_bits
?
floatT
:
bdepth
,
ddepth
,
cn
,
cvt
[
1
]),
ocl
::
typeToStr
(
bdepth
),
ocl
::
typeToStr
(
ddepth
),
2
*
shift_bits
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
int_arithm
?
" -D INTEGER_ARITHMETIC"
:
""
);
...
...
@@ -839,7 +846,7 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
buf
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
static_cast
<
float
>
(
delta
));
static_cast
<
float
>
(
delta
*
(
1u
<<
(
2
*
shift_bits
))
));
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
...
...
@@ -848,16 +855,21 @@ const int optimizedSepFilterLocalWidth = 16;
const
int
optimizedSepFilterLocalHeight
=
8
;
static
bool
ocl_sepFilter2D_SinglePass
(
InputArray
_src
,
OutputArray
_dst
,
Mat
row_kernel
,
Mat
col_kernel
,
double
delta
,
int
borderType
,
int
ddepth
,
int
bdepth
,
bool
int_arithm
)
const
Mat
&
kernelX_
,
const
Mat
&
kernelY_
,
double
delta
,
int
borderType
,
int
ddepth
,
int
bdepth
,
bool
int_arithm
,
int
shift_bits
)
{
Size
size
=
_src
.
size
(),
wholeSize
;
Point
origin
;
//CV_Assert(shift_bits == 0 || int_arithm);
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
Size
size
=
_src
.
size
();
int
stype
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
),
esz
=
CV_ELEM_SIZE
(
stype
),
wdepth
=
std
::
max
(
std
::
max
(
sdepth
,
ddepth
),
bdepth
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
size_t
src_step
=
_src
.
step
(),
src_offset
=
_src
.
offset
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
esz
==
0
||
src_step
==
0
||
(
src_offset
%
src_step
)
%
esz
!=
0
...
...
@@ -869,6 +881,13 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
||
borderType
==
BORDER_REFLECT_101
))
return
false
;
Mat
kernelX
,
kernelY
;
kernelX_
.
convertTo
(
kernelX
,
wdepth
);
if
(
kernelX_
.
data
!=
kernelY_
.
data
)
kernelY_
.
convertTo
(
kernelY
,
wdepth
);
else
kernelY
=
kernelX
;
size_t
lt2
[
2
]
=
{
optimizedSepFilterLocalWidth
,
optimizedSepFilterLocalHeight
};
size_t
gt2
[
2
]
=
{
lt2
[
0
]
*
(
1
+
(
size
.
width
-
1
)
/
lt2
[
0
]),
lt2
[
1
]};
...
...
@@ -879,9 +898,9 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
String
opts
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s"
" -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s"
" -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s -D CN=%d -D SHIFT_BITS=%d%s"
,
(
int
)
lt2
[
0
],
(
int
)
lt2
[
1
],
row_kernel
.
cols
/
2
,
col_kernel
.
cols
/
2
,
ocl
::
kernelToStr
(
row_kernel
,
wdepth
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
col_kernel
,
wdepth
,
"KERNEL_MATRIX_Y"
).
c_str
(),
(
int
)
lt2
[
0
],
(
int
)
lt2
[
1
],
kernelX
.
cols
/
2
,
kernelY
.
cols
/
2
,
ocl
::
kernelToStr
(
kernelX
,
wdepth
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
kernelY
,
wdepth
,
"KERNEL_MATRIX_Y"
).
c_str
(),
ocl
::
typeToStr
(
stype
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
borderMap
[
borderType
],
...
...
@@ -896,21 +915,30 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
_dst
.
create
(
size
,
dtype
);
UMat
dst
=
_dst
.
getUMat
();
int
src_offset_x
=
static_cast
<
int
>
((
src_offset
%
src_step
)
/
esz
);
int
src_offset_y
=
static_cast
<
int
>
(
src_offset
/
src_step
);
// TODO Future: emit error on inplace processing
//CV_Assert(src.u != dst.u && "Inplace processing is not allowed with UMat");
if
(
src
.
u
==
dst
.
u
)
{
CV_LOG_ONCE_WARNING
(
NULL
,
"sepFilter2D: inplace arguments are not allowed for non-inplace operations. Performance impact warning."
);
src
=
src
.
clone
();
}
Size
wholeSize
;
Point
origin
;
src
.
locateROI
(
wholeSize
,
origin
);
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)
src_step
,
src_offset_x
,
src_offset_
y
,
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)
src_step
,
origin
.
x
,
origin
.
y
,
wholeSize
.
height
,
wholeSize
.
width
,
ocl
::
KernelArg
::
WriteOnly
(
dst
),
static_cast
<
float
>
(
delta
));
static_cast
<
float
>
(
delta
*
(
1u
<<
(
2
*
shift_bits
))
));
return
k
.
run
(
2
,
gt2
,
lt2
,
false
);
}
bool
ocl_sepFilter2D
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
Point
anchor
,
double
delta
,
int
borderType
)
bool
ocl_sepFilter2D
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
Point
anchor
,
double
delta
,
int
borderType
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
Size
imgSize
=
_src
.
size
();
...
...
@@ -934,59 +962,152 @@ bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
if
(
anchor
.
y
<
0
)
anchor
.
y
=
kernelY
.
cols
>>
1
;
int
rtype
=
getKernelType
(
kernelX
,
kernelX
.
rows
==
1
?
Point
(
anchor
.
x
,
0
)
:
Point
(
0
,
anchor
.
x
));
int
ctype
=
getKernelType
(
kernelY
,
kernelY
.
rows
==
1
?
Point
(
anchor
.
y
,
0
)
:
Point
(
0
,
anchor
.
y
));
int
bdepth
=
CV_32F
;
bool
int_arithm
=
false
;
i
f
(
sdepth
==
CV_8U
&&
ddepth
==
CV_8U
&&
rtype
==
KERNEL_SMOOTH
+
KERNEL_SYMMETRICAL
&&
ctype
==
KERNEL_SMOOTH
+
KERNEL_SYMMETRICAL
)
i
nt
shift_bits
=
0
;
while
(
sdepth
==
CV_8U
&&
ddepth
==
CV_8U
)
{
if
(
ocl
::
Device
::
getDefault
().
isIntel
())
int
bits_
=
8
;
if
(
delta
*
256.0
f
!=
(
float
)(
int
)(
delta
*
256
))
{
for
(
int
i
=
0
;
i
<
kernelX
.
cols
;
i
++
)
kernelX
.
at
<
float
>
(
0
,
i
)
=
(
float
)
cvRound
(
kernelX
.
at
<
float
>
(
0
,
i
)
*
(
1
<<
shift_bits
));
if
(
kernelX
.
data
!=
kernelY
.
data
)
for
(
int
i
=
0
;
i
<
kernelX
.
cols
;
i
++
)
kernelY
.
at
<
float
>
(
0
,
i
)
=
(
float
)
cvRound
(
kernelY
.
at
<
float
>
(
0
,
i
)
*
(
1
<<
shift_bits
));
}
else
CV_LOG_DEBUG
(
NULL
,
"ocl_sepFilter2D: bit-exact delta can't be applied: delta="
<<
delta
);
break
;
}
Mat
kernelX_BitExact
,
kernelY_BitExact
;
bool
isValidBitExactRowKernel
=
createBitExactKernel_32S
(
kernelX
,
kernelX_BitExact
,
bits_
);
bool
isValidBitExactColumnKernel
=
createBitExactKernel_32S
(
kernelY
,
kernelY_BitExact
,
bits_
);
if
(
!
isValidBitExactRowKernel
)
{
CV_LOG_DEBUG
(
NULL
,
"ocl_sepFilter2D: bit-exact row-kernel can't be applied: ksize="
<<
kernelX_BitExact
.
total
());
}
else
if
(
!
isValidBitExactColumnKernel
)
{
CV_LOG_DEBUG
(
NULL
,
"ocl_sepFilter2D: bit-exact column-kernel can't be applied: ksize="
<<
kernelY_BitExact
.
total
());
}
else
{
bdepth
=
CV_32S
;
kernelX
.
convertTo
(
kernelX
,
bdepth
,
1
<<
shift_bits
);
kernelY
.
convertTo
(
kernelY
,
bdepth
,
1
<<
shift_bits
);
shift_bits
=
bits_
;
int_arithm
=
true
;
kernelX
=
kernelX_BitExact
;
kernelY
=
kernelY_BitExact
;
}
int_arithm
=
true
;
break
;
}
CV_OCL_RUN_
(
kernelY
.
cols
<=
21
&&
kernelX
.
cols
<=
21
&&
imgSize
.
width
>
optimizedSepFilterLocalWidth
+
anchor
.
x
&&
imgSize
.
height
>
optimizedSepFilterLocalHeight
+
anchor
.
y
&&
(
!
(
borderType
&
BORDER_ISOLATED
)
||
_src
.
offset
()
==
0
)
&&
anchor
==
Point
(
kernelX
.
cols
>>
1
,
kernelY
.
cols
>>
1
)
&&
OCL_PERFORMANCE_CHECK
(
d
.
isIntel
()),
// TODO FIXIT
ocl_sepFilter2D_SinglePass
(
_src
,
_dst
,
kernelX
,
kernelY
,
delta
,
borderType
&
~
BORDER_ISOLATED
,
ddepth
,
bdepth
,
int_arithm
),
true
)
CV_OCL_RUN_
(
kernelY
.
cols
<=
21
&&
kernelX
.
cols
<=
21
&&
imgSize
.
width
>
optimizedSepFilterLocalWidth
+
anchor
.
x
&&
imgSize
.
height
>
optimizedSepFilterLocalHeight
+
anchor
.
y
&&
(
!
(
borderType
&
BORDER_ISOLATED
)
||
_src
.
offset
()
==
0
)
&&
anchor
==
Point
(
kernelX
.
cols
>>
1
,
kernelY
.
cols
>>
1
)
&&
OCL_PERFORMANCE_CHECK
(
d
.
isIntel
()),
// TODO FIXIT
ocl_sepFilter2D_SinglePass
(
_src
,
_dst
,
kernelX
,
kernelY
,
delta
,
borderType
&
~
BORDER_ISOLATED
,
ddepth
,
CV_32F
,
// force FP32 mode
false
,
shift_bits
),
true
);
UMat
src
=
_src
.
getUMat
();
Size
srcWholeSize
;
Point
srcOffset
;
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
bool
fast8uc1
=
type
==
CV_8UC1
&&
srcOffset
.
x
%
4
==
0
&&
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
;
bool
fast8uc1
=
false
;
if
(
type
==
CV_8UC1
)
{
Size
srcWholeSize
;
Point
srcOffset
;
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
fast8uc1
=
srcOffset
.
x
%
4
==
0
&&
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
;
}
Size
srcSize
=
src
.
size
();
Size
bufSize
(
srcSize
.
width
,
srcSize
.
height
+
kernelY
.
cols
-
1
);
UMat
buf
(
bufSize
,
CV_MAKETYPE
(
bdepth
,
cn
));
if
(
!
ocl_sepRowFilter2D
(
src
,
buf
,
kernelX
,
anchor
.
x
,
borderType
,
ddepth
,
fast8uc1
,
int_arithm
,
shift_bits
))
return
false
;
_dst
.
create
(
srcSize
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
return
ocl_sepColFilter2D
(
buf
,
dst
,
kernelY
,
delta
,
anchor
.
y
,
int_arithm
,
shift_bits
);
}
bool
ocl_sepFilter2D_BitExact
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
const
Size
&
ksize
,
const
uint16_t
*
fkx
,
const
uint16_t
*
fky
,
Point
anchor
,
double
delta
,
int
borderType
,
int
shift_bits
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
Size
imgSize
=
_src
.
size
();
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
if
(
cn
>
4
)
return
false
;
if
(
ksize
.
width
%
2
!=
1
)
return
false
;
if
(
ksize
.
height
%
2
!=
1
)
return
false
;
Mat
kernelX
(
1
,
ksize
.
width
,
CV_16SC1
,
(
void
*
)
fkx
);
Mat
kernelY
(
1
,
ksize
.
height
,
CV_16SC1
,
(
void
*
)
fky
);
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
if
(
anchor
.
x
<
0
)
anchor
.
x
=
kernelX
.
cols
>>
1
;
if
(
anchor
.
y
<
0
)
anchor
.
y
=
kernelY
.
cols
>>
1
;
int
bdepth
=
sdepth
==
CV_8U
?
CV_32S
:
CV_32F
;
CV_OCL_RUN_
(
kernelY
.
cols
<=
21
&&
kernelX
.
cols
<=
21
&&
imgSize
.
width
>
optimizedSepFilterLocalWidth
+
anchor
.
x
&&
imgSize
.
height
>
optimizedSepFilterLocalHeight
+
anchor
.
y
&&
(
!
(
borderType
&
BORDER_ISOLATED
)
||
_src
.
offset
()
==
0
)
&&
anchor
==
Point
(
kernelX
.
cols
>>
1
,
kernelY
.
cols
>>
1
)
&&
OCL_PERFORMANCE_CHECK
(
d
.
isIntel
()),
// TODO FIXIT
ocl_sepFilter2D_SinglePass
(
_src
,
_dst
,
kernelX
,
kernelY
,
delta
,
borderType
&
~
BORDER_ISOLATED
,
ddepth
,
bdepth
,
true
,
shift_bits
),
true
);
UMat
src
=
_src
.
getUMat
();
bool
fast8uc1
=
false
;
if
(
type
==
CV_8UC1
)
{
Size
srcWholeSize
;
Point
srcOffset
;
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
fast8uc1
=
srcOffset
.
x
%
4
==
0
&&
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
;
}
Size
srcSize
=
src
.
size
();
Size
bufSize
(
srcSize
.
width
,
srcSize
.
height
+
kernelY
.
cols
-
1
);
UMat
buf
(
bufSize
,
CV_MAKETYPE
(
bdepth
,
cn
));
if
(
!
ocl_sepRowFilter2D
(
src
,
buf
,
kernelX
,
anchor
.
x
,
borderType
,
ddepth
,
fast8uc1
,
int_arithm
))
if
(
!
ocl_sepRowFilter2D
(
src
,
buf
,
kernelX
,
anchor
.
x
,
borderType
,
ddepth
,
fast8uc1
,
true
,
shift_bits
))
return
false
;
_dst
.
create
(
srcSize
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
return
ocl_sepColFilter2D
(
buf
,
dst
,
kernelY
,
delta
,
anchor
.
y
,
int_arithm
);
return
ocl_sepColFilter2D
(
buf
,
dst
,
kernelY
,
delta
,
anchor
.
y
,
true
,
shift_bits
);
}
#endif
...
...
@@ -1444,7 +1565,7 @@ void sepFilter2D(InputArray _src, OutputArray _dst, int ddepth,
CV_Assert
(
!
_kernelX
.
empty
());
CV_Assert
(
!
_kernelY
.
empty
());
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
(
size_t
)
_src
.
rows
()
>
_kernelY
.
total
()
&&
(
size_t
)
_src
.
cols
()
>
_kernelX
.
total
(),
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
(
size_t
)
_src
.
rows
()
>
=
_kernelY
.
total
()
&&
(
size_t
)
_src
.
cols
()
>=
_kernelX
.
total
(),
ocl_sepFilter2D
(
_src
,
_dst
,
ddepth
,
_kernelX
,
_kernelY
,
anchor
,
delta
,
borderType
))
Mat
src
=
_src
.
getMat
(),
kernelX
=
_kernelX
.
getMat
(),
kernelY
=
_kernelY
.
getMat
();
...
...
modules/imgproc/src/filter.hpp
浏览文件 @
762a5c83
...
...
@@ -46,13 +46,25 @@
namespace
cv
{
#ifdef HAVE_OPENCL
bool
ocl_sepFilter2D
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
Point
anchor
,
double
delta
,
int
borderType
);
bool
ocl_sepFilter2D
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
Point
anchor
,
double
delta
,
int
borderType
);
bool
ocl_sepFilter2D_BitExact
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
const
Size
&
ksize
,
const
uint16_t
*
fkx
,
const
uint16_t
*
fky
,
Point
anchor
,
double
delta
,
int
borderType
,
int
shift_bits
);
#endif
void
preprocess2DKernel
(
const
Mat
&
kernel
,
std
::
vector
<
Point
>&
coords
,
std
::
vector
<
uchar
>&
coeffs
);
}
void
preprocess2DKernel
(
const
Mat
&
kernel
,
std
::
vector
<
Point
>&
coords
,
std
::
vector
<
uchar
>&
coeffs
);
}
// namespace
#endif
...
...
modules/imgproc/src/opencl/filterSepCol.cl
浏览文件 @
762a5c83
...
...
@@ -61,7 +61,11 @@
#
endif
#
define
DIG
(
a
)
a,
#
if
defined
(
INTEGER_ARITHMETIC
)
__constant
int
mat_kernel[]
=
{
COEFF
}
;
#
else
__constant
srcT1
mat_kernel[]
=
{
COEFF
}
;
#
endif
__kernel
void
col_filter
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
int
src_whole_rows,
int
src_whole_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
float
delta
)
...
...
@@ -92,30 +96,28 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
read
pixels
from
lds
and
calculate
the
result
sum
=
LDS_DAT[l_y
+
RADIUSY][l_x]
*
mat_kernel[RADIUSY]
;
sum
=
LDS_DAT[l_y
+
RADIUSY][l_x]
*
mat_kernel[RADIUSY]
+
(
srcT
)
delta
;
for
(
int
i
=
1
; i <= RADIUSY; ++i)
{
temp[0]
=
LDS_DAT[l_y
+
RADIUSY
-
i][l_x]
;
temp[1]
=
LDS_DAT[l_y
+
RADIUSY
+
i][l_x]
;
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
+=
mad24
(
temp[0],mat_kernel[RADIUSY
-
i],
temp[1]
*
mat_kernel[RADIUSY
+
i]
)
;
#
else
sum
+=
mad
(
temp[0],
mat_kernel[RADIUSY
-
i],
temp[1]
*
mat_kernel[RADIUSY
+
i]
)
;
#
endif
}
#
ifdef
INTEGER_ARITHMETIC
#
ifdef
INTEL_DEVICE
sum
=
(
sum
+
(
1
<<
(
SHIFT_BITS-1
)))
/
(
1
<<
SHIFT_BITS
)
;
#
else
sum
=
(
sum
+
(
1
<<
(
SHIFT_BITS-1
)))
>>
SHIFT_BITS
;
#
endif
#
endif
//
write
the
result
to
dst
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
#
if
defined
(
SHIFT_BITS
)
&&
SHIFT_BITS
>
0
dstT
result
=
convertToDstT
(
convertToFloatT
(
sum
)
*
(
floatT
)(
1.0f
/
(
1
<<
SHIFT_BITS
)))
;
#
else
dstT
result
=
convertToDstT
(
sum
)
;
#
endif
start_addr
=
mad24
(
y,
dst_step,
mad24
(
DSTSIZE,
x,
dst_offset
))
;
storepix
(
convertToDstT
(
sum
+
(
srcT
)(
delta
))
,
dst
+
start_addr
)
;
storepix
(
result
,
dst
+
start_addr
)
;
}
}
modules/imgproc/src/opencl/filterSepRow.cl
浏览文件 @
762a5c83
...
...
@@ -139,9 +139,13 @@
#
endif
#
define
DIG
(
a
)
a,
#
if
defined
(
INTEGER_ARITHMETIC
)
__constant
int
mat_kernel[]
=
{
COEFF
}
;
#
else
__constant
dstT1
mat_kernel[]
=
{
COEFF
}
;
#
endif
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
#
define
dstT4
int4
#
define
convertDstVec
convert_int4
#
else
...
...
@@ -263,7 +267,7 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
{
temp[0]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
-
i
)
;
temp[1]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
+
i
)
;
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
+=
mad24
(
convertDstVec
(
temp[0]
)
,
mat_kernel[RADIUSX-i],
convertDstVec
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
#
else
sum
+=
mad
(
convertDstVec
(
temp[0]
)
,
mat_kernel[RADIUSX-i],
convertDstVec
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
...
...
@@ -368,7 +372,7 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse
{
temp[0]
=
LDS_DAT[l_y][l_x
+
RADIUSX
-
i]
;
temp[1]
=
LDS_DAT[l_y][l_x
+
RADIUSX
+
i]
;
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
+=
mad24
(
convertToDstT
(
temp[0]
)
,
mat_kernel[RADIUSX
-
i],
convertToDstT
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
#
else
sum
+=
mad
(
convertToDstT
(
temp[0]
)
,
mat_kernel[RADIUSX
-
i],
convertToDstT
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
...
...
modules/imgproc/src/opencl/filterSep_singlePass.cl
浏览文件 @
762a5c83
...
...
@@ -160,7 +160,7 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
{
sum
=
(
WT
)
0
;
for
(
i=0
; i<=2*RADIUSY; i++)
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
=
mad24
(
lsmem[liy
+
i][clocX],
mat_kernelY[i],
sum
)
;
#
else
sum
=
mad
(
lsmem[liy
+
i][clocX],
mat_kernelY[i],
sum
)
;
...
...
@@ -177,25 +177,27 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
{
//
do
second
horizontal
filter
pass
//
and
calculate
final
result
sum
=
0.0f
;
sum
=
(
WT
)(
delta
)
;
for
(
i=0
; i<=2*RADIUSX; i++)
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
=
mad24
(
lsmemDy[liy][lix+i],
mat_kernelX[i],
sum
)
;
#
else
sum
=
mad
(
lsmemDy[liy][lix+i],
mat_kernelX[i],
sum
)
;
#
endif
#
if
def
INTEGER_ARITHMETIC
#
if
def
INTEL_DEVICE
sum
=
(
sum
+
(
1
<<
(
SHIFT_BITS-1
)))
/
(
1
<<
SHIFT_BITS
)
;
#
if
defined
(
SHIFT_BITS
)
&&
SHIFT_BITS
>
0
#
if
!defined
(
INTEGER_ARITHMETIC
)
sum
=
sum
*
(
1.0f
/
(
1
<<
SHIFT_BITS
)
)
;
#
else
sum
=
(
sum
+
(
1
<<
(
SHIFT_BITS-1
)))
>>
SHIFT_BITS
;
#
endif
#
endif
//
store
result
into
destination
image
storepix
(
convertToDstT
(
sum
+
(
WT
)(
delta
)
)
,
Dst
+
mad24
(
y
+
liy,
dst_step,
mad24
(
x,
DSTSIZE,
dst_offset
)))
;
storepix
(
convertToDstT
(
sum
)
,
Dst
+
mad24
(
y
+
liy,
dst_step,
mad24
(
x,
DSTSIZE,
dst_offset
)))
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
i
=
liy
*
BLK_X
+
lix
; i < (RADIUSY*2) * (BLK_X+(RADIUSX*2)); i += BLK_X * BLK_Y)
{
int
clocX
=
i
%
(
BLK_X+
(
RADIUSX*2
))
;
...
...
modules/imgproc/src/smooth.dispatch.cpp
浏览文件 @
762a5c83
...
...
@@ -48,6 +48,7 @@
#include <opencv2/core/utils/configuration.private.hpp>
#include <vector>
#include <iostream>
#include "opencv2/core/hal/intrin.hpp"
#include "opencl_kernels_imgproc.hpp"
...
...
@@ -637,10 +638,9 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
return
;
}
bool
useOpenCL
=
(
ocl
::
isOpenCLActivated
()
&&
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
((
ksize
.
width
==
3
&&
ksize
.
height
==
3
)
||
(
ksize
.
width
==
5
&&
ksize
.
height
==
5
))
&&
_src
.
rows
()
>
ksize
.
height
&&
_src
.
cols
()
>
ksize
.
width
);
bool
useOpenCL
=
ocl
::
isOpenCLActivated
()
&&
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
_src
.
rows
()
>=
ksize
.
height
&&
_src
.
cols
()
>=
ksize
.
width
&&
ksize
.
width
>
1
&&
ksize
.
height
>
1
;
CV_UNUSED
(
useOpenCL
);
int
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
...
...
@@ -648,27 +648,13 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
Mat
kx
,
ky
;
createGaussianKernels
(
kx
,
ky
,
type
,
ksize
,
sigma1
,
sigma2
);
CV_OCL_RUN
(
useOpenCL
,
ocl_GaussianBlur_8UC1
(
_src
,
_dst
,
ksize
,
CV_MAT_DEPTH
(
type
),
kx
,
ky
,
borderType
));
CV_OCL_RUN
(
useOpenCL
&&
sdepth
==
CV_8U
&&
((
ksize
.
width
==
3
&&
ksize
.
height
==
3
)
||
(
ksize
.
width
==
5
&&
ksize
.
height
==
5
)),
ocl_GaussianBlur_8UC1
(
_src
,
_dst
,
ksize
,
CV_MAT_DEPTH
(
type
),
kx
,
ky
,
borderType
)
);
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
(
size_t
)
_src
.
rows
()
>
kx
.
total
()
&&
(
size_t
)
_src
.
cols
()
>
kx
.
total
(),
ocl_sepFilter2D
(
_src
,
_dst
,
sdepth
,
kx
,
ky
,
Point
(
-
1
,
-
1
),
0
,
borderType
))
Mat
src
=
_src
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
Point
ofs
;
Size
wsz
(
src
.
cols
,
src
.
rows
);
if
(
!
(
borderType
&
BORDER_ISOLATED
))
src
.
locateROI
(
wsz
,
ofs
);
CALL_HAL
(
gaussianBlur
,
cv_hal_gaussianBlur
,
src
.
ptr
(),
src
.
step
,
dst
.
ptr
(),
dst
.
step
,
src
.
cols
,
src
.
rows
,
sdepth
,
cn
,
ofs
.
x
,
ofs
.
y
,
wsz
.
width
-
src
.
cols
-
ofs
.
x
,
wsz
.
height
-
src
.
rows
-
ofs
.
y
,
ksize
.
width
,
ksize
.
height
,
sigma1
,
sigma2
,
borderType
&~
BORDER_ISOLATED
);
CV_OVX_RUN
(
true
,
openvx_gaussianBlur
(
src
,
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
))
if
(
sdepth
==
CV_8U
&&
((
borderType
&
BORDER_ISOLATED
)
||
!
_src
.
getMat
().
isSubmatrix
()))
if
(
sdepth
==
CV_8U
&&
((
borderType
&
BORDER_ISOLATED
)
||
!
_src
.
isSubmatrix
()))
{
std
::
vector
<
ufixedpoint16
>
fkx
,
fky
;
createGaussianKernels
(
fkx
,
fky
,
type
,
ksize
,
sigma1
,
sigma2
);
...
...
@@ -684,6 +670,17 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
}
else
{
CV_OCL_RUN
(
useOpenCL
,
ocl_sepFilter2D_BitExact
(
_src
,
_dst
,
sdepth
,
ksize
,
(
const
uint16_t
*
)
&
fkx
[
0
],
(
const
uint16_t
*
)
&
fky
[
0
],
Point
(
-
1
,
-
1
),
0
,
borderType
,
8
/*shift_bits*/
)
);
Mat
src
=
_src
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
if
(
src
.
data
==
dst
.
data
)
src
=
src
.
clone
();
CV_CPU_DISPATCH
(
GaussianBlurFixedPoint
,
(
src
,
dst
,
(
const
uint16_t
*
)
&
fkx
[
0
],
(
int
)
fkx
.
size
(),
(
const
uint16_t
*
)
&
fky
[
0
],
(
int
)
fky
.
size
(),
borderType
),
...
...
@@ -692,6 +689,29 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
}
}
#ifdef HAVE_OPENCL
if
(
useOpenCL
)
{
sepFilter2D
(
_src
,
_dst
,
sdepth
,
kx
,
ky
,
Point
(
-
1
,
-
1
),
0
,
borderType
);
return
;
}
#endif
Mat
src
=
_src
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
Point
ofs
;
Size
wsz
(
src
.
cols
,
src
.
rows
);
if
(
!
(
borderType
&
BORDER_ISOLATED
))
src
.
locateROI
(
wsz
,
ofs
);
CALL_HAL
(
gaussianBlur
,
cv_hal_gaussianBlur
,
src
.
ptr
(),
src
.
step
,
dst
.
ptr
(),
dst
.
step
,
src
.
cols
,
src
.
rows
,
sdepth
,
cn
,
ofs
.
x
,
ofs
.
y
,
wsz
.
width
-
src
.
cols
-
ofs
.
x
,
wsz
.
height
-
src
.
rows
-
ofs
.
y
,
ksize
.
width
,
ksize
.
height
,
sigma1
,
sigma2
,
borderType
&~
BORDER_ISOLATED
);
CV_OVX_RUN
(
true
,
openvx_gaussianBlur
(
src
,
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
))
#if defined ENABLE_IPP_GAUSSIAN_BLUR
// IPP is not bit-exact to OpenCV implementation
CV_IPP_RUN_FAST
(
ipp_GaussianBlur
(
src
,
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
));
...
...
modules/stitching/src/exposure_compensate.cpp
浏览文件 @
762a5c83
...
...
@@ -275,8 +275,12 @@ void BlocksGainCompensator::feed(const std::vector<Point> &corners, const std::v
gain_map
(
by
,
bx
)
=
static_cast
<
float
>
(
gains
[
bl_idx
]);
}
sepFilter2D
(
gain_maps_
[
img_idx
],
gain_maps_
[
img_idx
],
CV_32F
,
ker
,
ker
);
sepFilter2D
(
gain_maps_
[
img_idx
],
gain_maps_
[
img_idx
],
CV_32F
,
ker
,
ker
);
// 2 smooth passes
UMat
result
;
sepFilter2D
(
gain_maps_
[
img_idx
],
result
,
CV_32F
,
ker
,
ker
);
UMat
result2
;
sepFilter2D
(
result
,
result2
,
CV_32F
,
ker
,
ker
);
swap
(
gain_maps_
[
img_idx
],
result2
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录