Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
3edf7c53
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3edf7c53
编写于
12月 18, 2012
作者:
V
Vadim Pisarevsky
提交者:
OpenCV Buildbot
12月 18, 2012
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #220 from ilya-lavrenov:SIMDFastAreaResize
上级
d3354c54
a319bae6
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
204 addition
and
15 deletion
+204
-15
modules/imgproc/perf/perf_resize.cpp
modules/imgproc/perf/perf_resize.cpp
+1
-1
modules/imgproc/src/imgwarp.cpp
modules/imgproc/src/imgwarp.cpp
+203
-14
未找到文件。
modules/imgproc/perf/perf_resize.cpp
浏览文件 @
3edf7c53
...
...
@@ -71,7 +71,7 @@ typedef TestBaseWithParam<MatInfo_Size_Scale_t> MatInfo_Size_Scale;
PERF_TEST_P
(
MatInfo_Size_Scale
,
ResizeAreaFast
,
testing
::
Combine
(
testing
::
Values
(
CV_8UC1
,
CV_8UC4
),
testing
::
Values
(
CV_8UC1
,
CV_8UC
3
,
CV_8UC4
,
CV_16UC1
,
CV_16UC3
,
CV_16UC
4
),
testing
::
Values
(
szVGA
,
szqHD
,
sz720p
,
sz1080p
),
testing
::
Values
(
2
)
)
...
...
modules/imgproc/src/imgwarp.cpp
浏览文件 @
3edf7c53
...
...
@@ -1241,27 +1241,217 @@ static void resizeGeneric_( const Mat& src, Mat& dst,
/**
 * No-op vector kernel used where no SIMD implementation is available.
 *
 * It exposes the same call shape as the SIMD kernels in this file:
 * operator() always returns 0, i.e. it reports that it produced no
 * destination elements.
 *
 * T  - element type of the source/destination rows.
 * WT - second type parameter; it is not used inside this struct.
 */
template<typename T, typename WT>
struct ResizeAreaFastNoVec
{
    /** Two-argument form; both arguments are ignored. */
    ResizeAreaFastNoVec(int, int) { }

    /**
     * Four-argument form (the names left in the original comments were
     * scale_x, scale_y, cn, step); all arguments are ignored.
     */
    ResizeAreaFastNoVec(int /*_scale_x*/, int /*_scale_y*/,
                        int /*_cn*/, int /*_step*/) { }

    /**
     * Processes nothing.
     * @return always 0; neither pointer is read or written.
     */
    int operator() (const T* /*S*/, T* /*D*/, int /*w*/) const
    {
        return 0;
    }
};
template
<
typename
T
>
#if CV_SSE2
/**
 * SSE2 kernel that averages 2x2 blocks of 8-bit elements taken from two
 * source rows (S and S + step bytes) into one destination row.
 *
 * Every output element is (a + b + c + d + 2) >> 2, where a/b are the two
 * horizontally adjacent same-channel samples of the first row and c/d the
 * matching samples of the second row.
 *
 * NOTE(review): the loop bounds below assume `w` is the number of
 * destination elements (pixels * channels) in the row and that each source
 * row holds at least 2 * w readable elements - confirm against the caller.
 */
class ResizeAreaFastVec_SIMD_8u
{
public:
    /**
     * @param _cn    number of interleaved channels; operator() handles 1 and 3
     *               and asserts 4 otherwise.
     * @param _step  offset, in bytes, from the first source row to the second.
     */
    ResizeAreaFastVec_SIMD_8u(int _cn, int _step) :
        cn(_cn), step(_step)
    {
        use_simd = checkHardwareSupport(CV_CPU_SSE2);
    }

    /**
     * @param S  first source row; the second row is read at S + step.
     * @param D  destination row.
     * @param w  element count that bounds the vectorised loops.
     * @return   number of leading destination elements that were written with
     *           final values; 0 when checkHardwareSupport(CV_CPU_SSE2) was false.
     *           The remaining elements are left for the caller to compute.
     */
    int operator() (const uchar* S, uchar* D, int w) const
    {
        if (!use_simd)
            return 0;

        int dx = 0;
        const uchar* S0 = S;
        const uchar* S1 = S0 + step;
        const __m128i zero = _mm_setzero_si128();
        const __m128i delta2 = _mm_set1_epi16(2);   // rounding term added before >> 2

        if (cn == 1)
        {
            const __m128i masklow = _mm_set1_epi16(0x00ff);
            // 16 source bytes per row -> 8 destination bytes.
            for ( ; dx < w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
            {
                const __m128i r0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(S0));
                const __m128i r1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(S1));

                // odd bytes (shifted down) + even bytes (masked) = horizontal pair sums
                const __m128i s0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_and_si128(r0, masklow));
                const __m128i s1 = _mm_add_epi16(_mm_srli_epi16(r1, 8), _mm_and_si128(r1, masklow));

                __m128i sum = _mm_add_epi16(_mm_add_epi16(s0, s1), delta2);
                sum = _mm_packus_epi16(_mm_srli_epi16(sum, 2), zero);
                _mm_storel_epi64(reinterpret_cast<__m128i*>(D), sum);
            }
        }
        else if (cn == 3)
        {
            // Each iteration consumes 12 source bytes per row (4 pixels) and yields
            // 6 destination bytes (2 pixels), but it LOADS 16 bytes per row and
            // STORES 8 bytes at D and at D + 3 (only the first 3 bytes of each store
            // are final; the rest is overwritten later). The furthest byte touched
            // is therefore D[dx + 10], so the loop must stop while dx + 10 < w.
            // The previous bound (dx < w - 6) allowed writes up to D[w + 3] and
            // reads past the end of the source rows.
            for ( ; dx <= w - 11; dx += 6, S0 += 12, S1 += 12, D += 6)
            {
                const __m128i r0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(S0));
                const __m128i r1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(S1));

                // *_16l: bytes 0..7 widened to 16 bit (pixel 0, pixel 1, ...)
                // *_16h: bytes 6..13 widened to 16 bit (pixel 2, pixel 3, ...)
                const __m128i r0_16l = _mm_unpacklo_epi8(r0, zero);
                const __m128i r0_16h = _mm_unpacklo_epi8(_mm_srli_si128(r0, 6), zero);
                const __m128i r1_16l = _mm_unpacklo_epi8(r1, zero);
                const __m128i r1_16h = _mm_unpacklo_epi8(_mm_srli_si128(r1, 6), zero);

                // shifting by 6 bytes (3 lanes) lines pixel 1 up with pixel 0
                __m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 6));
                __m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 6));
                s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
                s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
                _mm_storel_epi64(reinterpret_cast<__m128i*>(D), s0);

                s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 6));
                s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 6));
                s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
                s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
                _mm_storel_epi64(reinterpret_cast<__m128i*>(D + 3), s0);
            }
        }
        else
        {
            CV_Assert(cn == 4);
            // 16 source bytes per row (4 pixels) -> 8 destination bytes (2 pixels).
            for ( ; dx < w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
            {
                const __m128i r0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(S0));
                const __m128i r1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(S1));

                const __m128i r0_16l = _mm_unpacklo_epi8(r0, zero);
                const __m128i r0_16h = _mm_unpackhi_epi8(r0, zero);
                const __m128i r1_16l = _mm_unpacklo_epi8(r1, zero);
                const __m128i r1_16h = _mm_unpackhi_epi8(r1, zero);

                // shifting by 8 bytes (4 lanes) lines pixel 1 up with pixel 0
                __m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 8));
                __m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 8));
                s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
                const __m128i res0 = _mm_srli_epi16(s0, 2);   // low 4 lanes: output pixel 0

                s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 8));
                s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 8));
                s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
                const __m128i res1 = _mm_srli_epi16(s0, 2);   // low 4 lanes: output pixel 1

                // Merge both output pixels and write exactly the 8 bytes produced.
                // The previous code issued a second 8-byte store at D + 4, which
                // reached D[dx + 11] and could run past the end of the row.
                const __m128i both = _mm_unpacklo_epi64(res0, res1);
                _mm_storel_epi64(reinterpret_cast<__m128i*>(D), _mm_packus_epi16(both, zero));
            }
        }

        return dx;
    }

private:
    int cn;          // channel count given to the constructor
    bool use_simd;   // result of checkHardwareSupport(CV_CPU_SSE2)
    int step;        // byte offset between the two source rows
};
class
ResizeAreaFastVec_SIMD_16u
{
public:
ResizeAreaFastVec_SIMD_16u
(
int
_cn
,
int
_step
)
:
cn
(
_cn
),
step
(
_step
)
{
use_simd
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
int
operator
()
(
const
ushort
*
S
,
ushort
*
D
,
int
w
)
const
{
if
(
!
use_simd
)
return
0
;
int
dx
=
0
;
const
ushort
*
S0
=
(
const
ushort
*
)
S
;
const
ushort
*
S1
=
(
const
ushort
*
)((
const
uchar
*
)(
S
)
+
step
);
__m128i
masklow
=
_mm_set1_epi32
(
0x0000ffff
);
__m128i
zero
=
_mm_setzero_si128
();
__m128i
delta2
=
_mm_set1_epi32
(
2
);
#define _mm_packus_epi32(a, zero) _mm_packs_epi32(_mm_srai_epi32(_mm_slli_epi32(a, 16), 16), zero)
if
(
cn
==
1
)
{
for
(
;
dx
<
w
-
4
;
dx
+=
4
,
S0
+=
8
,
S1
+=
8
,
D
+=
4
)
{
__m128i
r0
=
_mm_loadu_si128
((
const
__m128i
*
)
S0
);
__m128i
r1
=
_mm_loadu_si128
((
const
__m128i
*
)
S1
);
__m128i
s0
=
_mm_add_epi32
(
_mm_srli_epi32
(
r0
,
16
),
_mm_and_si128
(
r0
,
masklow
));
__m128i
s1
=
_mm_add_epi32
(
_mm_srli_epi32
(
r1
,
16
),
_mm_and_si128
(
r1
,
masklow
));
s0
=
_mm_add_epi32
(
_mm_add_epi32
(
s0
,
s1
),
delta2
);
s0
=
_mm_srli_epi32
(
s0
,
2
);
s0
=
_mm_packus_epi32
(
s0
,
zero
);
_mm_storel_epi64
((
__m128i
*
)
D
,
s0
);
}
}
else
if
(
cn
==
3
)
for
(
;
dx
<
w
-
3
;
dx
+=
3
,
S0
+=
6
,
S1
+=
6
,
D
+=
3
)
{
__m128i
r0
=
_mm_loadu_si128
((
const
__m128i
*
)
S0
);
__m128i
r1
=
_mm_loadu_si128
((
const
__m128i
*
)
S1
);
__m128i
r0_16l
=
_mm_unpacklo_epi16
(
r0
,
zero
);
__m128i
r0_16h
=
_mm_unpacklo_epi16
(
_mm_srli_si128
(
r0
,
6
),
zero
);
__m128i
r1_16l
=
_mm_unpacklo_epi16
(
r1
,
zero
);
__m128i
r1_16h
=
_mm_unpacklo_epi16
(
_mm_srli_si128
(
r1
,
6
),
zero
);
__m128i
s0
=
_mm_add_epi16
(
r0_16l
,
r0_16h
);
__m128i
s1
=
_mm_add_epi16
(
r1_16l
,
r1_16h
);
s0
=
_mm_add_epi32
(
s1
,
_mm_add_epi32
(
s0
,
delta2
));
s0
=
_mm_packus_epi32
(
_mm_srli_epi32
(
s0
,
2
),
zero
);
_mm_storel_epi64
((
__m128i
*
)
D
,
s0
);
}
else
{
CV_Assert
(
cn
==
4
);
for
(
;
dx
<
w
-
4
;
dx
+=
4
,
S0
+=
8
,
S1
+=
8
,
D
+=
4
)
{
__m128i
r0
=
_mm_loadu_si128
((
const
__m128i
*
)
S0
);
__m128i
r1
=
_mm_loadu_si128
((
const
__m128i
*
)
S1
);
__m128i
r0_32l
=
_mm_unpacklo_epi16
(
r0
,
zero
);
__m128i
r0_32h
=
_mm_unpackhi_epi16
(
r0
,
zero
);
__m128i
r1_32l
=
_mm_unpacklo_epi16
(
r1
,
zero
);
__m128i
r1_32h
=
_mm_unpackhi_epi16
(
r1
,
zero
);
__m128i
s0
=
_mm_add_epi32
(
r0_32l
,
r0_32h
);
__m128i
s1
=
_mm_add_epi32
(
r1_32l
,
r1_32h
);
s0
=
_mm_add_epi32
(
s1
,
_mm_add_epi32
(
s0
,
delta2
));
s0
=
_mm_packus_epi32
(
_mm_srli_epi32
(
s0
,
2
),
zero
);
_mm_storel_epi64
((
__m128i
*
)
D
,
s0
);
}
}
#undef _mm_packus_epi32
return
dx
;
}
private:
int
cn
;
int
step
;
bool
use_simd
;
};
#else
typedef
ResizeAreaFastNoVec
<
uchar
,
uchar
>
ResizeAreaFastVec_SIMD_8u
;
typedef
ResizeAreaFastNoVec
<
ushort
,
ushort
>
ResizeAreaFastVec_SIMD_16u
;
#endif
template
<
typename
T
,
typename
SIMDVecOp
>
struct
ResizeAreaFastVec
{
ResizeAreaFastVec
(
int
_scale_x
,
int
_scale_y
,
int
_cn
,
int
_step
/*, const int* _ofs*/
)
:
scale_x
(
_scale_x
),
scale_y
(
_scale_y
),
cn
(
_cn
),
step
(
_step
)
/*, ofs(_ofs)*/
ResizeAreaFastVec
(
int
_scale_x
,
int
_scale_y
,
int
_cn
,
int
_step
)
:
scale_x
(
_scale_x
),
scale_y
(
_scale_y
),
cn
(
_cn
),
step
(
_step
)
,
vecOp
(
_cn
,
_step
)
{
fast_mode
=
scale_x
==
2
&&
scale_y
==
2
&&
(
cn
==
1
||
cn
==
3
||
cn
==
4
);
}
int
operator
()
(
const
T
*
S
,
T
*
D
,
int
w
)
const
{
if
(
!
fast_mode
)
if
(
!
fast_mode
)
return
0
;
const
T
*
nextS
=
(
const
T
*
)((
const
uchar
*
)
S
+
step
);
int
dx
=
0
;
int
dx
=
vecOp
(
S
,
D
,
w
)
;
if
(
cn
==
1
)
for
(
;
dx
<
w
;
++
dx
)
...
...
@@ -1279,7 +1469,7 @@ struct ResizeAreaFastVec
}
else
{
a
ssert
(
cn
==
4
);
CV_A
ssert
(
cn
==
4
);
for
(
;
dx
<
w
;
dx
+=
4
)
{
int
index
=
dx
*
2
;
...
...
@@ -1298,6 +1488,7 @@ private:
int
cn
;
bool
fast_mode
;
int
step
;
SIMDVecOp
vecOp
;
};
template
<
typename
T
,
typename
WT
,
typename
VecOp
>
...
...
@@ -1702,10 +1893,10 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
static
ResizeAreaFastFunc
areafast_tab
[]
=
{
resizeAreaFast_
<
uchar
,
int
,
ResizeAreaFastVec
<
uchar
>
>
,
resizeAreaFast_
<
uchar
,
int
,
ResizeAreaFastVec
<
uchar
,
ResizeAreaFastVec_SIMD_8u
>
>
,
0
,
resizeAreaFast_
<
ushort
,
float
,
ResizeAreaFastVec
<
ushort
>
>
,
resizeAreaFast_
<
short
,
float
,
ResizeAreaFastVec
<
short
>
>
,
resizeAreaFast_
<
ushort
,
float
,
ResizeAreaFastVec
<
ushort
,
ResizeAreaFastVec_SIMD_16u
>
>
,
resizeAreaFast_
<
short
,
float
,
ResizeAreaFastVec
<
short
,
ResizeAreaFastNoVec
<
short
,
float
>
>
>
,
0
,
resizeAreaFast_
<
float
,
float
,
ResizeAreaFastNoVec
<
float
,
float
>
>
,
resizeAreaFast_
<
double
,
double
,
ResizeAreaFastNoVec
<
double
,
double
>
>
,
...
...
@@ -1764,9 +1955,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
// in case of scale_x && scale_y is equal to 2
// INTER_AREA (fast) also is equal to INTER_LINEAR
if
(
interpolation
==
INTER_LINEAR
&&
is_area_fast
&&
iscale_x
==
2
&&
iscale_y
==
2
)
{
interpolation
=
INTER_AREA
;
}
// true "area" interpolation is only implemented for the case (scale_x <= 1 && scale_y <= 1).
// In other cases it is emulated using some variant of bilinear interpolation
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录