Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
1ca35b74
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1ca35b74
编写于
1月 12, 2015
作者:
I
Ilya Lavrenov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
resize are fast
上级
56f3c927
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
144 additions
and
3 deletions
+144
-3
modules/imgproc/src/imgwarp.cpp
modules/imgproc/src/imgwarp.cpp
+140
-2
modules/imgproc/test/test_imgwarp.cpp
modules/imgproc/test/test_imgwarp.cpp
+4
-1
未找到文件。
modules/imgproc/src/imgwarp.cpp
浏览文件 @
1ca35b74
...
...
@@ -2199,8 +2199,146 @@ private:
bool
use_simd
;
};
// NOTE(review): these two aliases appear to be the lines removed by this diff
// (the hunk reports 2 deletions); they bind the 16s/32f SIMD op names to
// ResizeAreaFastNoVec instantiations and would clash with the class/struct
// definitions of the same names that follow -- confirm against the real file.
typedef
ResizeAreaFastNoVec
<
short
,
short
>
ResizeAreaFastVec_SIMD_16s
;
typedef
ResizeAreaFastNoVec
<
float
,
float
>
ResizeAreaFastVec_SIMD_32f
;
// SSE2 kernel that averages 2x2 neighbourhoods of signed 16-bit samples
// (two source rows, two horizontally adjacent pixels) with rounding:
// dst = (a + b + c + d + 2) >> 2.
//
// operator() processes as many leading output elements as it can and returns
// that count; the caller is presumably expected to finish the remainder with
// scalar code (caller not visible in this chunk -- confirm).
class ResizeAreaFastVec_SIMD_16s
{
public:
    // _cn:   number of interleaved channels; 1, 3 and 4 are supported.
    // _step: byte distance from the first source row to the second one.
    ResizeAreaFastVec_SIMD_16s(int _cn, int _step) :
        cn(_cn), step(_step)
    {
        use_simd = checkHardwareSupport(CV_CPU_SSE2);
    }

    // S: first of the two source rows (the second is found `step` bytes later).
    // D: destination row.
    // w: number of destination elements (samples, not pixels) available.
    // Returns the number of destination elements written; 0 when SSE2 is not
    // usable at run time. Raises via CV_Assert when cn is not 1, 3 or 4.
    int operator() (const short* S, short* D, int w) const
    {
        if (!use_simd)
            return 0;

        const short* const top = S;
        const short* const bottom = (const short*)((const uchar*)S + step);
        int dx = 0;

        // In every layout the source offset is exactly twice the destination
        // offset, so both rows are addressed from dx instead of bumping pointers.
        switch (cn)
        {
        case 1:
            // 8 source samples per row -> 4 outputs.
            for ( ; dx <= w - 4; dx += 4)
            {
                const __m128i upper = _mm_loadu_si128((const __m128i*)(top + 2 * dx));
                const __m128i lower = _mm_loadu_si128((const __m128i*)(bottom + 2 * dx));
                storeRounded(D + dx, addLanePairs(upper), addLanePairs(lower));
            }
            break;

        case 3:
            // Two 3-channel pixels are combined per step. The 8-byte store
            // writes a 4th (junk) lane that the next step overwrites; the
            // `w - 4` bound keeps that extra lane inside the row.
            for ( ; dx <= w - 4; dx += 3)
            {
                const __m128i upper = _mm_loadu_si128((const __m128i*)(top + 2 * dx));
                const __m128i lower = _mm_loadu_si128((const __m128i*)(bottom + 2 * dx));

                // Shifting by 6 bytes aligns the second pixel with the first.
                const __m128i upper_sum = _mm_add_epi32(widenLow(upper),
                                                        widenLow(_mm_srli_si128(upper, 6)));
                const __m128i lower_sum = _mm_add_epi32(widenLow(lower),
                                                        widenLow(_mm_srli_si128(lower, 6)));
                storeRounded(D + dx, upper_sum, lower_sum);
            }
            break;

        default:
            CV_Assert(cn == 4);
            // One register holds two 4-channel pixels: low half + high half.
            for ( ; dx <= w - 4; dx += 4)
            {
                const __m128i upper = _mm_loadu_si128((const __m128i*)(top + 2 * dx));
                const __m128i lower = _mm_loadu_si128((const __m128i*)(bottom + 2 * dx));

                const __m128i upper_sum = _mm_add_epi32(widenLow(upper), widenHigh(upper));
                const __m128i lower_sum = _mm_add_epi32(widenLow(lower), widenHigh(lower));
                storeRounded(D + dx, upper_sum, lower_sum);
            }
            break;
        }

        return dx;
    }

private:
    // Sign-extends the four low 16-bit lanes of v to 32 bits.
    static __m128i widenLow(const __m128i& v)
    {
        return _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), v), 16);
    }

    // Sign-extends the four high 16-bit lanes of v to 32 bits.
    static __m128i widenHigh(const __m128i& v)
    {
        return _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), v), 16);
    }

    // Treats v as four (even, odd) pairs of 16-bit values and returns the
    // four sign-extended 32-bit sums even + odd.
    static __m128i addLanePairs(const __m128i& v)
    {
        const __m128i low_half_mask = _mm_set1_epi32(0x0000ffff);
        const __m128i odd = _mm_srai_epi32(v, 16);
        const __m128i even = _mm_srai_epi32(
            _mm_slli_epi32(_mm_and_si128(v, low_half_mask), 16), 16);
        return _mm_add_epi32(odd, even);
    }

    // Writes four 16-bit results (upper_sum + lower_sum + 2) >> 2 to dst,
    // using signed saturation when narrowing.
    static void storeRounded(short* dst, const __m128i& upper_sum, const __m128i& lower_sum)
    {
        const __m128i total = _mm_add_epi32(_mm_add_epi32(upper_sum, lower_sum),
                                            _mm_set1_epi32(2));
        const __m128i packed = _mm_packs_epi32(_mm_srai_epi32(total, 2),
                                               _mm_setzero_si128());
        _mm_storel_epi64((__m128i*)dst, packed);
    }

    int cn;
    int step;
    bool use_simd;
};
struct
ResizeAreaFastVec_SIMD_32f
{
ResizeAreaFastVec_SIMD_32f
(
int
_scale_x
,
int
_scale_y
,
int
_cn
,
int
_step
)
:
scale_x
(
_scale_x
),
scale_y
(
_scale_y
),
cn
(
_cn
),
step
(
_step
)
{
fast_mode
=
scale_x
==
2
&&
scale_y
==
2
&&
(
cn
==
1
||
cn
==
3
||
cn
==
4
);
}
int
operator
()
(
const
float
*
S
,
float
*
D
,
int
w
)
const
{
if
(
!
fast_mode
)
return
0
;
const
float
*
S0
=
S
,
*
S1
=
(
const
float
*
)((
const
uchar
*
)(
S0
)
+
step
);
int
dx
=
0
;
__m128
v_025
=
_mm_set1_ps
(
0.25
f
);
if
(
cn
==
1
)
{
int
shuffle_lo
=
_MM_SHUFFLE
(
2
,
0
,
2
,
0
),
shuffle_hi
=
_MM_SHUFFLE
(
3
,
1
,
3
,
1
);
for
(
;
dx
<=
w
-
4
;
dx
+=
4
,
S0
+=
8
,
S1
+=
8
,
D
+=
4
)
{
__m128
v_row00
=
_mm_loadu_ps
(
S0
),
v_row01
=
_mm_loadu_ps
(
S0
+
4
),
v_row10
=
_mm_loadu_ps
(
S1
),
v_row11
=
_mm_loadu_ps
(
S1
+
4
);
__m128
v_dst0
=
_mm_add_ps
(
_mm_shuffle_ps
(
v_row00
,
v_row01
,
shuffle_lo
),
_mm_shuffle_ps
(
v_row00
,
v_row01
,
shuffle_hi
));
__m128
v_dst1
=
_mm_add_ps
(
_mm_shuffle_ps
(
v_row10
,
v_row11
,
shuffle_lo
),
_mm_shuffle_ps
(
v_row10
,
v_row11
,
shuffle_hi
));
_mm_storeu_ps
(
D
,
_mm_mul_ps
(
_mm_add_ps
(
v_dst0
,
v_dst1
),
v_025
));
}
}
else
if
(
cn
==
4
)
{
for
(
;
dx
<=
w
-
4
;
dx
+=
4
,
S0
+=
8
,
S1
+=
8
,
D
+=
4
)
{
__m128
v_dst0
=
_mm_add_ps
(
_mm_loadu_ps
(
S0
),
_mm_loadu_ps
(
S0
+
4
));
__m128
v_dst1
=
_mm_add_ps
(
_mm_loadu_ps
(
S1
),
_mm_loadu_ps
(
S1
+
4
));
_mm_storeu_ps
(
D
,
_mm_mul_ps
(
_mm_add_ps
(
v_dst0
,
v_dst1
),
v_025
));
}
}
return
dx
;
}
private:
int
scale_x
,
scale_y
;
int
cn
;
bool
fast_mode
;
int
step
;
};
#else
...
...
modules/imgproc/test/test_imgwarp.cpp
浏览文件 @
1ca35b74
...
...
@@ -1595,7 +1595,10 @@ void resizeArea(const cv::Mat & src, cv::Mat & dst)
TEST
(
Resize
,
Area_half
)
{
const
int
size
=
1000
;
int
types
[]
=
{
CV_8UC1
,
CV_8UC4
,
CV_16UC1
,
CV_16UC4
,
CV_16SC1
,
CV_16SC4
,
CV_32FC1
,
CV_32FC4
};
int
types
[]
=
{
CV_8UC1
,
CV_8UC4
,
CV_16UC1
,
CV_16UC4
,
CV_16SC1
,
CV_16SC3
,
CV_16SC4
,
CV_32FC1
,
CV_32FC4
};
cv
::
RNG
rng
(
17
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录