Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenCV
opencv
提交
67faf161
O
opencv
项目概览
OpenCV
/
opencv
上一次同步 9 个月
通知
993
Star
71100
Fork
55581
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
67faf161
编写于
7月 03, 2023
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #23885 from hanliutong:UniversalIntrinsicRewriter
上级
377be68d
d1750705
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
33 addition
and
33 deletion
+33
-33
modules/core/src/mean.simd.hpp
modules/core/src/mean.simd.hpp
+33
-33
未找到文件。
modules/core/src/mean.simd.hpp
浏览文件 @
67faf161
...
...
@@ -24,7 +24,7 @@ struct SumSqr_SIMD
}
};
#if CV_SIMD
#if CV_SIMD
|| CV_SIMD_SCALABLE
template
<
>
struct
SumSqr_SIMD
<
uchar
,
int
,
int
>
...
...
@@ -39,37 +39,37 @@ struct SumSqr_SIMD<uchar, int, int>
v_int32
v_sum
=
vx_setzero_s32
();
v_int32
v_sqsum
=
vx_setzero_s32
();
const
int
len0
=
len
&
-
v_uint8
::
nlanes
;
const
int
len0
=
len
&
-
VTraits
<
v_uint8
>::
vlanes
()
;
while
(
x
<
len0
)
{
const
int
len_tmp
=
min
(
x
+
256
*
v_uint16
::
nlanes
,
len0
);
const
int
len_tmp
=
min
(
x
+
256
*
VTraits
<
v_uint16
>::
vlanes
()
,
len0
);
v_uint16
v_sum16
=
vx_setzero_u16
();
for
(
;
x
<
len_tmp
;
x
+=
v_uint8
::
nlanes
)
for
(
;
x
<
len_tmp
;
x
+=
VTraits
<
v_uint8
>::
vlanes
()
)
{
v_uint16
v_src0
=
vx_load_expand
(
src0
+
x
);
v_uint16
v_src1
=
vx_load_expand
(
src0
+
x
+
v_uint16
::
nlanes
);
v_sum16
+=
v_src0
+
v_src1
;
v_uint16
v_src1
=
vx_load_expand
(
src0
+
x
+
VTraits
<
v_uint16
>::
vlanes
()
);
v_sum16
=
v_add
(
v_sum16
,
v_add
(
v_src0
,
v_src1
))
;
v_int16
v_tmp0
,
v_tmp1
;
v_zip
(
v_reinterpret_as_s16
(
v_src0
),
v_reinterpret_as_s16
(
v_src1
),
v_tmp0
,
v_tmp1
);
v_sqsum
+=
v_dotprod
(
v_tmp0
,
v_tmp0
)
+
v_dotprod
(
v_tmp1
,
v_tmp1
);
v_sqsum
=
v_add
(
v_sqsum
,
v_add
(
v_dotprod
(
v_tmp0
,
v_tmp0
),
v_dotprod
(
v_tmp1
,
v_tmp1
))
);
}
v_uint32
v_half0
,
v_half1
;
v_expand
(
v_sum16
,
v_half0
,
v_half1
);
v_sum
+=
v_reinterpret_as_s32
(
v_half0
+
v_half1
);
v_sum
=
v_add
(
v_sum
,
v_reinterpret_as_s32
(
v_add
(
v_half0
,
v_half1
))
);
}
if
(
x
<=
len
-
v_uint16
::
nlanes
)
if
(
x
<=
len
-
VTraits
<
v_uint16
>::
vlanes
()
)
{
v_uint16
v_src
=
vx_load_expand
(
src0
+
x
);
v_uint16
v_half
=
v_combine_high
(
v_src
,
v_src
);
v_uint32
v_tmp0
,
v_tmp1
;
v_expand
(
v_
src
+
v_half
,
v_tmp0
,
v_tmp1
);
v_sum
+=
v_reinterpret_as_s32
(
v_tmp0
);
v_expand
(
v_
add
(
v_src
,
v_half
)
,
v_tmp0
,
v_tmp1
);
v_sum
=
v_add
(
v_sum
,
v_reinterpret_as_s32
(
v_tmp0
)
);
v_int16
v_tmp2
,
v_tmp3
;
v_zip
(
v_reinterpret_as_s16
(
v_src
),
v_reinterpret_as_s16
(
v_half
),
v_tmp2
,
v_tmp3
);
v_sqsum
+=
v_dotprod
(
v_tmp2
,
v_tmp2
);
x
+=
v_uint16
::
nlanes
;
v_sqsum
=
v_add
(
v_sqsum
,
v_dotprod
(
v_tmp2
,
v_tmp2
)
);
x
+=
VTraits
<
v_uint16
>::
vlanes
()
;
}
if
(
cn
==
1
)
...
...
@@ -79,13 +79,13 @@ struct SumSqr_SIMD<uchar, int, int>
}
else
{
int
CV_DECL_ALIGNED
(
CV_SIMD_WIDTH
)
ar
[
2
*
v_int32
::
nlanes
];
int
CV_DECL_ALIGNED
(
CV_SIMD_WIDTH
)
ar
[
2
*
VTraits
<
v_int32
>::
max_
nlanes
];
v_store
(
ar
,
v_sum
);
v_store
(
ar
+
v_int32
::
nlanes
,
v_sqsum
);
for
(
int
i
=
0
;
i
<
v_int32
::
nlanes
;
++
i
)
v_store
(
ar
+
VTraits
<
v_int32
>::
vlanes
()
,
v_sqsum
);
for
(
int
i
=
0
;
i
<
VTraits
<
v_int32
>::
vlanes
()
;
++
i
)
{
sum
[
i
%
cn
]
+=
ar
[
i
];
sqsum
[
i
%
cn
]
+=
ar
[
v_int32
::
nlanes
+
i
];
sqsum
[
i
%
cn
]
+=
ar
[
VTraits
<
v_int32
>::
vlanes
()
+
i
];
}
}
v_cleanup
();
...
...
@@ -106,37 +106,37 @@ struct SumSqr_SIMD<schar, int, int>
v_int32
v_sum
=
vx_setzero_s32
();
v_int32
v_sqsum
=
vx_setzero_s32
();
const
int
len0
=
len
&
-
v_int8
::
nlanes
;
const
int
len0
=
len
&
-
VTraits
<
v_int8
>::
vlanes
()
;
while
(
x
<
len0
)
{
const
int
len_tmp
=
min
(
x
+
256
*
v_int16
::
nlanes
,
len0
);
const
int
len_tmp
=
min
(
x
+
256
*
VTraits
<
v_int16
>::
vlanes
()
,
len0
);
v_int16
v_sum16
=
vx_setzero_s16
();
for
(;
x
<
len_tmp
;
x
+=
v_int8
::
nlanes
)
for
(;
x
<
len_tmp
;
x
+=
VTraits
<
v_int8
>::
vlanes
()
)
{
v_int16
v_src0
=
vx_load_expand
(
src0
+
x
);
v_int16
v_src1
=
vx_load_expand
(
src0
+
x
+
v_int16
::
nlanes
);
v_sum16
+=
v_src0
+
v_src1
;
v_int16
v_src1
=
vx_load_expand
(
src0
+
x
+
VTraits
<
v_int16
>::
vlanes
()
);
v_sum16
=
v_add
(
v_sum16
,
v_add
(
v_src0
,
v_src1
))
;
v_int16
v_tmp0
,
v_tmp1
;
v_zip
(
v_src0
,
v_src1
,
v_tmp0
,
v_tmp1
);
v_sqsum
+=
v_dotprod
(
v_tmp0
,
v_tmp0
)
+
v_dotprod
(
v_tmp1
,
v_tmp1
);
v_sqsum
=
v_add
(
v_sqsum
,
v_add
(
v_dotprod
(
v_tmp0
,
v_tmp0
),
v_dotprod
(
v_tmp1
,
v_tmp1
))
);
}
v_int32
v_half0
,
v_half1
;
v_expand
(
v_sum16
,
v_half0
,
v_half1
);
v_sum
+=
v_half0
+
v_half1
;
v_sum
=
v_add
(
v_sum
,
v_add
(
v_half0
,
v_half1
))
;
}
if
(
x
<=
len
-
v_int16
::
nlanes
)
if
(
x
<=
len
-
VTraits
<
v_int16
>::
vlanes
()
)
{
v_int16
v_src
=
vx_load_expand
(
src0
+
x
);
v_int16
v_half
=
v_combine_high
(
v_src
,
v_src
);
v_int32
v_tmp0
,
v_tmp1
;
v_expand
(
v_
src
+
v_half
,
v_tmp0
,
v_tmp1
);
v_sum
+=
v_tmp0
;
v_expand
(
v_
add
(
v_src
,
v_half
)
,
v_tmp0
,
v_tmp1
);
v_sum
=
v_add
(
v_sum
,
v_tmp0
)
;
v_int16
v_tmp2
,
v_tmp3
;
v_zip
(
v_src
,
v_half
,
v_tmp2
,
v_tmp3
);
v_sqsum
+=
v_dotprod
(
v_tmp2
,
v_tmp2
);
x
+=
v_int16
::
nlanes
;
v_sqsum
=
v_add
(
v_sqsum
,
v_dotprod
(
v_tmp2
,
v_tmp2
)
);
x
+=
VTraits
<
v_int16
>::
vlanes
()
;
}
if
(
cn
==
1
)
...
...
@@ -146,13 +146,13 @@ struct SumSqr_SIMD<schar, int, int>
}
else
{
int
CV_DECL_ALIGNED
(
CV_SIMD_WIDTH
)
ar
[
2
*
v_int32
::
nlanes
];
int
CV_DECL_ALIGNED
(
CV_SIMD_WIDTH
)
ar
[
2
*
VTraits
<
v_int32
>::
max_
nlanes
];
v_store
(
ar
,
v_sum
);
v_store
(
ar
+
v_int32
::
nlanes
,
v_sqsum
);
for
(
int
i
=
0
;
i
<
v_int32
::
nlanes
;
++
i
)
v_store
(
ar
+
VTraits
<
v_int32
>::
vlanes
()
,
v_sqsum
);
for
(
int
i
=
0
;
i
<
VTraits
<
v_int32
>::
vlanes
()
;
++
i
)
{
sum
[
i
%
cn
]
+=
ar
[
i
];
sqsum
[
i
%
cn
]
+=
ar
[
v_int32
::
nlanes
+
i
];
sqsum
[
i
%
cn
]
+=
ar
[
VTraits
<
v_int32
>::
vlanes
()
+
i
];
}
}
v_cleanup
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录