Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
dff8e29f
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
dff8e29f
编写于
12月 19, 2019
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16139 from alalek:core_flip_avoid_unaligned
上级
4733a19b
8d22ac20
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
102 additions
and
12 deletions
+102
-12
modules/core/include/opencv2/core/utility.hpp
modules/core/include/opencv2/core/utility.hpp
+37
-0
modules/core/src/copy.cpp
modules/core/src/copy.cpp
+65
-12
未找到文件。
modules/core/include/opencv2/core/utility.hpp
浏览文件 @
dff8e29f
...
...
@@ -514,6 +514,43 @@ static inline size_t roundUp(size_t a, unsigned int b)
return
a
+
b
-
1
-
(
a
+
b
-
1
)
%
b
;
}
/** @brief Alignment check of passed values

Usage: `isAligned<sizeof(int)>(...)`

@tparam N required alignment; must be a positive power of 2 (2**k, 2^k)
@tparam T type of the checked value; it is converted to `size_t` with a C-style cast
@param data value to check
@return true if the bits of `data` selected by the mask `N - 1` are all zero

@note The power-of-2 requirement on N is verified at compile time.
*/
template<int N, typename T> static inline
bool isAligned(const T& data)
{
    // A positive power of 2 has exactly one bit set. The explicit N > 0 test rejects N == 0,
    // which would otherwise pass the bit trick and produce an all-ones mask below.
    CV_StaticAssert((N > 0 && (N & (N - 1)) == 0), "Alignment(N) must be a positive power of 2");
    // C-style cast is intentional so that both integer and pointer values are accepted.
    return (((size_t)data) & (N - 1)) == 0;
}
/** @overload
@param p1 pointer whose address is checked against the alignment N
*/
template<int N> static inline
bool isAligned(const void* p1)
{
    // Convert the address to an integer and delegate to the value-based check.
    const size_t address = (size_t)p1;
    return isAligned<N>(address);
}
/** @overload
@param p1 first pointer to check
@param p2 second pointer to check
@return true only if both addresses satisfy the alignment N
*/
template<int N> static inline
bool isAligned(const void* p1, const void* p2)
{
    // A low bit set in either address survives the OR, so a single mask test covers both.
    const size_t combined = (size_t)p1 | (size_t)p2;
    return isAligned<N>(combined);
}
/** @overload
@param p1 first pointer to check
@param p2 second pointer to check
@param p3 third pointer to check
@return true only if all three addresses satisfy the alignment N
*/
template<int N> static inline
bool isAligned(const void* p1, const void* p2, const void* p3)
{
    // OR the addresses together: any misaligned pointer leaves a low bit set in the result.
    size_t combined = (size_t)p1;
    combined |= (size_t)p2;
    combined |= (size_t)p3;
    return isAligned<N>(combined);
}
/** @overload
@param p1 first pointer to check
@param p2 second pointer to check
@param p3 third pointer to check
@param p4 fourth pointer to check
@return true only if all four addresses satisfy the alignment N
*/
template<int N> static inline
bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4)
{
    // OR the addresses together: any misaligned pointer leaves a low bit set in the result.
    size_t combined = (size_t)p1;
    combined |= (size_t)p2;
    combined |= (size_t)p3;
    combined |= (size_t)p4;
    return isAligned<N>(combined);
}
/** @brief Enables or disables the optimized code.
The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2,
...
...
modules/core/src/copy.cpp
浏览文件 @
dff8e29f
...
...
@@ -563,6 +563,12 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask)
return
*
this
;
}
// CV_CHECK_ALIGNMENT selects whether the flip code below performs explicit alignment
// checks: 1 for NEON builds that are not AArch64, 0 for every other configuration.
#if defined(__aarch64__) || !CV_NEON
#define CV_CHECK_ALIGNMENT 0
#else
#define CV_CHECK_ALIGNMENT 1
#endif
#if CV_SIMD128
template
<
typename
V
>
CV_ALWAYS_INLINE
void
flipHoriz_single
(
const
uchar
*
src
,
size_t
sstep
,
uchar
*
dst
,
size_t
dstep
,
Size
size
,
size_t
esz
)
{
...
...
@@ -572,6 +578,10 @@ template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s
int
width_1
=
width
&
-
v_uint8x16
::
nlanes
;
int
i
,
j
;
#if CV_CHECK_ALIGNMENT
CV_Assert
(
isAligned
<
sizeof
(
T
)
>
(
src
,
dst
));
#endif
for
(
;
size
.
height
--
;
src
+=
sstep
,
dst
+=
dstep
)
{
for
(
i
=
0
,
j
=
end
;
i
<
width_1
;
i
+=
v_uint8x16
::
nlanes
,
j
-=
v_uint8x16
::
nlanes
)
...
...
@@ -585,7 +595,7 @@ template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s
v_store
((
T
*
)(
dst
+
j
-
v_uint8x16
::
nlanes
),
t0
);
v_store
((
T
*
)(
dst
+
i
),
t1
);
}
if
(
((
size_t
)
src
|
(
size_t
)
dst
)
%
sizeof
(
T
)
==
0
)
if
(
isAligned
<
sizeof
(
T
)
>
(
src
,
dst
)
)
{
for
(
;
i
<
width
;
i
+=
sizeof
(
T
),
j
-=
sizeof
(
T
)
)
{
...
...
@@ -620,6 +630,11 @@ template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const
int
end
=
(
int
)(
size
.
width
*
esz
);
int
width
=
(
end
+
1
)
/
2
;
#if CV_CHECK_ALIGNMENT
CV_Assert
(
isAligned
<
sizeof
(
T1
)
>
(
src
,
dst
));
CV_Assert
(
isAligned
<
sizeof
(
T2
)
>
(
src
,
dst
));
#endif
for
(
;
size
.
height
--
;
src
+=
sstep
,
dst
+=
dstep
)
{
for
(
int
i
=
0
,
j
=
end
;
i
<
width
;
i
+=
sizeof
(
T1
)
+
sizeof
(
T2
),
j
-=
sizeof
(
T1
)
+
sizeof
(
T2
)
)
...
...
@@ -644,6 +659,9 @@ static void
flipHoriz
(
const
uchar
*
src
,
size_t
sstep
,
uchar
*
dst
,
size_t
dstep
,
Size
size
,
size_t
esz
)
{
#if CV_SIMD
#if CV_CHECK_ALIGNMENT
size_t
alignmentMark
=
((
size_t
)
src
)
|
((
size_t
)
dst
)
|
sstep
|
dstep
;
#endif
if
(
esz
==
2
*
v_uint8x16
::
nlanes
)
{
int
end
=
(
int
)(
size
.
width
*
esz
);
...
...
@@ -693,15 +711,27 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
}
}
}
else
if
(
esz
==
8
)
else
if
(
esz
==
8
#if CV_CHECK_ALIGNMENT
&&
isAligned
<
sizeof
(
uint64
)
>
(
alignmentMark
)
#endif
)
{
flipHoriz_single
<
v_uint64x2
>
(
src
,
sstep
,
dst
,
dstep
,
size
,
esz
);
}
else
if
(
esz
==
4
)
else
if
(
esz
==
4
#if CV_CHECK_ALIGNMENT
&&
isAligned
<
sizeof
(
unsigned
)
>
(
alignmentMark
)
#endif
)
{
flipHoriz_single
<
v_uint32x4
>
(
src
,
sstep
,
dst
,
dstep
,
size
,
esz
);
}
else
if
(
esz
==
2
)
else
if
(
esz
==
2
#if CV_CHECK_ALIGNMENT
&&
isAligned
<
sizeof
(
ushort
)
>
(
alignmentMark
)
#endif
)
{
flipHoriz_single
<
v_uint16x8
>
(
src
,
sstep
,
dst
,
dstep
,
size
,
esz
);
}
...
...
@@ -709,7 +739,11 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
{
flipHoriz_single
<
v_uint8x16
>
(
src
,
sstep
,
dst
,
dstep
,
size
,
esz
);
}
else
if
(
esz
==
24
)
else
if
(
esz
==
24
#if CV_CHECK_ALIGNMENT
&&
isAligned
<
sizeof
(
uint64_t
)
>
(
alignmentMark
)
#endif
)
{
int
end
=
(
int
)(
size
.
width
*
esz
);
int
width
=
(
end
+
1
)
/
2
;
...
...
@@ -732,6 +766,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
}
}
}
#if !CV_CHECK_ALIGNMENT
else
if
(
esz
==
12
)
{
flipHoriz_double
<
uint64_t
,
uint
>
(
src
,
sstep
,
dst
,
dstep
,
size
,
esz
);
...
...
@@ -744,8 +779,9 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
{
flipHoriz_double
<
ushort
,
uchar
>
(
src
,
sstep
,
dst
,
dstep
,
size
,
esz
);
}
else
#endif
else
#endif // CV_SIMD
{
int
i
,
j
,
limit
=
(
int
)(((
size
.
width
+
1
)
/
2
)
*
esz
);
AutoBuffer
<
int
>
_tab
(
size
.
width
*
esz
);
...
...
@@ -779,16 +815,33 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size,
{
int
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
size
.
width
-
(
v_int32
::
nlanes
*
4
);
i
+=
v_int32
::
nlanes
*
4
)
#if CV_CHECK_ALIGNMENT
if
(
isAligned
<
sizeof
(
int
)
>
(
src0
,
src1
,
dst0
,
dst1
))
#endif
{
for
(;
i
<=
size
.
width
-
CV_SIMD_WIDTH
;
i
+=
CV_SIMD_WIDTH
)
{
v_int32
t0
=
vx_load
((
int
*
)(
src0
+
i
));
v_int32
t1
=
vx_load
((
int
*
)(
src1
+
i
));
vx_store
((
int
*
)(
dst0
+
i
),
t1
);
vx_store
((
int
*
)(
dst1
+
i
),
t0
);
}
}
#if CV_CHECK_ALIGNMENT
else
{
for
(;
i
<=
size
.
width
-
CV_SIMD_WIDTH
;
i
+=
CV_SIMD_WIDTH
)
{
v_uint8
t0
=
vx_load
(
src0
+
i
);
v_uint8
t1
=
vx_load
(
src1
+
i
);
vx_store
(
dst0
+
i
,
t1
);
vx_store
(
dst1
+
i
,
t0
);
}
}
#endif
#endif
if
(
((
size_t
)
src0
|
(
size_t
)
dst0
|
(
size_t
)
src1
|
(
size_t
)
dst1
)
%
sizeof
(
int
)
==
0
)
if
(
isAligned
<
sizeof
(
int
)
>
(
src0
,
src1
,
dst0
,
dst1
)
)
{
for
(
;
i
<=
size
.
width
-
16
;
i
+=
16
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录