Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
2d813910
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
2d813910
编写于
9月 20, 2017
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #9669 from kraj:master
上级
c57aef75
2c7a123e
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
104 additions
and
104 deletions
+104
-104
3rdparty/carotene/src/channel_extract.cpp
3rdparty/carotene/src/channel_extract.cpp
+2
-2
3rdparty/carotene/src/channels_combine.cpp
3rdparty/carotene/src/channels_combine.cpp
+1
-1
3rdparty/carotene/src/colorconvert.cpp
3rdparty/carotene/src/colorconvert.cpp
+39
-39
3rdparty/carotene/src/convert.cpp
3rdparty/carotene/src/convert.cpp
+27
-27
3rdparty/carotene/src/convert_scale.cpp
3rdparty/carotene/src/convert_scale.cpp
+28
-28
3rdparty/carotene/src/gaussian_blur.cpp
3rdparty/carotene/src/gaussian_blur.cpp
+1
-1
3rdparty/carotene/src/pyramid.cpp
3rdparty/carotene/src/pyramid.cpp
+4
-4
3rdparty/carotene/src/scharr.cpp
3rdparty/carotene/src/scharr.cpp
+2
-2
未找到文件。
3rdparty/carotene/src/channel_extract.cpp
浏览文件 @
2d813910
...
...
@@ -231,7 +231,7 @@ void extract4(const Size2D &size,
srcStride == dst2Stride && \
srcStride == dst3Stride &&
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
#define SPLIT_ASM2(sgn, bits) __asm__ ( \
"vld2." #bits " {d0, d2}, [%[in0]] \n\t" \
...
...
@@ -351,7 +351,7 @@ void extract4(const Size2D &size,
} \
}
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
#define ALPHA_QUAD(sgn, bits) { \
internal::prefetch(src + sj); \
...
...
3rdparty/carotene/src/channels_combine.cpp
浏览文件 @
2d813910
...
...
@@ -77,7 +77,7 @@ namespace CAROTENE_NS {
dstStride == src2Stride && \
dstStride == src3Stride &&
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
#define MERGE_ASM2(sgn, bits) __asm__ ( \
"vld1." #bits " {d0-d1}, [%[in0]] \n\t" \
...
...
3rdparty/carotene/src/colorconvert.cpp
浏览文件 @
2d813910
...
...
@@ -97,7 +97,7 @@ void rgb2gray(const Size2D &size, COLOR_SPACE color_space,
const
u32
G2Y
=
color_space
==
COLOR_SPACE_BT601
?
G2Y_BT601
:
G2Y_BT709
;
const
u32
B2Y
=
color_space
==
COLOR_SPACE_BT601
?
B2Y_BT601
:
B2Y_BT709
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
int16x4_t
v_r2y
asm
(
"d31"
)
=
vmov_n_s16
(
R2Y
);
register
int16x4_t
v_g2y
asm
(
"d30"
)
=
vmov_n_s16
(
G2Y
);
register
int16x4_t
v_b2y
asm
(
"d29"
)
=
vmov_n_s16
(
B2Y
);
...
...
@@ -116,7 +116,7 @@ void rgb2gray(const Size2D &size, COLOR_SPACE color_space,
u8
*
dst
=
internal
::
getRowPtr
(
dstBase
,
dstStride
,
i
);
size_t
sj
=
0u
,
dj
=
0u
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
for
(;
dj
<
roiw8
;
sj
+=
24
,
dj
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
...
...
@@ -198,7 +198,7 @@ void rgbx2gray(const Size2D &size, COLOR_SPACE color_space,
const
u32
G2Y
=
color_space
==
COLOR_SPACE_BT601
?
G2Y_BT601
:
G2Y_BT709
;
const
u32
B2Y
=
color_space
==
COLOR_SPACE_BT601
?
B2Y_BT601
:
B2Y_BT709
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
int16x4_t
v_r2y
asm
(
"d31"
)
=
vmov_n_s16
(
R2Y
);
register
int16x4_t
v_g2y
asm
(
"d30"
)
=
vmov_n_s16
(
G2Y
);
register
int16x4_t
v_b2y
asm
(
"d29"
)
=
vmov_n_s16
(
B2Y
);
...
...
@@ -217,7 +217,7 @@ void rgbx2gray(const Size2D &size, COLOR_SPACE color_space,
u8
*
dst
=
internal
::
getRowPtr
(
dstBase
,
dstStride
,
i
);
size_t
sj
=
0u
,
dj
=
0u
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
for
(;
dj
<
roiw8
;
sj
+=
32
,
dj
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
...
...
@@ -300,7 +300,7 @@ void bgr2gray(const Size2D &size, COLOR_SPACE color_space,
const
u32
G2Y
=
color_space
==
COLOR_SPACE_BT601
?
G2Y_BT601
:
G2Y_BT709
;
const
u32
B2Y
=
color_space
==
COLOR_SPACE_BT601
?
B2Y_BT601
:
B2Y_BT709
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
int16x4_t
v_r2y
asm
(
"d31"
)
=
vmov_n_s16
(
R2Y
);
register
int16x4_t
v_g2y
asm
(
"d30"
)
=
vmov_n_s16
(
G2Y
);
register
int16x4_t
v_b2y
asm
(
"d29"
)
=
vmov_n_s16
(
B2Y
);
...
...
@@ -319,7 +319,7 @@ void bgr2gray(const Size2D &size, COLOR_SPACE color_space,
u8
*
dst
=
internal
::
getRowPtr
(
dstBase
,
dstStride
,
i
);
size_t
sj
=
0u
,
dj
=
0u
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
for
(;
dj
<
roiw8
;
sj
+=
24
,
dj
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
...
...
@@ -402,7 +402,7 @@ void bgrx2gray(const Size2D &size, COLOR_SPACE color_space,
const
u32
G2Y
=
color_space
==
COLOR_SPACE_BT601
?
G2Y_BT601
:
G2Y_BT709
;
const
u32
B2Y
=
color_space
==
COLOR_SPACE_BT601
?
B2Y_BT601
:
B2Y_BT709
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
int16x4_t
v_r2y
asm
(
"d31"
)
=
vmov_n_s16
(
R2Y
);
register
int16x4_t
v_g2y
asm
(
"d30"
)
=
vmov_n_s16
(
G2Y
);
register
int16x4_t
v_b2y
asm
(
"d29"
)
=
vmov_n_s16
(
B2Y
);
...
...
@@ -421,7 +421,7 @@ void bgrx2gray(const Size2D &size, COLOR_SPACE color_space,
u8
*
dst
=
internal
::
getRowPtr
(
dstBase
,
dstStride
,
i
);
size_t
sj
=
0u
,
dj
=
0u
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
for
(;
dj
<
roiw8
;
sj
+=
32
,
dj
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
...
...
@@ -512,7 +512,7 @@ void gray2rgb(const Size2D &size,
for
(;
sj
<
roiw16
;
sj
+=
16
,
dj
+=
48
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d0-d1}, [%[in0]]
\n\t
"
"vmov.8 q1, q0
\n\t
"
...
...
@@ -538,7 +538,7 @@ void gray2rgb(const Size2D &size,
if
(
sj
<
roiw8
)
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d0}, [%[in]]
\n\t
"
"vmov.8 d1, d0
\n\t
"
...
...
@@ -584,7 +584,7 @@ void gray2rgbx(const Size2D &size,
size_t
roiw16
=
size
.
width
>=
15
?
size
.
width
-
15
:
0
;
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
uint8x16_t
vc255
asm
(
"q4"
)
=
vmovq_n_u8
(
255
);
#else
uint8x16x4_t
vRgba
;
...
...
@@ -602,7 +602,7 @@ void gray2rgbx(const Size2D &size,
for
(;
sj
<
roiw16
;
sj
+=
16
,
dj
+=
64
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d0-d1}, [%[in0]]
\n\t
"
"vmov.8 q1, q0
\n\t
"
...
...
@@ -628,7 +628,7 @@ void gray2rgbx(const Size2D &size,
if
(
sj
<
roiw8
)
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d5}, [%[in]]
\n\t
"
"vmov.8 d6, d5
\n\t
"
...
...
@@ -1409,7 +1409,7 @@ inline void convertToHSV(const s32 r, const s32 g, const s32 b,
"d24","d25","d26","d27","d28","d29","d30","d31" \
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
#define YCRCB_CONSTS \
register int16x4_t vcYR asm ("d31") = vmov_n_s16(4899); \
...
...
@@ -1555,7 +1555,7 @@ inline uint8x8x3_t convertToYCrCb( const int16x8_t& vR, const int16x8_t& vG, con
#define COEFF_G ( 8663)
#define COEFF_B (-17705)
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
#define YUV420ALPHA3_CONST
#define YUV420ALPHA4_CONST register uint8x16_t c255 asm ("q13") = vmovq_n_u8(255);
#define YUV420ALPHA3_CONVERT
...
...
@@ -1852,7 +1852,7 @@ void rgb2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
const
s32
hsv_shift
=
12
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
const
f32
vsdiv_table
=
f32
(
255
<<
hsv_shift
);
register
f32
vhdiv_table
=
f32
(
hrange
<<
hsv_shift
);
register
const
s32
vhrange
=
hrange
;
...
...
@@ -1871,7 +1871,7 @@ void rgb2hsv(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
24
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM
(
vld3
.8
{
d0
-
d2
},
d0
,
d2
)
#else
uint8x8x3_t
vRgb
=
vld3_u8
(
src
+
sj
);
...
...
@@ -1904,7 +1904,7 @@ void rgbx2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
const
s32
hsv_shift
=
12
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
const
f32
vsdiv_table
=
f32
(
255
<<
hsv_shift
);
register
f32
vhdiv_table
=
f32
(
hrange
<<
hsv_shift
);
register
const
s32
vhrange
=
hrange
;
...
...
@@ -1923,7 +1923,7 @@ void rgbx2hsv(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
32
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM
(
vld4
.8
{
d0
-
d3
},
d0
,
d2
)
#else
uint8x8x4_t
vRgb
=
vld4_u8
(
src
+
sj
);
...
...
@@ -1956,7 +1956,7 @@ void bgr2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
const
s32
hsv_shift
=
12
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
const
f32
vsdiv_table
=
f32
(
255
<<
hsv_shift
);
register
f32
vhdiv_table
=
f32
(
hrange
<<
hsv_shift
);
register
const
s32
vhrange
=
hrange
;
...
...
@@ -1975,7 +1975,7 @@ void bgr2hsv(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
24
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM
(
vld3
.8
{
d0
-
d2
},
d2
,
d0
)
#else
uint8x8x3_t
vRgb
=
vld3_u8
(
src
+
sj
);
...
...
@@ -2008,7 +2008,7 @@ void bgrx2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
const
s32
hsv_shift
=
12
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
const
f32
vsdiv_table
=
f32
(
255
<<
hsv_shift
);
register
f32
vhdiv_table
=
f32
(
hrange
<<
hsv_shift
);
register
const
s32
vhrange
=
hrange
;
...
...
@@ -2027,7 +2027,7 @@ void bgrx2hsv(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
32
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM
(
vld4
.8
{
d0
-
d3
},
d2
,
d0
)
#else
uint8x8x4_t
vRgb
=
vld4_u8
(
src
+
sj
);
...
...
@@ -2068,7 +2068,7 @@ void rgbx2bgr565(const Size2D &size,
for
(;
j
<
roiw16
;
sj
+=
64
,
dj
+=
32
,
j
+=
16
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld4.8 {d2, d4, d6, d8}, [%[in0]] @ q0 q1 q2 q3 q4
\n\t
"
"vld4.8 {d3, d5, d7, d9}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx
\n\t
"
...
...
@@ -2122,7 +2122,7 @@ void rgb2bgr565(const Size2D &size,
for
(;
j
<
roiw16
;
sj
+=
48
,
dj
+=
32
,
j
+=
16
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld3.8 {d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3 q4
\n\t
"
"vld3.8 {d3, d5, d7}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx
\n\t
"
...
...
@@ -2176,7 +2176,7 @@ void rgbx2rgb565(const Size2D &size,
for
(;
j
<
roiw16
;
sj
+=
64
,
dj
+=
32
,
j
+=
16
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld4.8 {d0, d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3
\n\t
"
"vld4.8 {d1, d3, d5, d7}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB aaaaAAAA
\n\t
"
...
...
@@ -2230,7 +2230,7 @@ void rgb2rgb565(const Size2D &size,
for
(;
j
<
roiw16
;
sj
+=
48
,
dj
+=
32
,
j
+=
16
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld3.8 {d0, d2, d4}, [%[in0]] @ q0 q1 q2 q3
\n\t
"
"vld3.8 {d1, d3, d5}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx
\n\t
"
...
...
@@ -2285,7 +2285,7 @@ void rgb2ycrcb(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
24
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB
(
vld3
.8
{
d0
-
d2
},
d0
,
d1
,
d2
)
#else
uint8x8x3_t
vRgb
=
vld3_u8
(
src
+
sj
);
...
...
@@ -2329,7 +2329,7 @@ void rgbx2ycrcb(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
32
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB
(
vld4
.8
{
d0
-
d3
},
d0
,
d1
,
d2
)
#else
uint8x8x4_t
vRgba
=
vld4_u8
(
src
+
sj
);
...
...
@@ -2373,7 +2373,7 @@ void bgr2ycrcb(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
24
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB
(
vld3
.8
{
d0
-
d2
},
d2
,
d1
,
d0
)
#else
uint8x8x3_t
vBgr
=
vld3_u8
(
src
+
sj
);
...
...
@@ -2417,7 +2417,7 @@ void bgrx2ycrcb(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
32
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB
(
vld4
.8
{
d0
-
d3
},
d2
,
d1
,
d0
)
#else
uint8x8x4_t
vBgra
=
vld4_u8
(
src
+
sj
);
...
...
@@ -2499,7 +2499,7 @@ void yuv420sp2rgb(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
3
,
d1
,
d0
,
q5
,
q6
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2545,7 +2545,7 @@ void yuv420sp2rgbx(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
4
,
d1
,
d0
,
q5
,
q6
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2591,7 +2591,7 @@ void yuv420i2rgb(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
3
,
d0
,
d1
,
q5
,
q6
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2637,7 +2637,7 @@ void yuv420i2rgbx(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
4
,
d0
,
d1
,
q5
,
q6
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2683,7 +2683,7 @@ void yuv420sp2bgr(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
3
,
d1
,
d0
,
q6
,
q5
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2729,7 +2729,7 @@ void yuv420sp2bgrx(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
4
,
d1
,
d0
,
q6
,
q5
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2775,7 +2775,7 @@ void yuv420i2bgr(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
3
,
d0
,
d1
,
q6
,
q5
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2821,7 +2821,7 @@ void yuv420i2bgrx(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
4
,
d0
,
d1
,
q6
,
q5
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
3rdparty/carotene/src/convert.cpp
浏览文件 @
2d813910
...
...
@@ -101,7 +101,7 @@ CVT_FUNC(u8, s8, 16,
}
})
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
u8
,
u16
,
16
,
register
uint8x16_t
zero0
asm
(
"q1"
)
=
vmovq_n_u8
(
0
);,
{
...
...
@@ -135,7 +135,7 @@ CVT_FUNC(u8, u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
u8
,
s32
,
16
,
register
uint8x16_t
zero0
asm
(
"q1"
)
=
vmovq_n_u8
(
0
);
register
uint8x16_t
zero1
asm
(
"q2"
)
=
vmovq_n_u8
(
0
);
...
...
@@ -173,7 +173,7 @@ CVT_FUNC(u8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
u8
,
f32
,
16
,
,
{
...
...
@@ -248,7 +248,7 @@ CVT_FUNC(s8, u8, 16,
}
})
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
s8
,
u16
,
16
,
register
uint8x16_t
zero0
asm
(
"q1"
)
=
vmovq_n_u8
(
0
);,
{
...
...
@@ -284,7 +284,7 @@ CVT_FUNC(s8, u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s8
,
s16
,
16
,
,
{
...
...
@@ -323,7 +323,7 @@ CVT_FUNC(s8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
s8
,
s32
,
16
,
,
{
...
...
@@ -377,7 +377,7 @@ CVT_FUNC(s8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s8
,
f32
,
16
,
,
{
...
...
@@ -440,7 +440,7 @@ CVT_FUNC(s8, f32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
u16
,
u8
,
16
,
,
{
...
...
@@ -479,7 +479,7 @@ CVT_FUNC(u16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
u16
,
s8
,
16
,
register
uint8x16_t
v127
asm
(
"q4"
)
=
vmovq_n_u8
(
127
);,
{
...
...
@@ -522,7 +522,7 @@ CVT_FUNC(u16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
u16
,
s16
,
8
,
register
uint16x8_t
v32767
asm
(
"q4"
)
=
vmovq_n_u16
(
0x7FFF
);,
{
...
...
@@ -555,7 +555,7 @@ CVT_FUNC(u16, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
u16
,
s32
,
8
,
register
uint16x8_t
zero0
asm
(
"q1"
)
=
vmovq_n_u16
(
0
);,
{
...
...
@@ -589,7 +589,7 @@ CVT_FUNC(u16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
u16
,
f32
,
8
,
,
{
...
...
@@ -633,7 +633,7 @@ CVT_FUNC(u16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s16
,
u8
,
16
,
,
{
...
...
@@ -672,7 +672,7 @@ CVT_FUNC(s16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s16
,
s8
,
16
,
,
{
...
...
@@ -711,7 +711,7 @@ CVT_FUNC(s16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
s16
,
u16
,
8
,
register
int16x8_t
vZero
asm
(
"q4"
)
=
vmovq_n_s16
(
0
);,
{
...
...
@@ -747,7 +747,7 @@ CVT_FUNC(s16, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s16
,
s32
,
8
,
,
{
...
...
@@ -786,7 +786,7 @@ CVT_FUNC(s16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s16
,
f32
,
8
,
,
{
...
...
@@ -829,7 +829,7 @@ CVT_FUNC(s16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
u8
,
8
,
,
{
...
...
@@ -870,7 +870,7 @@ CVT_FUNC(s32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
s8
,
8
,
,
{
...
...
@@ -911,7 +911,7 @@ CVT_FUNC(s32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
u16
,
8
,
,
{
...
...
@@ -950,7 +950,7 @@ CVT_FUNC(s32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
s16
,
8
,
,
{
...
...
@@ -989,7 +989,7 @@ CVT_FUNC(s32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
f32
,
8
,
,
{
...
...
@@ -1034,7 +1034,7 @@ CVT_FUNC(s32, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
u8
,
8
,
register
float32x4_t
vmult
asm
(
"q0"
)
=
vdupq_n_f32
((
float
)(
1
<<
16
));
register
uint32x4_t
vmask
asm
(
"q1"
)
=
vdupq_n_u32
(
1
<<
16
);,
...
...
@@ -1101,7 +1101,7 @@ CVT_FUNC(f32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
s8
,
8
,
register
float32x4_t
vhalf
asm
(
"q0"
)
=
vdupq_n_f32
(
0.5
f
);,
{
...
...
@@ -1153,7 +1153,7 @@ CVT_FUNC(f32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
u16
,
8
,
register
float32x4_t
vhalf
asm
(
"q0"
)
=
vdupq_n_f32
(
0.5
f
);,
{
...
...
@@ -1212,7 +1212,7 @@ CVT_FUNC(f32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
s16
,
8
,
register
float32x4_t
vhalf
asm
(
"q0"
)
=
vdupq_n_f32
(
0.5
f
);,
{
...
...
@@ -1271,7 +1271,7 @@ CVT_FUNC(f32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
s32
,
8
,
register
float32x4_t
vhalf
asm
(
"q0"
)
=
vdupq_n_f32
(
0.5
f
);,
{
...
...
3rdparty/carotene/src/convert_scale.cpp
浏览文件 @
2d813910
...
...
@@ -473,7 +473,7 @@ CVTS_FUNC(u8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u8
,
s32
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -562,7 +562,7 @@ CVTS_FUNC(u8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u8
,
f32
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -985,7 +985,7 @@ CVTS_FUNC(s8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s8
,
s32
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1074,7 +1074,7 @@ CVTS_FUNC(s8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s8
,
f32
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -1155,7 +1155,7 @@ CVTS_FUNC(s8, f32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
u8
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1214,7 +1214,7 @@ CVTS_FUNC(u16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
s8
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1273,7 +1273,7 @@ CVTS_FUNC(u16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1
(
u16
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1330,7 +1330,7 @@ CVTS_FUNC1(u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
s16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1387,7 +1387,7 @@ CVTS_FUNC(u16, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
s32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1443,7 +1443,7 @@ CVTS_FUNC(u16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
f32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -1495,7 +1495,7 @@ CVTS_FUNC(u16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
u8
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1554,7 +1554,7 @@ CVTS_FUNC(s16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
s8
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1613,7 +1613,7 @@ CVTS_FUNC(s16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
u16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1670,7 +1670,7 @@ CVTS_FUNC(s16, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1
(
s16
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1727,7 +1727,7 @@ CVTS_FUNC1(s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
s32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1783,7 +1783,7 @@ CVTS_FUNC(s16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
f32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -1835,7 +1835,7 @@ CVTS_FUNC(s16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
u8
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1893,7 +1893,7 @@ CVTS_FUNC(s32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
s8
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1951,7 +1951,7 @@ CVTS_FUNC(s32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
u16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2007,7 +2007,7 @@ CVTS_FUNC(s32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
s16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2063,7 +2063,7 @@ CVTS_FUNC(s32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1
(
s32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2118,7 +2118,7 @@ CVTS_FUNC1(s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
f32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -2169,7 +2169,7 @@ CVTS_FUNC(s32, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
u8
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)((
1
<<
16
)
*
alpha
));
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)((
1
<<
16
)
*
beta
));
...
...
@@ -2239,7 +2239,7 @@ CVTS_FUNC(f32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
s8
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2293,7 +2293,7 @@ CVTS_FUNC(f32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
u16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2345,7 +2345,7 @@ CVTS_FUNC(f32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
s16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2397,7 +2397,7 @@ CVTS_FUNC(f32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
s32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2448,7 +2448,7 @@ CVTS_FUNC(f32, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1
(
f32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
3rdparty/carotene/src/gaussian_blur.cpp
浏览文件 @
2d813910
...
...
@@ -327,7 +327,7 @@ void gaussianBlur5x5(const Size2D &size, s32 cn,
u16
*
lidx1
=
lane
+
x
-
1
*
2
;
u16
*
lidx3
=
lane
+
x
+
1
*
2
;
u16
*
lidx4
=
lane
+
x
+
2
*
2
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
__volatile__
(
"vld2.16 {d0, d2}, [%[in0]]!
\n\t
"
"vld2.16 {d1, d3}, [%[in0]]
\n\t
"
...
...
3rdparty/carotene/src/pyramid.cpp
浏览文件 @
2d813910
...
...
@@ -331,7 +331,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for
(;
x
<
roiw8
;
x
+=
8
)
{
internal
::
prefetch
(
lane
+
2
*
x
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld2.16 {d0-d3}, [%[in0]]
\n\t
"
"vld2.16 {d4-d7}, [%[in4]]
\n\t
"
...
...
@@ -538,7 +538,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for
(;
x
<
roiw4
;
x
+=
4
)
{
internal
::
prefetch
(
lane
+
2
*
x
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld2.32 {d0-d3}, [%[in0]]
\n\t
"
"vld2.32 {d4-d7}, [%[in4]]
\n\t
"
...
...
@@ -672,7 +672,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
std
::
vector
<
f32
>
_buf
(
cn
*
(
srcSize
.
width
+
4
)
+
32
/
sizeof
(
f32
));
f32
*
lane
=
internal
::
alignPtr
(
&
_buf
[
2
*
cn
],
32
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
float32x4_t
vc6d4f32
asm
(
"q11"
)
=
vmovq_n_f32
(
1.5
f
);
// 6/4
register
float32x4_t
vc1d4f32
asm
(
"q12"
)
=
vmovq_n_f32
(
0.25
f
);
// 1/4
...
...
@@ -739,7 +739,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for
(;
x
<
roiw4
;
x
+=
4
)
{
internal
::
prefetch
(
lane
+
2
*
x
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
__volatile__
(
"vld2.32 {d0-d3}, [%[in0]]
\n\t
"
"vld2.32 {d8-d11}, [%[in4]]
\n\t
"
...
...
3rdparty/carotene/src/scharr.cpp
浏览文件 @
2d813910
...
...
@@ -109,7 +109,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
internal
::
prefetch
(
srow0
+
x
);
internal
::
prefetch
(
srow1
+
x
);
internal
::
prefetch
(
srow2
+
x
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d0}, [%[src0]]
\n\t
"
"vld1.8 {d2}, [%[src2]]
\n\t
"
...
...
@@ -161,7 +161,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
x
=
0
;
for
(
;
x
<
roiw8
;
x
+=
8
)
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
__asm__
(
"vld1.16 {d4-d5}, [%[s2ptr]]
\n\t
"
"vld1.16 {d8-d9}, [%[s4ptr]]
\n\t
"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录