Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
4499d126
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4499d126
编写于
9月 04, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
9月 04, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5427 modify arm cpu fp16 op: arithmetic
Merge pull request !5427 from 陶云浩/master
上级
12ff0be5
1fb6f1b6
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
561 addition
and
328 deletion
+561
-328
mindspore/lite/nnacl/fp16/arithmetic_fp16.c
mindspore/lite/nnacl/fp16/arithmetic_fp16.c
+561
-328
未找到文件。
mindspore/lite/nnacl/fp16/arithmetic_fp16.c
浏览文件 @
4499d126
...
...
@@ -580,27 +580,42 @@ int ElementOptSubFp16(float16_t *input0, float16_t *input1, float16_t *output, i
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
in0
-
in1
;
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
in0_opt
-
input1
[
i
];
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
in0_opt
-
input1
[
index
];
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
input0
[
i
]
-
in1_opt
;
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
in0
-
in1
;
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
input0
[
index
]
-
in1_opt
;
}
}
return
NNACL_OK
;
}
...
...
@@ -644,29 +659,46 @@ int ElementOptSubReluFp16(float16_t *input0, float16_t *input1, float16_t *outpu
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16x8_t
zeros
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vout
=
vmaxq_f16
(
vout
,
zeros
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vout
=
vmaxq_f16
(
vout
,
zeros
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
MSMAX
(
in0
-
in1
,
0
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMAX
(
in0_opt
-
input1
[
i
],
0
);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
res
=
in0_opt
-
input1
[
index
];
output
[
index
]
=
res
>
0
?
res
:
0
;
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vout
=
vmaxq_f16
(
vout
,
zeros
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMAX
(
input0
[
i
]
-
in1_opt
,
0
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
float16_t
res
=
in0
-
in1
;
output
[
index
]
=
res
>
0
?
res
:
0
;
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
res
=
input0
[
index
]
-
in1_opt
;
output
[
index
]
=
res
>
0
?
res
:
0
;
}
}
return
NNACL_OK
;
}
...
...
@@ -712,30 +744,45 @@ int ElementOptSubRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *outp
float16x8_t
zeros
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
float16x8_t
bounds
=
{
6
,
6
,
6
,
6
,
6
,
6
,
6
,
6
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vout
=
vminq_f16
(
vmaxq_f16
(
vout
,
zeros
),
bounds
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vout
=
vminq_f16
(
vmaxq_f16
(
vout
,
zeros
),
bounds
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
MSMIN
(
MSMAX
(
in0
-
in1
,
0
),
6
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMIN
(
MSMAX
(
in0_opt
-
input1
[
i
],
0
),
6
);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
MSMIN
(
MSMAX
(
in0_opt
-
input1
[
index
],
0
),
6
);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vsubq_f16
(
vin0
,
vin1
);
vout
=
vminq_f16
(
vmaxq_f16
(
vout
,
zeros
),
bounds
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMIN
(
MSMAX
(
input0
[
i
]
-
in1_opt
,
0
),
6
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
MSMIN
(
MSMAX
(
in0
-
in1
,
0
),
6
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
MSMIN
(
MSMAX
(
input0
[
index
]
-
in1_opt
,
0
),
6
);
}
}
return
NNACL_OK
;
}
...
...
@@ -781,41 +828,53 @@ int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, i
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num1_
==
1
)
{
if
(
in1_opt
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
}
else
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
if
(
input1
[
i
]
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
}
}
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vdivq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vdivq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
in0
/
in1
;
}
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
in0_opt
/
input1
[
i
];
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
if
(
in1
==
0
)
{
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
if
(
input1
[
index
]
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
output
[
index
]
=
in0_opt
/
input1
[
index
];
}
}
else
{
if
(
in1_opt
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
output
[
index
]
=
in0
/
in1
;
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vdivq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
input0
[
i
]
/
in1_opt
;
}
#endif
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
input0
[
index
]
/
in1_opt
;
}
}
return
NNACL_OK
;
}
...
...
@@ -867,43 +926,53 @@ int ElementOptDivReluFp16(float16_t *input0, float16_t *input1, float16_t *outpu
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16x8_t
zeros
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num1_
==
1
)
{
if
(
in1_opt
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
}
else
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
if
(
input1
[
i
]
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
}
}
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
);
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vdivq_f16
(
vin0
,
vin1
);
vout
=
vmaxq_f16
(
vout
,
zeros
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vmaxq_f16
(
vdivq_f16
(
vin0
,
vin1
),
zeros
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
MSMAX
(
in0
/
in1
,
0
);
}
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMAX
(
in0_opt
/
input1
[
i
],
0
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
if
(
in1
==
0
)
{
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
if
(
input1
[
index
]
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
output
[
index
]
=
MSMAX
(
in0_opt
/
input1
[
index
],
0
);
}
}
else
{
if
(
in1_opt
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
float16_t
res
=
in0
/
in1
;
output
[
index
]
=
res
>
0
?
res
:
0
;
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vmaxq_f16
(
vdivq_f16
(
vin0
,
vin1
),
zeros
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMAX
(
input0
[
i
]
/
in1_opt
,
0
);
}
#endif
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
MSMAX
(
input0
[
index
]
/
in1_opt
,
0
);
}
}
return
NNACL_OK
;
}
...
...
@@ -948,7 +1017,6 @@ int ElementOptDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *outp
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
...
...
@@ -957,42 +1025,53 @@ int ElementOptDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *outp
float16x8_t
zeros
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
float16x8_t
bounds
=
{
6
,
6
,
6
,
6
,
6
,
6
,
6
,
6
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num1_
==
1
)
{
if
(
in1_opt
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
}
else
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
if
(
input1
[
i
]
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
}
}
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
);
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vdivq_f16
(
vin0
,
vin1
);
vout
=
vminq_f16
(
vmaxq_f16
(
vout
,
zeros
),
bounds
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vminq_f16
(
vmaxq_f16
(
vdivq_f16
(
vin0
,
vin1
),
zeros
),
bounds
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
MSMIN
(
MSMAX
(
in0
/
in1
,
0
),
6
);
}
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMIN
(
MSMAX
(
in0_opt
/
input1
[
i
],
0
),
6
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
if
(
in1
==
0
)
{
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
if
(
input1
[
index
]
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
output
[
index
]
=
MSMIN
(
MSMAX
(
in0_opt
/
input1
[
index
],
0
),
6
);
}
}
else
{
if
(
in1_opt
==
0
)
{
return
NNACL_ERRCODE_DIVISOR_ZERO
;
}
output
[
index
]
=
MSMIN
(
MSMAX
(
in0
/
in1
,
0
),
6
);
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vminq_f16
(
vmaxq_f16
(
vdivq_f16
(
vin0
,
vin1
),
zeros
),
bounds
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMIN
(
MSMAX
(
input0
[
i
]
/
in1_opt
,
0
),
6
);
}
#endif
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
MSMIN
(
MSMAX
(
input0
[
index
]
/
in1_opt
,
0
),
6
);
}
}
return
NNACL_OK
;
}
...
...
@@ -1089,39 +1168,56 @@ int ElementOptLogicalAndFp16(float16_t *input0, float16_t *input1, float16_t *ou
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
float16x8_t
vtrue
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
float16x8_t
vfalse
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
uint16x8_t
mask
=
vmovq_n_u16
(((
uint16_t
)(
1u
<<
15
)
-
1
));
uint16x8_t
zeros
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0_
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1_
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
uint16x8_t
vin0
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin0_
),
mask
);
uint16x8_t
vin1
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin1_
),
mask
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_u16
(
vandq_u16
(
vin0
,
vin1
),
zeros
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0_
=
vin0_opt
;
float16x8_t
vin1_
=
vld1q_f16
(
input1
);
uint16x8_t
vin0
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin0_
),
mask
);
uint16x8_t
vin1
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin1_
),
mask
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_u16
(
vandq_u16
(
vin0
,
vin1
),
zeros
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
(
float16_t
)((
bool
)(
in0
)
&
(
bool
)(
in1
));
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)((
bool
)(
in0_opt
)
&
(
bool
)(
input1
[
i
]));
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)((
bool
)(
in0_opt
)
&
(
bool
)(
input1
[
index
]));
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0_
=
vld1q_f16
(
input0
);
float16x8_t
vin1_
=
vin1_opt
;
uint16x8_t
vin0
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin0_
),
mask
);
uint16x8_t
vin1
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin1_
),
mask
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_u16
(
vandq_u16
(
vin0
,
vin1
),
zeros
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)((
bool
)(
input0
[
i
])
&
(
bool
)(
in1_opt
));
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
(
float16_t
)((
bool
)(
in0
)
&
(
bool
)(
in1
));
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)((
bool
)(
input0
[
index
])
&
(
bool
)(
in1_opt
));
}
}
return
NNACL_OK
;
}
...
...
@@ -1160,39 +1256,56 @@ int ElementOptLogicalOrFp16(float16_t *input0, float16_t *input1, float16_t *out
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
float16x8_t
vtrue
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
float16x8_t
vfalse
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
uint16x8_t
mask
=
vmovq_n_u16
(((
uint16_t
)(
1u
<<
15
)
-
1
));
uint16x8_t
zeros
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0_
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1_
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
uint16x8_t
vin0
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin0_
),
mask
);
uint16x8_t
vin1
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin1_
),
mask
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_u16
(
vorrq_u16
(
vin0
,
vin1
),
zeros
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0_
=
vin0_opt
;
float16x8_t
vin1_
=
vld1q_f16
(
input1
);
uint16x8_t
vin0
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin0_
),
mask
);
uint16x8_t
vin1
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin1_
),
mask
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_u16
(
vorrq_u16
(
vin0
,
vin1
),
zeros
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
(
float16_t
)((
bool
)(
in0
)
|
(
bool
)(
in1
));
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)((
bool
)(
in0_opt
)
|
(
bool
)(
input1
[
i
]));
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)((
bool
)(
in0_opt
)
|
(
bool
)(
input1
[
index
]));
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0_
=
vld1q_f16
(
input0
);
float16x8_t
vin1_
=
vin1_opt
;
uint16x8_t
vin0
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin0_
),
mask
);
uint16x8_t
vin1
=
vandq_u16
(
vreinterpretq_s16_f16
(
vin1_
),
mask
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_u16
(
vorrq_u16
(
vin0
,
vin1
),
zeros
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)((
bool
)(
input0
[
i
])
|
(
bool
)(
in1_opt
));
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
(
float16_t
)((
bool
)(
in0
)
|
(
bool
)(
in1
));
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)((
bool
)(
input0
[
index
])
|
(
bool
)(
in1_opt
));
}
}
return
NNACL_OK
;
}
...
...
@@ -1234,33 +1347,48 @@ int ElementOptMaximumFp16(float16_t *input0, float16_t *input1, float16_t *outpu
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vmaxq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vmaxq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
MSMAX
(
in0
,
in1
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMAX
(
in0_opt
,
input1
[
i
]);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
MSMAX
(
in0_opt
,
input1
[
index
]);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vmaxq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMAX
(
input0
[
i
],
in1_opt
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
MSMAX
(
in0
,
in1
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
MSMAX
(
input0
[
index
],
in1_opt
);
}
}
return
NNACL_OK
;
}
...
...
@@ -1292,33 +1420,48 @@ int ElementOptMinimumFp16(float16_t *input0, float16_t *input1, float16_t *outpu
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vminq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vminq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
MSMIN
(
in0
,
in1
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMIN
(
in0_opt
,
input1
[
i
]);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
MSMIN
(
in0_opt
,
input1
[
index
]);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vminq_f16
(
vin0
,
vin1
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
MSMIN
(
input0
[
i
],
in1_opt
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
MSMIN
(
in0
,
in1
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
MSMIN
(
input0
[
index
],
in1_opt
);
}
}
return
NNACL_OK
;
}
...
...
@@ -1354,35 +1497,50 @@ int ElementOptNotEqualFp16(float16_t *input0, float16_t *input1, float16_t *outp
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
float16x8_t
vtrue
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
float16x8_t
vfalse
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_f16
(
vin0
,
vin1
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_f16
(
vin0
,
vin1
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
(
float16_t
)(
in0
!=
in1
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
in0_opt
!=
input1
[
i
]);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
in0_opt
!=
input1
[
index
]);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vbslq_f16
(
vceqq_f16
(
vin0
,
vin1
),
vfalse
,
vtrue
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
input0
[
i
]
!=
in1_opt
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
(
float16_t
)(
in0
!=
in1
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
input0
[
index
]
!=
in1_opt
);
}
}
return
NNACL_OK
;
}
...
...
@@ -1418,35 +1576,50 @@ int ElementOptEqualFp16(float16_t *input0, float16_t *input1, float16_t *output,
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
float16x8_t
vtrue
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
float16x8_t
vfalse
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vceqq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
(
float16_t
)(
in0
==
in1
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
in0_opt
==
input1
[
i
]);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
in0_opt
==
input1
[
index
]);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vbslq_f16
(
vceqq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
input0
[
i
]
==
in1_opt
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
(
float16_t
)(
in0
==
in1
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
input0
[
index
]
==
in1_opt
);
}
}
return
NNACL_OK
;
}
...
...
@@ -1482,35 +1655,50 @@ int ElementOptLessFp16(float16_t *input0, float16_t *input1, float16_t *output,
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
float16x8_t
vtrue
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
float16x8_t
vfalse
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vcltq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vcltq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
(
float16_t
)(
in0
<
in1
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
in0_opt
<
input1
[
i
]);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
in0_opt
<
input1
[
index
]);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vbslq_f16
(
vcltq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
input0
[
i
]
<
in1_opt
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
(
float16_t
)(
in0
<
in1
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
input0
[
index
]
<
in1_opt
);
}
}
return
NNACL_OK
;
}
...
...
@@ -1546,35 +1734,50 @@ int ElementOptLessEqualFp16(float16_t *input0, float16_t *input1, float16_t *out
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
float16x8_t
vtrue
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
float16x8_t
vfalse
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vcleq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vcleq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
(
float16_t
)(
in0
<=
in1
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
in0_opt
<=
input1
[
i
]);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
in0_opt
<=
input1
[
index
]);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vbslq_f16
(
vcleq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
input0
[
i
]
<=
in1_opt
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
(
float16_t
)(
in0
<=
in1
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
input0
[
index
]
<=
in1_opt
);
}
}
return
NNACL_OK
;
}
...
...
@@ -1610,35 +1813,50 @@ int ElementOptGreaterFp16(float16_t *input0, float16_t *input1, float16_t *outpu
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
float16x8_t
vtrue
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
float16x8_t
vfalse
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vcgtq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vcgtq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
(
float16_t
)(
in0
>
in1
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
in0_opt
>
input1
[
i
]);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
in0_opt
>
input1
[
index
]);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vbslq_f16
(
vcgtq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
input0
[
i
]
>
in1_opt
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
(
float16_t
)(
in0
>
in1
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
input0
[
index
]
>
in1_opt
);
}
}
return
NNACL_OK
;
}
...
...
@@ -1674,35 +1892,50 @@ int ElementOptGreaterEqualFp16(float16_t *input0, float16_t *input1, float16_t *
ArithmeticParameter
*
param
)
{
int
block_mod
=
element_size
%
C8NUM
;
int
block_c8
=
element_size
-
block_mod
;
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
#ifdef ENABLE_NEON
float16x8_t
vin0_opt
=
{
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
],
input0
[
0
]};
float16x8_t
vin1_opt
=
{
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
],
input1
[
0
]};
float16_t
in0_opt
=
input0
[
0
];
float16_t
in1_opt
=
input1
[
0
];
float16x8_t
vtrue
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
float16x8_t
vfalse
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#endif
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
if
(
param
->
in_elements_num0_
==
1
)
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
param
->
in_elements_num0_
==
1
?
vin0_opt
:
vld1q_f16
(
input0
)
;
float16x8_t
vin1
=
param
->
in_elements_num1_
==
1
?
vin1_opt
:
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vcgeq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
float16x8_t
vin0
=
vin0_opt
;
float16x8_t
vin1
=
vld1q_f16
(
input1
);
float16x8_t
vout
=
vbslq_f16
(
vcgeq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
i
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
i
];
output
[
i
]
=
(
float16_t
)(
in0
>=
in1
);
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
in0_opt
>=
input1
[
i
]);
}
#endif
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
in0_opt
>=
input1
[
index
]);
}
}
else
{
for
(
int
index
=
0
;
index
<
block_c8
;
index
+=
C8NUM
)
{
#ifdef ENABLE_NEON
float16x8_t
vin0
=
vld1q_f16
(
input0
);
float16x8_t
vin1
=
vin1_opt
;
float16x8_t
vout
=
vbslq_f16
(
vcgeq_f16
(
vin0
,
vin1
),
vtrue
,
vfalse
);
vst1q_f16
(
output
,
vout
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
++
i
)
{
output
[
i
]
=
(
float16_t
)(
input0
[
i
]
>=
in1_opt
);
}
#endif
input0
+=
C8NUM
;
input1
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
float16_t
in0
=
param
->
in_elements_num0_
==
1
?
in0_opt
:
input0
[
index
];
float16_t
in1
=
param
->
in_elements_num1_
==
1
?
in1_opt
:
input1
[
index
];
output
[
index
]
=
(
float16_t
)(
in0
>=
in1
);
input0
+=
C8NUM
;
output
+=
C8NUM
;
}
for
(
int
index
=
0
;
index
<
block_mod
;
++
index
)
{
output
[
index
]
=
(
float16_t
)(
input0
[
index
]
>=
in1_opt
);
}
}
return
NNACL_OK
;
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录