magicwindyyd / mindspore (forked from MindSpore / mindspore, in sync with the fork source)
Commit 147c7457

Authored on Aug 19, 2020 by mindspore-ci-bot; committed by Gitee on Aug 19, 2020.

!4671 modify arm cpu fp16 op: arithmetic

Merge pull request !4671 from 陶云浩/lite

Parents: 4d7304b8, d1c8f967
Showing 3 changed files with 1231 additions and 252 deletions (+1231 −252)
mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc        +111  −17
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/arithmetic_fp16.c   +1074 −234
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/arithmetic_fp16.h   +46   −1
mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc

@@ -195,23 +195,104 @@ int ArithmeticFP16CPUKernel::ReSize() {
     }
   }
   if (arithmeticParameter_->in_elements_num0_ == 1 || arithmeticParameter_->in_elements_num1_ == 1) {
-    if (arithmeticParameter_->activation_type_ == schema::ActivationType_NO_ACTIVATION) {
-      switch (arithmeticParameter_->op_parameter_.type_) {
-        case PrimitiveType_Mul:
-          arithmeticParameter_->broadcasting_ = false;
-          arithmetic_opt_run_ = ElementOptMulFp16;
-          break;
-        case PrimitiveType_Add:
-          arithmeticParameter_->broadcasting_ = false;
-          arithmetic_opt_run_ = ElementOptAddFp16;
-          break;
-        case PrimitiveType_Sub:
-          arithmeticParameter_->broadcasting_ = false;
-          arithmetic_opt_run_ = ElementOptSubFp16;
-          break;
-        default:
-          break;
-      }
-    }
+    switch (arithmeticParameter_->op_parameter_.type_) {
+      case PrimitiveType_Mul:
+        arithmeticParameter_->broadcasting_ = false;
+        switch (arithmeticParameter_->activation_type_) {
+          case schema::ActivationType_RELU:
+            arithmetic_opt_run_ = ElementOptMulReluFp16;
+            break;
+          case schema::ActivationType_RELU6:
+            arithmetic_opt_run_ = ElementOptDivRelu6Fp16;
+            break;
+          default:
+            arithmetic_opt_run_ = ElementOptDivFp16;
+            break;
+        }
+        break;
+      case PrimitiveType_Add:
+        arithmeticParameter_->broadcasting_ = false;
+        switch (arithmeticParameter_->activation_type_) {
+          case schema::ActivationType_RELU:
+            arithmetic_opt_run_ = ElementOptAddReluFp16;
+            break;
+          case schema::ActivationType_RELU6:
+            arithmetic_opt_run_ = ElementOptAddRelu6Fp16;
+            break;
+          default:
+            arithmetic_opt_run_ = ElementOptAddFp16;
+            break;
+        }
+        break;
+      case PrimitiveType_Sub:
+        arithmeticParameter_->broadcasting_ = false;
+        switch (arithmeticParameter_->activation_type_) {
+          case schema::ActivationType_RELU:
+            arithmetic_opt_run_ = ElementOptSubReluFp16;
+            break;
+          case schema::ActivationType_RELU6:
+            arithmetic_opt_run_ = ElementOptSubRelu6Fp16;
+            break;
+          default:
+            arithmetic_opt_run_ = ElementOptSubFp16;
+            break;
+        }
+        break;
+      case PrimitiveType_Div:
+        arithmeticParameter_->broadcasting_ = false;
+        switch (arithmeticParameter_->activation_type_) {
+          case schema::ActivationType_RELU:
+            arithmetic_opt_run_ = ElementOptDivReluFp16;
+            break;
+          case schema::ActivationType_RELU6:
+            arithmetic_opt_run_ = ElementOptDivRelu6Fp16;
+            break;
+          default:
+            arithmetic_opt_run_ = ElementOptDivFp16;
+            break;
+        }
+        break;
+      case PrimitiveType_FloorMod:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptFloorModFp16;
+      case PrimitiveType_FloorDiv:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptFloorDivFp16;
+      case PrimitiveType_LogicalAnd:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptLogicalAndFp16;
+      case PrimitiveType_LogicalOr:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptLogicalOrFp16;
+      case PrimitiveType_SquaredDifference:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptSquaredDifferenceFp16;
+      case PrimitiveType_Maximum:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptMaximumFp16;
+      case PrimitiveType_Minimum:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptMinimumFp16;
+      case PrimitiveType_NotEqual:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptNotEqualFp16;
+      case PrimitiveType_Equal:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptEqualFp16;
+      case PrimitiveType_Less:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptLessFp16;
+      case PrimitiveType_LessEqual:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptLessEqualFp16;
+      case PrimitiveType_Greater:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptGreaterFp16;
+      case PrimitiveType_GreaterEqual:
+        arithmeticParameter_->broadcasting_ = false;
+        arithmetic_opt_run_ = ElementOptGreaterEqualFp16;
+      default:
+        break;
+    }
   }
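The ElementOpt*Fp16 functions selected above are the single-scalar ("opt") variants: ReSize() picks them only when in_elements_num0_ == 1 or in_elements_num1_ == 1, i.e. when one operand is a scalar that can be broadcast inside the loop, and the activation is fused into the same pass. As a rough illustration only (this is not the nnacl implementation; float stands in for float16_t, and OptParams is a simplified stand-in for ArithmeticParameter), such a fused kernel might look like:

#include <algorithm>
#include <cstdio>

// Simplified stand-ins for illustration only. The real kernels use float16_t
// and nnacl's ArithmeticParameter, whose layout is not part of this diff.
struct OptParams {
  bool in0_is_scalar;  // corresponds to in_elements_num0_ == 1
};

// Hypothetical counterpart of an ElementOptAddRelu6Fp16-style kernel:
// broadcast the scalar operand, add element-wise, clamp the result to [0, 6].
int ElementOptAddRelu6Sketch(const float *in0, const float *in1, float *out, int element_size,
                             const OptParams *param) {
  for (int i = 0; i < element_size; ++i) {
    float a = param->in0_is_scalar ? in0[0] : in0[i];
    float b = param->in0_is_scalar ? in1[i] : in1[0];
    out[i] = std::min(std::max(a + b, 0.0f), 6.0f);  // fused ReLU6
  }
  return 0;
}

int main() {
  const float scalar[1] = {2.0f};
  const float vec[4] = {-3.0f, 1.0f, 3.5f, 10.0f};
  float out[4];
  OptParams p = {true};
  ElementOptAddRelu6Sketch(scalar, vec, out, 4, &p);
  for (int i = 0; i < 4; ++i) printf("%g ", out[i]);  // prints: 0 3 5.5 6
  printf("\n");
  return 0;
}

Fusing the clamp into the arithmetic loop avoids a second pass over the output buffer, which is the point of providing separate ReLU and ReLU6 variants per op.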
@@ -334,4 +415,17 @@ kernel::LiteKernel *CpuArithmeticFp16KernelCreator(const std::vector<lite::tenso
 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Mul, CpuArithmeticFp16KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Add, CpuArithmeticFp16KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Sub, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Div, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorMod, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorDiv, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalOr, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Maximum, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Minimum, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_NotEqual, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Equal, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Less, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LessEqual, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Greater, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CpuArithmeticFp16KernelCreator)
 }  // namespace mindspore::kernel
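These REG_KERNEL lines register CpuArithmeticFp16KernelCreator for each newly supported primitive on the CPU backend with float16 data, so the runtime can find an FP16 kernel for these ops when building a graph. As a loose sketch of what such registration amounts to (illustrative only; the names and structure below are assumptions, not MindSpore Lite's actual KernelRegistry or REG_KERNEL implementation), a creator can be stored in a map keyed by (arch, data type, op type) at static-initialization time and looked up later:

#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <tuple>

// Illustrative registry only; not the MindSpore Lite implementation.
struct Kernel { std::string name; };
using Creator = std::function<Kernel()>;
using Key = std::tuple<std::string, std::string, std::string>;  // (arch, data type, op type)

std::map<Key, Creator> &Registry() {
  static std::map<Key, Creator> r;
  return r;
}

struct Registrar {
  Registrar(const Key &k, Creator c) { Registry()[k] = std::move(c); }
};

// Rough analogue of REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Div, ...):
// a file-scope object whose constructor runs at load time and records the creator.
static Registrar g_div_fp16_reg({"kCPU", "Float16", "Div"},
                                [] { return Kernel{"ArithmeticFP16CPUKernel(Div)"}; });

int main() {
  auto it = Registry().find({"kCPU", "Float16", "Div"});
  if (it != Registry().end()) {
    std::cout << it->second().name << std::endl;  // creator found: build the kernel
  }
  return 0;
}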
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/arithmetic_fp16.c

(Diff collapsed; not shown.)
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/arithmetic_fp16.h

@@ -26,12 +26,57 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
+int ElementOptMulFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                      ArithmeticParameter *param);
+int ElementOptMulReluFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                          ArithmeticParameter *param);
+int ElementOptMulRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                           ArithmeticParameter *param);
 int ElementOptAddFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
                       ArithmeticParameter *param);
+int ElementOptAddReluFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                          ArithmeticParameter *param);
+int ElementOptAddRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                           ArithmeticParameter *param);
 int ElementOptSubFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
                       ArithmeticParameter *param);
-int ElementOptMulFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
-                      ArithmeticParameter *param);
+int ElementOptSubReluFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                          ArithmeticParameter *param);
+int ElementOptSubRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                           ArithmeticParameter *param);
+int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                      ArithmeticParameter *param);
+int ElementOptDivReluFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                          ArithmeticParameter *param);
+int ElementOptDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                           ArithmeticParameter *param);
+int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                           ArithmeticParameter *param);
+int ElementOptFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                           ArithmeticParameter *param);
+int ElementOptLogicalAndFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                             ArithmeticParameter *param);
+int ElementOptLogicalOrFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                            ArithmeticParameter *param);
+int ElementOptSquaredDifferenceFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                                    ArithmeticParameter *param);
+int ElementOptMaximumFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                          ArithmeticParameter *param);
+int ElementOptMinimumFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                          ArithmeticParameter *param);
+int ElementOptNotEqualFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                           ArithmeticParameter *param);
+int ElementOptEqualFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                        ArithmeticParameter *param);
+int ElementOptLessFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                       ArithmeticParameter *param);
+int ElementOptLessEqualFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                            ArithmeticParameter *param);
+int ElementOptGreaterFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                          ArithmeticParameter *param);
+int ElementOptGreaterEqualFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
+                               ArithmeticParameter *param);
 int ElementMulFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size);
 int ElementMulReluFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size);
 int ElementMulRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size);
...
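All of the ElementOpt* declarations above share one signature, which is what lets the .cc file keep a single function pointer (the arithmetic_opt_run_ member seen in the first hunk) and assign whichever variant ReSize() picks. A minimal sketch of that dispatch pattern, with float and a placeholder struct standing in for float16_t and ArithmeticParameter:

#include <cstdio>

// Placeholder types for illustration; the declared functions use float16_t and
// nnacl's ArithmeticParameter instead of these stand-ins.
struct ArithmeticParamStub { int unused; };
typedef int (*ArithmeticOptRun)(const float *in0, const float *in1, float *out, int element_size,
                                const ArithmeticParamStub *param);

// Two interchangeable variants sharing the signature, as the header's
// ElementOpt* declarations do (operand 0 is treated as the scalar side here).
static int OptAdd(const float *in0, const float *in1, float *out, int n, const ArithmeticParamStub *) {
  for (int i = 0; i < n; ++i) out[i] = in0[0] + in1[i];
  return 0;
}
static int OptMul(const float *in0, const float *in1, float *out, int n, const ArithmeticParamStub *) {
  for (int i = 0; i < n; ++i) out[i] = in0[0] * in1[i];
  return 0;
}

int main() {
  ArithmeticOptRun run = OptMul;  // ReSize()-style selection by op type
  const float scalar[1] = {2.0f};
  const float vec[3] = {1.0f, 2.0f, 3.0f};
  float out[3];
  ArithmeticParamStub p = {0};
  run(scalar, vec, out, 3, &p);
  printf("%g %g %g\n", out[0], out[1], out[2]);  // prints: 2 4 6
  return 0;
}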