magicwindyyd / mindspore (forked from MindSpore / mindspore)

Commit 5613116c
Authored 4 years ago by mindspore-ci-bot; committed 4 years ago by Gitee
!4201 modify CPU op arithmetic fp16
Merge pull request !4201 from 陶云浩/test
Parents: e6ffdeeb, 0bbb6420

Showing 2 changed files with 25 additions and 39 deletions:

  mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc  (+25, -37)
  mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h   (+0, -2)
mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
@@ -48,14 +48,6 @@ using mindspore::schema::PrimitiveType_Sub;
 
 namespace mindspore::kernel {
 void ArithmeticFP16CPUKernel::FreeTmpBuffer() {
-  if (tile_data0_ != nullptr) {
-    free(tile_data0_);
-    tile_data0_ = nullptr;
-  }
-  if (tile_data1_ != nullptr) {
-    free(tile_data1_);
-    tile_data1_ = nullptr;
-  }
   if (input0_fp16_ != nullptr) {
     context_->allocator->Free(input0_fp16_);
     input0_fp16_ = nullptr;
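The surviving body of FreeTmpBuffer keeps the usual free-and-null idiom, which makes repeated calls safe; this commit relies on that, calling it from error paths and (below) at the end of Run(). A minimal sketch of the idiom, with member names mirroring the kernel's but the struct itself purely illustrative:

// Illustrative only: free-and-null makes FreeTmpBuffer idempotent, so it can be
// called from both error paths and the end of Run() without double-free risk.
#include <cstdlib>

struct TmpBuffers {
  void *input0_fp16_ = nullptr;  // mirrors the kernel's member names
  void *input1_fp16_ = nullptr;

  void FreeTmpBuffer() {
    if (input0_fp16_ != nullptr) {
      free(input0_fp16_);
      input0_fp16_ = nullptr;  // null out so a second call is a no-op
    }
    if (input1_fp16_ != nullptr) {
      free(input1_fp16_);
      input1_fp16_ = nullptr;
    }
  }
};

Note that in the kernel itself the fp16 buffers shown in the context lines are released through context_->allocator->Free; plain free() above merely stands in for whichever deallocator matches the allocation.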
@@ -70,7 +62,7 @@ void ArithmeticFP16CPUKernel::FreeTmpBuffer() {
   }
 }
 
-ArithmeticFP16CPUKernel::~ArithmeticFP16CPUKernel() { FreeTmpBuffer(); }
+ArithmeticFP16CPUKernel::~ArithmeticFP16CPUKernel() {}
 
 int ArithmeticFP16CPUKernel::Init() {
   switch (op_parameter_->type_) {
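With the destructor emptied, the temporary buffers no longer outlive a single invocation: Run() allocates them, launches the parallel job, and, per the @@ -414,6 +401,7 hunk below, calls FreeTmpBuffer() before returning. A hedged sketch of that per-run lifecycle; LaunchJob and kElemCount are placeholders, not MindSpore APIs:

// Sketch of the new ownership model: buffers are scoped to one Run() call.
#include <cstdlib>

class KernelSketch {
 public:
  ~KernelSketch() {}  // nothing to free: Run() cleans up after itself

  int Run() {
    buf_ = static_cast<float *>(malloc(kElemCount * sizeof(float)));
    if (buf_ == nullptr) {
      return -1;
    }
    int ret = LaunchJob(buf_);  // parallel compute over the temporary buffer
    Free();                     // released after the launch on every exit path
    return ret;
  }

 private:
  static constexpr int kElemCount = 1024;
  float *buf_ = nullptr;

  int LaunchJob(float *) { return 0; }
  void Free() {
    free(buf_);
    buf_ = nullptr;
  }
};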
@@ -162,7 +154,6 @@ int ArithmeticFP16CPUKernel::Init() {
 }
 
 int ArithmeticFP16CPUKernel::ReSize() {
-  FreeTmpBuffer();
   arithmeticParameter_->in_elements_num0_ = in_tensors_[0]->ElementsNum();
   arithmeticParameter_->in_elements_num1_ = in_tensors_[1]->ElementsNum();
   arithmeticParameter_->out_elements_num_ = out_tensors_[0]->ElementsNum();
@@ -286,7 +277,7 @@ int ArithmeticFP16CPUKernel::ReSize() {
 }
 
 int ArithmeticFP16CPUKernel::BroadcastRun(float16_t *input0, float16_t *input1, float16_t *output, int dim,
-                                          int out_count, int out_thread_stride) {
+                                          int out_count, int out_thread_stride) {
   if (dim > break_pos_) {
     int error_code = arithmetic_run_(input0 + out_thread_stride, input1 + out_thread_stride,
                                      output + out_thread_stride, out_count);
@@ -303,8 +294,8 @@ int ArithmeticFP16CPUKernel::BroadcastRun(float16_t *input0, float16_t *input1,
     int pos1_ = arithmeticParameter_->in_shape1_[dim] == 1 ? 0 : i;
     int error_code = BroadcastRun(input0 + pos0_ * arithmeticParameter_->in_strides0_[dim],
-                                  input1 + pos1_ * arithmeticParameter_->in_strides1_[dim],
-                                  output + i * arithmeticParameter_->out_strides_[dim], dim + 1, out_count, out_thread_stride);
+                                  input1 + pos1_ * arithmeticParameter_->in_strides1_[dim],
+                                  output + i * arithmeticParameter_->out_strides_[dim], dim + 1, out_count, out_thread_stride);
     if (error_code != RET_OK) {
       return RET_ERROR;
     }
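In both BroadcastRun hunks the removed and added lines carry identical tokens, so the change appears to be indentation only (consistent with the page's hide-whitespace-changes toggle). The logic itself: BroadcastRun walks the output shape one dimension at a time; for each index i of dimension dim it offsets each input by that input's own stride, pinning the index to 0 wherever the input's extent is 1, and recurses until the position passes break_pos_, at which point arithmetic_run_ handles a contiguous span. A self-contained sketch of the same recursion, where the shapes, strides, and the add operation are illustrative and, unlike the kernel, the recursion goes all the way down to scalars for clarity:

// Minimal model of stride-based broadcasting over 3 dims with elementwise add.
#include <cstdio>

constexpr int kDims = 3;
const int in_shape0[kDims] = {2, 1, 3}, in_strides0[kDims] = {3, 3, 1};
const int in_shape1[kDims] = {1, 2, 3}, in_strides1[kDims] = {6, 3, 1};
const int out_shape[kDims] = {2, 2, 3}, out_strides[kDims] = {6, 3, 1};

void BroadcastRun(const float *in0, const float *in1, float *out, int dim) {
  if (dim == kDims) {
    *out = *in0 + *in1;  // innermost position: the elementwise op itself
    return;
  }
  for (int i = 0; i < out_shape[dim]; ++i) {
    int pos0 = in_shape0[dim] == 1 ? 0 : i;  // broadcast dims stay at index 0
    int pos1 = in_shape1[dim] == 1 ? 0 : i;
    BroadcastRun(in0 + pos0 * in_strides0[dim], in1 + pos1 * in_strides1[dim],
                 out + i * out_strides[dim], dim + 1);
  }
}

int main() {
  float a[6] = {0, 1, 2, 3, 4, 5}, b[6] = {10, 20, 30, 40, 50, 60}, c[12];
  BroadcastRun(a, b, c, 0);
  for (float v : c) printf("%g ", v);  // 10 21 32 40 51 62 13 24 35 43 54 65
  printf("\n");
  return 0;
}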
@@ -327,7 +318,6 @@ int ArithmeticFP16CPUKernel::DoArithmetic(int task_id) {
   if (arithmetic_run_ == nullptr) {
     MS_LOG(ERROR) << "arithmetic_run function is nullptr!";
-    FreeTmpBuffer();
     return RET_ERROR;
   }
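DoArithmetic(task_id) is the per-thread body that ParallelLaunch fans out across context_->thread_num_ workers via the ArithmeticsRun_Fp16 trampoline seen in the Run() hunk below. A hedged sketch of that dispatch pattern; the trampoline signature and the work split are assumptions inferred from the fragments visible here, not the exact MindSpore Lite API:

// Sketch: a C-style trampoline lets a thread pool call back into a C++ kernel.
// Each task handles one strided slice of the output.
#include <algorithm>

struct KernelStub {
  int out_elements_ = 0;
  int thread_num_ = 1;

  int DoArithmetic(int task_id) {
    int stride = (out_elements_ + thread_num_ - 1) / thread_num_;  // ceil split
    int begin = task_id * stride;
    int count = std::min(stride, out_elements_ - begin);
    if (count <= 0) {
      return 0;  // trailing tasks may have no work
    }
    // ... run the elementwise op on [begin, begin + count) ...
    return 0;
  }
};

// Free function with the shape ParallelLaunch-style pools expect: an opaque
// pointer plus the task id (signature assumed for this sketch).
int ArithmeticsRunSketch(void *cdata, int task_id) {
  return static_cast<KernelStub *>(cdata)->DoArithmetic(task_id);
}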
@@ -383,8 +373,7 @@ int ArithmeticFP16CPUKernel::Run() {
   arithmeticParameter_->in_elements_num1_ = in_tensors_[1]->ElementsNum();
   arithmeticParameter_->out_elements_num_ = out_tensors_[0]->ElementsNum();
   if (out_tensors_[0]->data_type() == kNumberTypeFloat32 || out_tensors_[0]->data_type() == kNumberTypeFloat) {
-    output_fp16_ = reinterpret_cast<float16_t *>(
-      context_->allocator->Malloc(arithmeticParameter_->out_elements_num_ * sizeof(float16_t)));
+    output_fp16_ = reinterpret_cast<float16_t *>(malloc(arithmeticParameter_->out_elements_num_ * sizeof(float16_t)));
     if (output_fp16_ == nullptr) {
       MS_LOG(ERROR) << "malloc data fail!";
       FreeTmpBuffer();
@@ -392,8 +381,7 @@ int ArithmeticFP16CPUKernel::Run() {
     }
   }
   if (in_tensors_[0]->data_type() == kNumberTypeFloat32 || in_tensors_[0]->data_type() == kNumberTypeFloat) {
-    input0_fp16_ = reinterpret_cast<float16_t *>(
-      context_->allocator->Malloc(arithmeticParameter_->in_elements_num0_ * sizeof(float16_t)));
+    input0_fp16_ = reinterpret_cast<float16_t *>(malloc(arithmeticParameter_->in_elements_num0_ * sizeof(float16_t)));
     if (input0_fp16_ == nullptr) {
       MS_LOG(ERROR) << "malloc data fail!";
       FreeTmpBuffer();
@@ -403,8 +391,7 @@ int ArithmeticFP16CPUKernel::Run() {
                        arithmeticParameter_->in_elements_num0_);
   }
   if (in_tensors_[1]->data_type() == kNumberTypeFloat32 || in_tensors_[1]->data_type() == kNumberTypeFloat) {
-    input1_fp16_ = reinterpret_cast<float16_t *>(
-      context_->allocator->Malloc(arithmeticParameter_->in_elements_num0_ * sizeof(float16_t)));
+    input1_fp16_ = reinterpret_cast<float16_t *>(malloc(arithmeticParameter_->in_elements_num0_ * sizeof(float16_t)));
     if (input1_fp16_ == nullptr) {
       MS_LOG(ERROR) << "malloc data fail!";
       FreeTmpBuffer();
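Each of the three hunks above swaps a two-line context_->allocator->Malloc call for a one-line plain malloc when staging fp32 tensors into fp16 buffers; the elided context lines ending in in_elements_num0_ and in_elements_num1_ are the tails of the conversion calls that fill those buffers. (Both sides of the input1 hunk size its buffer with in_elements_num0_, exactly as shown in the source.) A hedged sketch of the stage-and-convert step; the conversion helper is illustrative, MindSpore Lite ships its own fp16 cast routines, and __fp16 assumes an ARM-style toolchain:

// Sketch of fp32 -> fp16 staging before an fp16 kernel runs.
#include <cstdlib>

using fp16 = __fp16;  // assumption: the compiler provides an fp16 storage type

static void Float32ToFloat16Sketch(const float *src, fp16 *dst, int n) {
  for (int i = 0; i < n; ++i) {
    dst[i] = static_cast<fp16>(src[i]);  // per-element narrowing conversion
  }
}

fp16 *StageInput(const float *fp32_data, int elements) {
  fp16 *staged = static_cast<fp16 *>(malloc(elements * sizeof(fp16)));
  if (staged == nullptr) {
    return nullptr;  // caller logs and frees any earlier buffers
  }
  Float32ToFloat16Sketch(fp32_data, staged, elements);
  return staged;
}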
@@ -414,6 +401,7 @@ int ArithmeticFP16CPUKernel::Run() {
                        arithmeticParameter_->in_elements_num1_);
   }
   ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticsRun_Fp16, this, context_->thread_num_);
+  FreeTmpBuffer();
   return ret;
 }
@@ -441,21 +429,21 @@ kernel::LiteKernel *CpuArithmeticFp16KernelCreator(const std::vector<lite::tenso
   return kernel;
 }
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Mul, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Add, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Sub, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Div, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorMod, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorDiv, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalOr, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Maximum, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Minimum, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_NotEqual, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Equal, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Less, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LessEqual, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Greater, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CpuArithmeticFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Eltwise, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Mul, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Add, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Sub, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Div, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorMod, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorDiv, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalOr, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Maximum, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Minimum, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_NotEqual, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Equal, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Less, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LessEqual, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Greater, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CpuArithmeticFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Eltwise, CpuArithmeticFp16KernelCreator)
 }  // namespace mindspore::kernel
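The final hunk uncomments seventeen REG_KERNEL lines, enabling the fp16 arithmetic creator for every supported elementwise primitive on CPU. Registration macros of this kind typically expand to a file-scope object whose constructor inserts the creator into a global registry keyed by (device, data type, op type). A generic sketch of that mechanism; the registry below is illustrative, not MindSpore's actual kernel registry:

// Sketch of a static-registration pattern comparable to REG_KERNEL.
#include <functional>
#include <map>
#include <tuple>
#include <utility>

using KernelCreator = std::function<void *()>;
using Key = std::tuple<int /*device*/, int /*dtype*/, int /*op*/>;

std::map<Key, KernelCreator> &Registry() {
  static std::map<Key, KernelCreator> registry;  // constructed on first use
  return registry;
}

struct Registrar {
  Registrar(int device, int dtype, int op, KernelCreator creator) {
    Registry().emplace(Key{device, dtype, op}, std::move(creator));  // runs before main()
  }
};

// One file-scope object per (device, dtype, op) triple: roughly what each
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Mul, ...) line boils down to.
#define REG_KERNEL_SKETCH(dev, dtype, op, creator) \
  static Registrar g_reg_##op(dev, dtype, op, creator)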
mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h
@@ -50,8 +50,6 @@ class ArithmeticFP16CPUKernel : public LiteKernel {
   int break_pos_;
   int out_thread_stride_;
   int out_count_;
-  float16_t *tile_data0_ = nullptr;
-  float16_t *tile_data1_ = nullptr;
   float16_t *input0_fp16_ = nullptr;
   float16_t *input1_fp16_ = nullptr;
   float16_t *output_fp16_ = nullptr;