magicwindyyd / mindspore (fork of MindSpore / mindspore)

Commit 4e63cccc
Authored on Aug 20, 2020 by mindspore-ci-bot; committed via Gitee on Aug 20, 2020.

!4837 modify arm cpu fp16 & fp32 op: arithmetic

Merge pull request !4837 from 陶云浩/arithmetic0

Parents: 98052f9d, c31ad9d5
Showing 3 changed files with 87 additions and 43 deletions (+87, -43):

mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc        (+58, -43)
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/arithmetic_fp16.c   (+27, -0)
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/arithmetic_fp16.h   (+2, -0)
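In short: the .cc change moves the fp16 scratch-buffer allocation out of ReSize() and into Run() and adds FreeTmpBuffer() to every error path, while the nnacl .c/.h change adds TileOneDimensionFp16/TileDimensionsFp16 so broadcast operands can be expanded to the output shape before the element-wise kernel runs. The allocate-per-run, free-on-every-exit pattern the diff converges on looks roughly like this; a minimal sketch with assumed names (Kernel, buf0, buf1), not the actual MindSpore Lite API:

```cpp
#include <cstdio>
#include <cstdlib>

// Sketch of the pattern enforced in Run() below: allocate temporaries per
// run, and release them all on every early exit so a failed second
// allocation cannot leak the first.
struct Kernel {
  void *buf0 = nullptr;
  void *buf1 = nullptr;

  void FreeTmpBuffer() {
    free(buf0); buf0 = nullptr;
    free(buf1); buf1 = nullptr;
  }

  int Run(size_t n) {
    buf0 = malloc(n);
    if (buf0 == nullptr) {
      fprintf(stderr, "malloc data fail!\n");
      FreeTmpBuffer();  // safe even when nothing else was allocated
      return -1;
    }
    buf1 = malloc(n);
    if (buf1 == nullptr) {
      fprintf(stderr, "malloc data fail!\n");
      FreeTmpBuffer();  // releases buf0 as well
      return -1;
    }
    // ... element-wise arithmetic would run here ...
    FreeTmpBuffer();
    return 0;
  }
};

int main() {
  Kernel k;
  return k.Run(1 << 20);
}
```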
mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
...
...
@@ -162,34 +162,9 @@ int ArithmeticFP16CPUKernel::Init() {
 }
 
 int ArithmeticFP16CPUKernel::ReSize() {
   FreeTmpBuffer();
   arithmeticParameter_->in_elements_num0_ = in_tensors_[0]->ElementsNum();
   arithmeticParameter_->in_elements_num1_ = in_tensors_[1]->ElementsNum();
   arithmeticParameter_->out_elements_num_ = out_tensors_[0]->ElementsNum();
-  if (in_tensors_[0]->data_type() == kNumberTypeFloat32 || in_tensors_[0]->data_type() == kNumberTypeFloat) {
-    input0_fp16_ = reinterpret_cast<float16_t *>(
-      context_->allocator->Malloc(arithmeticParameter_->in_elements_num0_ * sizeof(float16_t)));
-    if (input0_fp16_ == nullptr) {
-      MS_LOG(ERROR) << "malloc data fail!";
-      return RET_ERROR;
-    }
-  }
-  if (in_tensors_[1]->data_type() == kNumberTypeFloat32 || in_tensors_[1]->data_type() == kNumberTypeFloat) {
-    input1_fp16_ = reinterpret_cast<float16_t *>(
-      context_->allocator->Malloc(arithmeticParameter_->in_elements_num1_ * sizeof(float16_t)));
-    if (input0_fp16_ == nullptr) {
-      MS_LOG(ERROR) << "malloc data fail!";
-      return RET_ERROR;
-    }
-  }
-  if (out_tensors_[0]->data_type() == kNumberTypeFloat32 || out_tensors_[0]->data_type() == kNumberTypeFloat) {
-    output_fp16_ = reinterpret_cast<float16_t *>(
-      context_->allocator->Malloc(arithmeticParameter_->out_elements_num_ * sizeof(float16_t)));
-    if (output_fp16_ == nullptr) {
-      MS_LOG(ERROR) << "malloc data fail!";
-      return RET_ERROR;
-    }
-  }
   if (arithmeticParameter_->in_elements_num0_ == 1 || arithmeticParameter_->in_elements_num1_ == 1) {
     switch (arithmeticParameter_->op_parameter_.type_) {
...
...
@@ -292,20 +267,6 @@ int ArithmeticFP16CPUKernel::ReSize() {
       break;
     }
   }
-  if (arithmeticParameter_->broadcasting_) {
-    outside_ = 1;
-    for (int i = arithmeticParameter_->ndim_ - 1; i >= 0; --i) {
-      if (arithmeticParameter_->in_shape0_[i] != arithmeticParameter_->in_shape1_[i]) {
-        break_pos_ = i;
-        break;
-      }
-      outside_ *= arithmeticParameter_->out_shape_[i];
-    }
-    ComputeStrides(arithmeticParameter_->in_shape0_, arithmeticParameter_->in_strides0_, arithmeticParameter_->ndim_);
-    ComputeStrides(arithmeticParameter_->in_shape1_, arithmeticParameter_->in_strides1_, arithmeticParameter_->ndim_);
-    ComputeStrides(arithmeticParameter_->out_shape_, arithmeticParameter_->out_strides_, arithmeticParameter_->ndim_);
-  }
   return RET_OK;
 }
...
...
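The block removed above computed the broadcast bookkeeping (outside_, break_pos_, and the three stride arrays) at resize time; after this commit the broadcast path tiles its operands inside Run() instead. For reference, ComputeStrides-style row-major strides are just suffix products of the shape; a small self-contained illustration, not the actual nnacl source:

```cpp
#include <cstdio>

// Row-major strides: strides[i] is how many elements index i skips per step,
// i.e. the product of all dimensions to its right. (Illustrative sketch.)
void ComputeStridesSketch(const int *shape, int *strides, int ndim) {
  int stride = 1;
  for (int i = ndim - 1; i >= 0; --i) {
    strides[i] = stride;
    stride *= shape[i];
  }
}

int main() {
  int shape[3] = {2, 3, 4};
  int strides[3] = {0, 0, 0};
  ComputeStridesSketch(shape, strides, 3);
  printf("%d %d %d\n", strides[0], strides[1], strides[2]);  // prints: 12 4 1
}
```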
@@ -344,10 +305,8 @@ int ArithmeticFP16CPUKernel::DoArithmetic(int task_id) {
   int error_code = RET_OK;
   if (arithmeticParameter_->broadcasting_) {
-    stride = UP_DIV(outside_, context_->thread_num_);
-    out_count_ = MSMIN(stride, outside_ - stride * task_id);
-    out_thread_stride_ = stride * task_id;
-    error_code = broadcast_run_(input0_data, input1_data1, output_data, 0);
+    error_code = arithmetic_run_(tile_data0_ + thread_stride, tile_data1_ + thread_stride,
+                                 output_data + thread_stride, count);
   } else if (arithmetic_opt_run_ != nullptr) {
     if (arithmeticParameter_->in_elements_num0_ == 1) {
       error_code = arithmetic_opt_run_(input0_data, input1_data1 + thread_stride, output_data + thread_stride, count,
...
...
@@ -364,6 +323,7 @@ int ArithmeticFP16CPUKernel::DoArithmetic(int task_id) {
         arithmetic_run_(input0_data + thread_stride, input1_data1 + thread_stride, output_data + thread_stride, count);
   }
   if (error_code != RET_OK) {
+    FreeTmpBuffer();
     return RET_ERROR;
   }
   if (output_fp16_ != nullptr) {
...
...
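Both DoArithmetic hunks above slice work per thread: each task takes a stride = UP_DIV(total, thread_num) chunk, and the count is clipped with MSMIN so the last task stops at the end of the data. A self-contained sketch of that partitioning; UpDiv rewrites the nnacl UP_DIV macro for illustration:

```cpp
#include <algorithm>
#include <cstdio>

// Ceiling division, equivalent to the nnacl UP_DIV macro.
int UpDiv(int a, int b) { return (a + b - 1) / b; }

int main() {
  const int total = 10;      // elements (or broadcast "outside" rows)
  const int thread_num = 4;  // parallel tasks
  const int stride = UpDiv(total, thread_num);
  for (int task_id = 0; task_id < thread_num; ++task_id) {
    // Task task_id covers [task_id * stride, task_id * stride + count).
    const int count = std::min(stride, total - stride * task_id);
    if (count <= 0) continue;  // trailing tasks may receive no work
    printf("task %d: offset=%d count=%d\n", task_id, stride * task_id, count);
  }
}
```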
@@ -390,6 +350,37 @@ int ArithmeticFP16CPUKernel::Run() {
     return ret;
   }
+  arithmeticParameter_->in_elements_num0_ = in_tensors_[0]->ElementsNum();
+  arithmeticParameter_->in_elements_num1_ = in_tensors_[1]->ElementsNum();
+  arithmeticParameter_->out_elements_num_ = out_tensors_[0]->ElementsNum();
+  if (in_tensors_[0]->data_type() == kNumberTypeFloat32 || in_tensors_[0]->data_type() == kNumberTypeFloat) {
+    input0_fp16_ = reinterpret_cast<float16_t *>(
+      context_->allocator->Malloc(arithmeticParameter_->in_elements_num0_ * sizeof(float16_t)));
+    if (input0_fp16_ == nullptr) {
+      MS_LOG(ERROR) << "malloc data fail!";
+      FreeTmpBuffer();
+      return RET_ERROR;
+    }
+  }
+  if (in_tensors_[1]->data_type() == kNumberTypeFloat32 || in_tensors_[1]->data_type() == kNumberTypeFloat) {
+    input1_fp16_ = reinterpret_cast<float16_t *>(
+      context_->allocator->Malloc(arithmeticParameter_->in_elements_num1_ * sizeof(float16_t)));
+    if (input0_fp16_ == nullptr) {
+      MS_LOG(ERROR) << "malloc data fail!";
+      FreeTmpBuffer();
+      return RET_ERROR;
+    }
+  }
+  if (out_tensors_[0]->data_type() == kNumberTypeFloat32 || out_tensors_[0]->data_type() == kNumberTypeFloat) {
+    output_fp16_ = reinterpret_cast<float16_t *>(
+      context_->allocator->Malloc(arithmeticParameter_->out_elements_num_ * sizeof(float16_t)));
+    if (output_fp16_ == nullptr) {
+      MS_LOG(ERROR) << "malloc data fail!";
+      FreeTmpBuffer();
+      return RET_ERROR;
+    }
+  }
   if (in_tensors_[0]->data_type() == kNumberTypeFloat32 || in_tensors_[0]->data_type() == kNumberTypeFloat) {
     Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[0]->Data()), input0_fp16_,
                      arithmeticParameter_->in_elements_num0_);
...
...
@@ -399,9 +390,33 @@ int ArithmeticFP16CPUKernel::Run() {
                      arithmeticParameter_->in_elements_num1_);
   }
+  if (arithmeticParameter_->broadcasting_) {
+    auto tile_size = arithmeticParameter_->out_elements_num_ * sizeof(float16_t);
+    tile_data0_ = reinterpret_cast<float16_t *>(malloc(tile_size));
+    if (tile_data0_ == nullptr) {
+      MS_LOG(ERROR) << "malloc data fail!";
+      FreeTmpBuffer();
+      return RET_ERROR;
+    }
+    tile_data1_ = reinterpret_cast<float16_t *>(malloc(tile_size));
+    if (tile_data1_ == nullptr) {
+      MS_LOG(ERROR) << "malloc data fail!";
+      FreeTmpBuffer();
+      return RET_ERROR;
+    }
+    auto input0 = reinterpret_cast<float16_t *>(in_tensors_[0]->Data());
+    auto input1 = reinterpret_cast<float16_t *>(in_tensors_[1]->Data());
+    float16_t *input0_data = input0_fp16_ == nullptr ? input0 : input0_fp16_;
+    float16_t *input1_data1 = input1_fp16_ == nullptr ? input1 : input1_fp16_;
+    TileDimensionsFp16(input0_data, input1_data1, tile_data0_, tile_data1_, arithmeticParameter_);
+  }
   ret = LiteBackendParallelLaunch(ArithmeticsRun, this, context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Arithmetic function fail!ret: " << ret;
     FreeTmpBuffer();
     return ret;
   }
   return RET_OK;
...
...
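One detail worth flagging in the Run() additions above (the same lines formerly lived in ReSize()): after allocating input1_fp16_, the null check tests input0_fp16_ again. The diff is reproduced faithfully, so this reads as a copy-paste slip rather than intent. A hedged sketch of a tighter shape, using a hypothetical AllocFp16OrNull helper that always checks the pointer it just produced:

```cpp
#include <cstdio>
#include <cstdlib>

using fp16_sketch = unsigned short;  // stand-in for float16_t off ARM targets

// Hypothetical helper, not part of the commit: allocate an fp16 scratch
// buffer and report failure on the buffer actually allocated, so the
// input0_fp16_/input1_fp16_ mix-up cannot recur.
fp16_sketch *AllocFp16OrNull(size_t elems) {
  auto *p = static_cast<fp16_sketch *>(malloc(elems * sizeof(fp16_sketch)));
  if (p == nullptr) fprintf(stderr, "malloc data fail!\n");
  return p;
}

int main() {
  fp16_sketch *input1_fp16 = AllocFp16OrNull(1024);
  if (input1_fp16 == nullptr) return 1;  // checks the buffer just allocated
  free(input1_fp16);
  return 0;
}
```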
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/arithmetic_fp16.c
...
...
@@ -18,6 +18,33 @@
 #include <math.h>
 #include "nnacl/arithmetic_common.h"
 
+void TileOneDimensionFp16(float16_t *inData, float16_t *outData, int dim, size_t ndim, int *inShape, int *inStrides,
+                          int *outStrides, int *multiple) {
+  int srcDimSize = inShape[dim];
+  if (dim == ndim - 1) {
+    for (int i = 0; i < multiple[dim]; i++) {
+      memcpy(outData, inData, srcDimSize * sizeof(float16_t));
+      outData += srcDimSize;
+    }
+    return;
+  }
+  for (size_t i = 0; i < srcDimSize; i++) {
+    for (size_t j = 0; j < multiple[dim]; j++) {
+      TileOneDimensionFp16(inData + inStrides[dim] * i, outData + outStrides[dim] * (i + j * srcDimSize), dim + 1,
+                           ndim, inShape, inStrides, outStrides, multiple);
+    }
+  }
+}
+
+void TileDimensionsFp16(float16_t *data0, float16_t *data1, float16_t *tile_data0, float16_t *tile_data1,
+                        ArithmeticParameter *param) {
+  CalcMultiplesAndStrides(param);
+  TileOneDimensionFp16(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_,
+                       param->out_strides_, param->multiples0_);
+  TileOneDimensionFp16(data1, tile_data1, 0, param->ndim_, param->in_shape1_, param->in_strides1_,
+                       param->out_strides_, param->multiples1_);
+}
+
 int ElementMulFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size) {
   int block_mod = element_size % C8NUM;
   int block_c8 = element_size - block_mod;
...
...
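The recursion in TileOneDimensionFp16 above expands one dimension per call: at the innermost dimension it memcpy-repeats the source row multiple[dim] times, and at outer dimensions it recurses once per (source index, repeat) pair, where multiple[d] is out_shape[d] / in_shape[d] as prepared by CalcMultiplesAndStrides. Here is the same scheme in plain C++ with float, so it runs without a float16 toolchain, tiling a [2, 1] operand up to [2, 3]:

```cpp
#include <cstdio>
#include <cstring>

// Same recursion as TileOneDimensionFp16, retyped for float. Strides are in
// elements; multiple[d] = outShape[d] / inShape[d].
void TileOneDimension(const float *in, float *out, int dim, int ndim,
                      const int *inShape, const int *inStrides,
                      const int *outStrides, const int *multiple) {
  const int srcDimSize = inShape[dim];
  if (dim == ndim - 1) {
    for (int i = 0; i < multiple[dim]; i++) {
      memcpy(out, in, srcDimSize * sizeof(float));  // repeat the last axis
      out += srcDimSize;
    }
    return;
  }
  for (int i = 0; i < srcDimSize; i++) {
    for (int j = 0; j < multiple[dim]; j++) {
      TileOneDimension(in + inStrides[dim] * i,
                       out + outStrides[dim] * (i + j * srcDimSize),
                       dim + 1, ndim, inShape, inStrides, outStrides, multiple);
    }
  }
}

int main() {
  const float in[2] = {1.0f, 2.0f};  // shape [2, 1]
  float out[6] = {0};                // shape [2, 3]
  const int inShape[2] = {2, 1}, inStrides[2] = {1, 1};
  const int outStrides[2] = {3, 1}, multiple[2] = {1, 3};
  TileOneDimension(in, out, 0, 2, inShape, inStrides, outStrides, multiple);
  for (float v : out) printf("%g ", v);  // 1 1 1 2 2 2
  printf("\n");
}
```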
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/arithmetic_fp16.h
...
...
@@ -111,6 +111,8 @@ int ElementLessEqual(float16_t *input0, float16_t *input1, float16_t *output, in
 int ElementGreaterFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size);
 int ElementGreaterEqualFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size);
 
+void TileDimensionsFp16(float16_t *data0, float16_t *data1, float16_t *tile_data0, float16_t *tile_data1,
+                        ArithmeticParameter *param);
 #ifdef __cplusplus
 }
 #endif
...
...
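The #ifdef __cplusplus } #endif closer at the end of this header is the tail of the usual C linkage guard: the nnacl kernels are compiled as C, and the guard is what lets the C++ kernel above (arithmetic_fp16.cc) call them without name mangling. A self-contained sketch of the pattern; ElementAddExample is a made-up function, not part of nnacl:

```cpp
#include <cstdio>

// In a header this declaration would sit inside
// #ifdef __cplusplus extern "C" { #endif ... #ifdef __cplusplus } #endif,
// giving it C linkage when included from C++.
extern "C" int ElementAddExample(const float *in0, const float *in1, float *out, int n);

// Definition with matching C linkage (in the real tree this lives in a .c file).
extern "C" int ElementAddExample(const float *in0, const float *in1, float *out, int n) {
  for (int i = 0; i < n; ++i) out[i] = in0[i] + in1[i];
  return 0;
}

int main() {
  const float a[2] = {1, 2}, b[2] = {3, 4};
  float c[2] = {0, 0};
  ElementAddExample(a, b, c, 2);
  printf("%g %g\n", c[0], c[1]);  // 4 6
}
```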