Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
48883001
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
48883001
编写于
4年前
作者:
Y
yangruoqi713
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MS][LITE] arm cpu fp32 op: optimize scale op
上级
2de21696
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
73 addition
and
56 deletion
+73
-56
mindspore/lite/nnacl/scale.c
mindspore/lite/nnacl/scale.c
+56
-26
mindspore/lite/nnacl/scale.h
mindspore/lite/nnacl/scale.h
+1
-2
mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
+16
-28
未找到文件。
mindspore/lite/nnacl/scale.c
浏览文件 @
48883001
...
...
@@ -15,35 +15,65 @@
*/
#include "nnacl/scale.h"
#include "nnacl/errorcode.h"
int
DoScale
(
float
*
in_data
,
float
*
out_data
,
float
*
scale
,
float
*
offset
,
int
task_id
,
ScaleParameter
*
scale_param
)
{
if
(
in_data
==
NULL
||
out_data
==
NULL
||
scale
==
NULL
||
offset
==
NULL
||
scale_param
==
NULL
)
{
return
NNACL_ERR
;
#ifdef ENABLE_ARM
#include <arm_neon.h>
#endif
/*
 * Applies out = in * scale[i] + offset[i] to a buffer addressed as
 * [outer][axis][inner], for the outer slices in [outer_start, outer_end).
 *
 * The element at (out, i, k) lives at index
 *   out * axis_size * inner_size + i * inner_size + k
 * in both in_data and out_data. scale and offset are indexed by the axis
 * position i only, so every element of one inner run shares the same pair.
 *
 * in_data     input buffer (read only here)
 * out_data    output buffer, written at the same indices that are read
 * scale       per-axis multipliers, at least axis_size entries are read
 * offset      per-axis addends, at least axis_size entries are read
 * outer_start first outer slice to process (inclusive)
 * outer_end   last outer slice to process (exclusive)
 * axis_size   number of entries along the scaled axis
 * inner_size  number of contiguous elements that share one scale/offset pair
 */
void ScaleInner(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end,
                int axis_size, int inner_size) {
  for (int out = outer_start; out < outer_end; out++) {
    int out_offset = out * axis_size * inner_size;
    for (int i = 0; i < axis_size; i++) {
      int axis_offset = out_offset + i * inner_size;
      int in_index = 0;
#ifdef ENABLE_ARM
      /* scale[i] / offset[i] do not change inside the inner run: broadcast them once per axis entry. */
      float32x4_t scale_4 = vdupq_n_f32(scale[i]);
      float32x4_t offset_4 = vdupq_n_f32(offset[i]);
      /* "<=" so that a final full group of four lanes is also vectorised instead of
       * being left to the scalar tail (e.g. when inner_size is a multiple of 4). */
      for (; in_index <= inner_size - 4; in_index += 4) {
        int in_offset = axis_offset + in_index;
        float32x4_t data = vld1q_f32(in_data + in_offset);
        /* offset_4 + data * scale_4 */
        float32x4_t result = vfmaq_f32(offset_4, data, scale_4);
        vst1q_f32(out_data + in_offset, result);
      }
#endif
      /* Scalar tail (and the whole run when ENABLE_ARM is not defined). */
      for (; in_index < inner_size; in_index++) {
        int in_offset = axis_offset + in_index;
        out_data[in_offset] = in_data[in_offset] * scale[i] + offset[i];
      }
    }
  }
}
if
(
scale_param
->
has_offset_
)
{
for
(
int
out
=
task_id
;
out
<
scale_param
->
outer_size_
;
out
+=
scale_param
->
op_parameter_
.
thread_num_
)
{
int
out_offset
=
out
*
scale_param
->
axis_size_
*
scale_param
->
inner_size_
;
for
(
int
i
=
0
;
i
<
scale_param
->
axis_size_
;
i
++
)
{
int
axis_offset
=
out_offset
+
i
*
scale_param
->
inner_size_
;
for
(
int
in
=
0
;
in
<
scale_param
->
inner_size_
;
in
++
)
{
int
in_offset
=
axis_offset
+
in
;
out_data
[
in_offset
]
=
in_data
[
in_offset
]
*
scale
[
i
]
+
offset
[
i
];
}
}
void
ScaleAxis
(
float
*
in_data
,
float
*
out_data
,
float
*
scale
,
float
*
offset
,
int
outer_start
,
int
outer_end
,
int
axis_size
)
{
for
(
int
out
=
outer_start
;
out
<
outer_end
;
out
++
)
{
int
out_offset
=
out
*
axis_size
;
int
index
=
0
;
#ifdef ENABLE_ARM
for
(;
index
<
axis_size
-
4
;
index
+=
4
)
{
int
in_offset
=
out_offset
+
index
;
float32x4_t
data
=
vld1q_f32
(
in_data
+
in_offset
);
float32x4_t
scale_4
=
vld1q_f32
(
scale
+
index
);
float32x4_t
offset_4
=
vld1q_f32
(
offset
+
index
);
float32x4_t
reslut
=
vfmaq_f32
(
offset_4
,
data
,
scale_4
);
vst1q_f32
(
out_data
+
in_offset
,
reslut
);
}
}
else
{
for
(
int
out
=
task_id
;
out
<
scale_param
->
outer_size_
;
out
+=
scale_param
->
op_parameter_
.
thread_num_
)
{
int
out_offset
=
out
*
scale_param
->
axis_size_
*
scale_param
->
inner_size_
;
for
(
int
i
=
0
;
i
<
scale_param
->
axis_size_
;
i
++
)
{
int
axis_offset
=
out_offset
+
i
*
scale_param
->
inner_size_
;
for
(
int
in
=
0
;
in
<
scale_param
->
inner_size_
;
in
++
)
{
int
in_offset
=
axis_offset
+
in
;
out_data
[
in_offset
]
=
in_data
[
in_offset
]
*
scale
[
i
];
}
}
#endif
for
(;
index
<
axis_size
;
index
++
)
{
int
in_offset
=
out_offset
+
index
;
out_data
[
in_offset
]
=
in_data
[
in_offset
]
*
scale
[
index
]
+
offset
[
index
];
}
}
return
NNACL_OK
;
}
/*
 * Per-task entry point of the scale op.
 *
 * The outer dimension (scale_param->outer_size_) is cut into chunks of
 * UP_DIV(outer_size_, thread_num_) slices; task `task_id` handles the chunk
 * starting at task_id * chunk, clamped to outer_size_ with MSMIN.
 * When inner_size_ is 1 the work is handed to ScaleAxis, otherwise to ScaleInner.
 *
 * NOTE(review): UP_DIV is presumably a ceiling division and would divide by
 * thread_num_ - confirm callers never pass thread_num_ == 0.
 */
void DoScale(float *in_data, float *out_data, float *scale, float *offset, int task_id, ScaleParameter *scale_param) {
  const int total_outer = scale_param->outer_size_;
  const int slices_per_task = UP_DIV(total_outer, scale_param->op_parameter_.thread_num_);
  const int first_slice = task_id * slices_per_task;
  const int last_slice = MSMIN(first_slice + slices_per_task, total_outer);

  if (scale_param->inner_size_ != 1) {
    /* General case: each scale/offset entry covers a run of inner_size_ elements. */
    ScaleInner(in_data, out_data, scale, offset, first_slice, last_slice, scale_param->axis_size_,
               scale_param->inner_size_);
    return;
  }

  /* inner_size_ == 1: the scaled axis is the innermost one. */
  ScaleAxis(in_data, out_data, scale, offset, first_slice, last_slice, scale_param->axis_size_);
}
This diff is collapsed.
Click to expand it.
mindspore/lite/nnacl/scale.h
浏览文件 @
48883001
...
...
@@ -26,13 +26,12 @@ typedef struct ScaleParameter {
int
inner_size_
;
int
axis_
;
bool
const_scale_
;
bool
has_offset_
;
}
ScaleParameter
;
#ifdef __cplusplus
extern
"C"
{
#endif
int
DoScale
(
float
*
in_data
,
float
*
out_data
,
float
*
scale
,
float
*
offset
,
int
task_id
,
ScaleParameter
*
scale_param
);
void
DoScale
(
float
*
in_data
,
float
*
out_data
,
float
*
scale
,
float
*
offset
,
int
task_id
,
ScaleParameter
*
scale_param
);
#ifdef __cplusplus
}
#endif
...
...
This diff is collapsed.
Click to expand it.
mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
浏览文件 @
48883001
...
...
@@ -35,11 +35,9 @@ ScaleCPUKernel::~ScaleCPUKernel() {
scale_
=
nullptr
;
}
}
if
(
scale_param_
->
has_offset_
)
{
if
(
offset_
!=
nullptr
)
{
free
(
offset_
);
offset_
=
nullptr
;
}
if
(
offset_
!=
nullptr
)
{
free
(
offset_
);
offset_
=
nullptr
;
}
}
...
...
@@ -59,18 +57,15 @@ int ScaleCPUKernel::InitScaleOffset() {
scale_
=
nullptr
;
}
offset_
=
reinterpret_cast
<
float
*>
(
malloc
(
scale_param_
->
axis_size_
*
sizeof
(
float
)));
if
(
offset_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Malloc buffer failed."
;
return
RET_ERROR
;
}
memset
(
offset_
,
0
,
scale_param_
->
axis_size_
*
sizeof
(
float
));
if
(
in_tensors_
.
size
()
==
3
)
{
auto
offset_tensor
=
in_tensors_
.
at
(
2
);
offset_
=
reinterpret_cast
<
float
*>
(
malloc
(
offset_tensor
->
ElementsNum
()
*
sizeof
(
float
)));
if
(
offset_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Malloc buffer failed."
;
return
RET_ERROR
;
}
memcpy
(
offset_
,
offset_tensor
->
Data
(),
offset_tensor
->
ElementsNum
()
*
sizeof
(
float
));
scale_param_
->
has_offset_
=
true
;
}
else
{
offset_
=
nullptr
;
scale_param_
->
has_offset_
=
false
;
}
return
RET_OK
;
}
...
...
@@ -101,6 +96,7 @@ int ScaleCPUKernel::InitParameter() {
for
(
size_t
i
=
scale_param_
->
axis_
+
scale_shape
.
size
();
i
<
in_shape
.
size
();
i
++
)
{
scale_param_
->
inner_size_
*=
in_shape
[
i
];
}
scale_param_
->
op_parameter_
.
thread_num_
=
MSMIN
(
scale_param_
->
op_parameter_
.
thread_num_
,
scale_param_
->
outer_size_
);
return
RET_OK
;
}
...
...
@@ -114,6 +110,11 @@ int ScaleCPUKernel::Init() {
return
RET_OK
;
}
ReSize
();
return
RET_OK
;
}
int
ScaleCPUKernel
::
ReSize
()
{
auto
ret
=
InitParameter
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Scale fp32 InitParameter failed."
;
...
...
@@ -128,21 +129,8 @@ int ScaleCPUKernel::Init() {
return
RET_OK
;
}
int
ScaleCPUKernel
::
ReSize
()
{
auto
ret
=
InitParameter
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Scale fp32 InitParameter failed."
;
return
RET_ERROR
;
}
return
RET_OK
;
}
// Runs the scale computation for the slice of work that belongs to `task_id`.
//
// DoScale() returns void, so there is no status to inspect: the former
// `auto ret = DoScale(...)` error branch is dropped (it could not compile
// against a void function and made the kernel run twice per task).
// Always returns RET_OK.
int ScaleCPUKernel::Scale(int task_id) {
  DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_);
  return RET_OK;
}
...
...
This diff is collapsed.
Click to expand it.
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录
新手
引导
客服
返回
顶部