magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit d9ccbb32
Authored on Aug 28, 2020 by mindspore-ci-bot; committed via Gitee on Aug 28, 2020
!5361 [MS][LITE][Develop]fix batchnorm output fp16
Merge pull request !5361 from sunsuodong/fix_batchnorm_output_fp16
Parents: d2bd1536, 4f70467d
Showing 6 changed files with 37 additions and 35 deletions (+37 −35):
mindspore/lite/nnacl/fp16/batchnorm_fp16.c  +11 −6
mindspore/lite/nnacl/fp16/batchnorm_fp16.h  +2 −2
mindspore/lite/nnacl/fp32/batchnorm.c  +0 −1
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc  +18 −22
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h  +4 −3
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc  +2 −1
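For reference, per unit i and channel c the kernels patched below compute the standard batch-normalization transform and its fused variant, which the loops in the diffs implement directly:

\[
  y_{i,c} = \frac{x_{i,c} - \mu_c}{\sqrt{\sigma_c^2 + \varepsilon}},
  \qquad
  y^{\text{fused}}_{i,c} = \gamma_c \,\frac{x_{i,c} - \mu_c}{\sqrt{\sigma_c^2 + \varepsilon}} + \beta_c
\]

Here μ and σ² are the mean and variance buffers, ε is param->epsilon_, and γ / β correspond to the scale / offset arguments of FusedBatchNormFp16.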
mindspore/lite/nnacl/fp16/batchnorm_fp16.c

@@ -17,8 +17,8 @@
 #include "nnacl/fp16/batchnorm_fp16.h"
 #include <math.h>
 
-void BatchNormFp16(const void *input, const void *mean, const void *variance, BatchNormParameter *param,
-                   int task_id, void *output) {
+void BatchNormFp16(const float16_t *input, const void *mean, const void *variance, BatchNormParameter *param,
+                   int task_id, float16_t *output) {
   int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
   int completed_units = task_id * units_per_thread;
   int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
@@ -27,8 +27,9 @@ void BatchNormFp16(const void *input, const void *mean, const void *variance,
   for (int i = 0; i < cur_unit; i++) {
     for (int c = 0; c < param->channel_; c++) {
       float16_t variance_sqrt = sqrt(((const float16_t *)variance)[c] + param->epsilon_);
-      ((float16_t *)output)[cur_offset + c] =
-        (((const float16_t *)input)[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt;
+      if (variance_sqrt != 0) {
+        output[cur_offset + c] = (input[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt;
+      }
     }
     cur_offset += param->channel_;
   }
@@ -44,8 +45,12 @@ void FusedBatchNormFp16(const void *input, const void *scale, const void *offset
   for (int i = 0; i < cur_unit; i++) {
     for (int c = 0; c < param->channel_; c++) {
       float16_t variance_sqrt = sqrt(((const float16_t *)variance)[c] + param->epsilon_);
-      float16_t norm_val =
-        (((const float16_t *)input)[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt;
-      ((float16_t *)output)[cur_offset + c] =
-        norm_val * ((const float16_t *)scale)[c] + ((const float16_t *)offset)[c];
+      if (variance_sqrt != 0) {
+        float16_t norm_val =
+          (((const float16_t *)input)[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt;
+        ((float16_t *)output)[cur_offset + c] =
+          norm_val * ((const float16_t *)scale)[c] + ((const float16_t *)offset)[c];
+      }
     }
     cur_offset += param->channel_;
   }
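The functional change here is the variance_sqrt != 0 guard, which stops a zero denominator from writing inf/NaN into the fp16 output. A minimal self-contained sketch of the guarded loop (hypothetical function name; float stands in for float16_t so it compiles on any host):

#include <math.h>

/* Sketch of the patched inner loop: normalize each channel, but skip the
 * division when sqrt(variance + epsilon) is zero, leaving that output
 * element unwritten instead of producing inf/NaN. */
static void BatchNormGuarded(const float *input, const float *mean, const float *variance,
                             float epsilon, int units, int channels, float *output) {
  int cur_offset = 0;
  for (int i = 0; i < units; i++) {
    for (int c = 0; c < channels; c++) {
      float variance_sqrt = sqrtf(variance[c] + epsilon);
      if (variance_sqrt != 0) { /* the guard this commit introduces */
        output[cur_offset + c] = (input[cur_offset + c] - mean[c]) / variance_sqrt;
      }
    }
    cur_offset += channels;
  }
}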
mindspore/lite/nnacl/fp16/batchnorm_fp16.h

@@ -25,8 +25,8 @@
 extern "C" {
 #endif
 
-void BatchNormFp16(const void *input, const void *mean, const void *variance, BatchNormParameter *param,
-                   int task_id, void *output);
+void BatchNormFp16(const float16_t *input, const void *mean, const void *variance, BatchNormParameter *param,
+                   int task_id, float16_t *output);
 void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean,
                         const void *variance, BatchNormParameter *param, int task_id, void *output);
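With the typed declaration, only input and output become float16_t *; mean and variance remain void * and are reinterpreted as fp16 inside the kernel (the CPU kernel's InitConstTensor converts them from fp32 up front when needed). A hypothetical single-threaded call under the new signature, assuming an ARM toolchain where float16_t is available:

/* Hypothetical call site; filling the buffers is elided. Field names are
 * taken from BatchNormParameter as used in the diffs above. */
enum { kUnits = 16, kChannels = 8 };
float16_t input_fp16[kUnits * kChannels], output_fp16[kUnits * kChannels];
float16_t mean_fp16[kChannels], variance_fp16[kChannels];
BatchNormParameter param = {0};
param.unit_ = kUnits;                 /* number of spatial positions */
param.channel_ = kChannels;           /* values per position */
param.epsilon_ = 1e-5f;               /* assumed epsilon value */
param.op_parameter_.thread_num_ = 1;  /* one thread, so task_id 0 covers every unit */
BatchNormFp16(input_fp16, mean_fp16, variance_fp16, &param, 0, output_fp16);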
mindspore/lite/nnacl/fp32/batchnorm.c

@@ -15,7 +15,6 @@
  */
 #include "nnacl/fp32/batchnorm.h"
-#include "nnacl/fp16/batchnorm_fp16.h"
 #include <math.h>
 #include "nnacl/batchnorm_parameter.h"
 #include "nnacl/op_base.h"
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc

@@ -15,6 +15,7 @@
  */
 #include "src/runtime/kernel/arm/fp16/batchnorm_fp16.h"
+#include "src/runtime/kernel/arm/fp16/common_fp16.h"
 #include "nnacl/fp16/batchnorm_fp16.h"
 #include "nnacl/fp16/cast_fp16.h"
 #include "src/kernel_registry.h"
@@ -24,8 +25,9 @@ using mindspore::schema::PrimitiveType_BatchNorm;
 namespace mindspore::kernel {
 int BatchnormFp16CPUKernel::InitConstTensor() {
-  isFloat32Tensor_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32;
-  if (isFloat32Tensor_) {
+  is_input_fp32_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32;
+  is_output_fp32_ = out_tensors_.at(0)->data_type() == kNumberTypeFloat32;
+  if (is_input_fp32_) {
     auto mean_fp32 = in_tensors_.at(1);
     auto variance_fp32 = in_tensors_.at(2);
     mean_ = malloc(mean_fp32->ElementsNum() * sizeof(float16_t));
@@ -50,30 +52,24 @@ int BatchnormFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret;
     return ret;
   }
-  auto input_fp32 = in_tensors_.at(0);
-  auto output_fp32 = out_tensors_.at(0);
-  if (isFloat32Tensor_) {
-    input_ = context_->allocator->Malloc(input_fp32->ElementsNum() * sizeof(float16_t));
-    output_ = context_->allocator->Malloc(output_fp32->ElementsNum() * sizeof(float16_t));
-    if (input_ == nullptr || output_ == nullptr) {
-      FreeInputAndOutput();
-      return RET_ERROR;
-    }
-    Float32ToFloat16(reinterpret_cast<float *>(input_fp32->Data()), reinterpret_cast<float16_t *>(input_),
-                     input_fp32->ElementsNum());
-  } else {
-    input_ = in_tensors_.at(0)->Data();
-    output_ = out_tensors_.at(0)->Data();
-  }
+  auto input_tensor = in_tensors_.at(0);
+  auto output_tensor = out_tensors_.at(0);
+  input_ = ConvertInputFp32toFp16(input_tensor, context_);
+  output_ = MallocOutputFp16(output_tensor, context_);
+  if (input_ == nullptr || output_ == nullptr) {
+    FreeInputAndOutput();
+    MS_LOG(ERROR) << "input or output is nullptr";
+    return RET_ERROR;
+  }
   ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }
-  if (isFloat32Tensor_) {
-    Float16ToFloat32(reinterpret_cast<float16_t *>(output_), reinterpret_cast<float *>(output_fp32->Data()),
-                     output_fp32->ElementsNum());
-    FreeInputAndOutput();
-  }
+  if (is_output_fp32_) {
+    Float16ToFloat32(output_, reinterpret_cast<float *>(output_tensor->Data()), output_tensor->ElementsNum());
+  }
+  FreeInputAndOutput();
   return ret;
 }
@@ -84,11 +80,11 @@ int BatchnormFp16CPUKernel::DoExecute(int task_id) {
 }
 
 void BatchnormFp16CPUKernel::FreeInputAndOutput() {
-  if (input_ != nullptr) {
+  if (is_input_fp32_) {
     context_->allocator->Free(input_);
     input_ = nullptr;
   }
-  if (output_ != nullptr) {
+  if (is_output_fp32_) {
     context_->allocator->Free(output_);
     output_ = nullptr;
   }
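The reworked Run() pushes dtype staging into the shared fp16 helpers: ConvertInputFp32toFp16 and MallocOutputFp16 hand back fp16 buffers whatever the tensors' dtypes (presumably reusing the tensor's own buffer when it is already fp16), the kernel always computes in fp16, and the result is cast back only when the output tensor is fp32. FreeInputAndOutput now keys off is_input_fp32_ / is_output_fp32_ instead of nullptr, so it frees exactly the buffers that were allocated for conversion. A self-contained sketch of this convert-compute-convert pattern, with the MindSpore types reduced to plain arguments:

#include <stdlib.h>

typedef float half_t; /* stand-in for float16_t so the sketch builds anywhere */

/* Hypothetical condensed view of the new Run() dataflow for the fp32-in /
 * fp32-out case. The real code uses ConvertInputFp32toFp16 /
 * MallocOutputFp16 / Float16ToFloat32 and the context allocator. */
static int RunSketch(const float *in_fp32, float *out_fp32, int n,
                     void (*kernel)(const half_t *in, half_t *out, int n)) {
  half_t *in16 = (half_t *)malloc(n * sizeof(half_t));
  half_t *out16 = (half_t *)malloc(n * sizeof(half_t));
  if (in16 == NULL || out16 == NULL) {
    free(in16); /* mirrors the new early-error path: free, log, return error */
    free(out16);
    return -1;
  }
  for (int i = 0; i < n; i++) in16[i] = (half_t)in_fp32[i];  /* Float32ToFloat16 */
  kernel(in16, out16, n);                                    /* ParallelLaunch(BatchNormRun, ...) */
  for (int i = 0; i < n; i++) out_fp32[i] = (float)out16[i]; /* Float16ToFloat32 */
  free(in16);  /* FreeInputAndOutput: only conversion buffers are freed, */
  free(out16); /* gated by is_input_fp32_ / is_output_fp32_ in the real code */
  return 0;
}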
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h

@@ -35,9 +35,10 @@ class BatchnormFp16CPUKernel : public BatchnormCPUKernel {
  private:
   void FreeInputAndOutput();
-  bool isFloat32Tensor_ = false;
-  void *input_ = nullptr;
-  void *output_ = nullptr;
+  bool is_input_fp32_ = false;
+  bool is_output_fp32_ = false;
+  float16_t *input_ = nullptr;
+  float16_t *output_ = nullptr;
 };
 }  // namespace mindspore::kernel
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc

@@ -45,7 +45,8 @@ TEST_F(TestTopKFp32, TopK) {
   auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
   ASSERT_NE(creator, nullptr);
-  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), nullptr, desc, nullptr);
+  auto ctx = std::make_shared<lite::Context>();
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc, nullptr);
   ASSERT_NE(kernel, nullptr);
   auto ret = kernel->Run();
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录