Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
816ed8d8
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
816ed8d8
编写于
8月 24, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 24, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5010 [MS][LITE][Develop]enable batchnorm fp16
Merge pull request !5010 from sunsuodong/batch_norm_fp16
上级
3162b125
77287093
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
74 addition
and
35 deletion
+74
-35
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
+65
-34
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h
+9
-1
未找到文件。
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
浏览文件 @
816ed8d8
...
...
@@ -23,44 +23,75 @@ using mindspore::lite::KernelRegistrar;
using
mindspore
::
schema
::
PrimitiveType_BatchNorm
;
namespace
mindspore
::
kernel
{
int
BatchnormFp16CPUKernel
::
DoExecute
(
int
task_id
)
{
auto
param
=
reinterpret_cast
<
BatchNormParameter
*>
(
op_parameter_
);
if
(
in_tensors_
.
at
(
0
)
->
data_type
()
==
kNumberTypeFloat32
)
{
auto
input
=
in_tensors_
.
at
(
0
);
auto
mean
=
in_tensors_
.
at
(
1
);
auto
variance
=
in_tensors_
.
at
(
2
);
auto
output
=
out_tensors_
.
at
(
0
);
// Prepares the mean/variance constants consumed by the fp16 batchnorm kernel.
//
// Records in isFloat32Tensor_ whether input tensor 0 is float32.
//  - float32 input: allocates mean_ and variance_ with malloc (one float16_t
//    per element of input tensors 1 and 2) and fills them with the values
//    converted by Float32ToFloat16.
//  - otherwise: defers to BatchnormCPUKernel::InitConstTensor() and returns
//    its status, so a failure in the base class is no longer reported as
//    RET_OK.
//
// Returns RET_OK on success, RET_ERROR if either allocation fails, or the
// base-class status on the non-float32 path.
int BatchnormFp16CPUKernel::InitConstTensor() {
  isFloat32Tensor_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32;
  if (!isFloat32Tensor_) {
    // Propagate the base-class result instead of discarding it.
    return BatchnormCPUKernel::InitConstTensor();
  }

  auto mean_fp32 = in_tensors_.at(1);
  auto variance_fp32 = in_tensors_.at(2);
  mean_ = malloc(mean_fp32->ElementsNum() * sizeof(float16_t));
  variance_ = malloc(variance_fp32->ElementsNum() * sizeof(float16_t));
  if (mean_ == nullptr || variance_ == nullptr) {
    // NOTE(review): FreeMeanAndVariance() is defined outside this view; it is
    // presumably what releases whichever of the two buffers was allocated.
    FreeMeanAndVariance();
    return RET_ERROR;
  }
  Float32ToFloat16(reinterpret_cast<float *>(mean_fp32->Data()), reinterpret_cast<float16_t *>(mean_),
                   mean_fp32->ElementsNum());
  Float32ToFloat16(reinterpret_cast<float *>(variance_fp32->Data()), reinterpret_cast<float16_t *>(variance_),
                   variance_fp32->ElementsNum());
  return RET_OK;
}
auto
input_fp16
=
context_
->
allocator
->
Malloc
(
input
->
ElementsNum
()
*
sizeof
(
float16_t
));
auto
mean_fp16
=
context_
->
allocator
->
Malloc
(
mean
->
ElementsNum
()
*
sizeof
(
float16_t
));
auto
variance_fp16
=
context_
->
allocator
->
Malloc
(
variance
->
ElementsNum
()
*
sizeof
(
float16_t
));
auto
output_fp16
=
context_
->
allocator
->
Malloc
(
output
->
ElementsNum
()
*
sizeof
(
float16_t
));
if
(
input_fp16
==
nullptr
||
mean_fp16
==
nullptr
||
variance_fp16
==
nullptr
||
output_fp16
==
nullptr
)
{
context_
->
allocator
->
Free
(
input_fp16
);
context_
->
allocator
->
Free
(
mean_fp16
);
context_
->
allocator
->
Free
(
variance_fp16
);
context_
->
allocator
->
Free
(
output_fp16
);
int
BatchnormFp16CPUKernel
::
Run
()
{
auto
ret
=
Prepare
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Prepare fail! Ret error code: "
<<
ret
;
return
ret
;
}
auto
input_fp32
=
in_tensors_
.
at
(
0
);
auto
output_fp32
=
out_tensors_
.
at
(
0
);
if
(
isFloat32Tensor_
)
{
input_
=
context_
->
allocator
->
Malloc
(
input_fp32
->
ElementsNum
()
*
sizeof
(
float16_t
));
output_
=
context_
->
allocator
->
Malloc
(
output_fp32
->
ElementsNum
()
*
sizeof
(
float16_t
));
if
(
input_
==
nullptr
||
output_
==
nullptr
)
{
FreeInputAndOutput
();
return
RET_ERROR
;
}
Float32ToFloat16
(
reinterpret_cast
<
float
*>
(
input
->
Data
()),
reinterpret_cast
<
float16_t
*>
(
input_fp16
),
input
->
ElementsNum
());
Float32ToFloat16
(
reinterpret_cast
<
float
*>
(
mean
->
Data
()),
reinterpret_cast
<
float16_t
*>
(
mean_fp16
),
mean
->
ElementsNum
());
Float32ToFloat16
(
reinterpret_cast
<
float
*>
(
variance
->
Data
()),
reinterpret_cast
<
float16_t
*>
(
variance_fp16
),
variance
->
ElementsNum
());
Float32ToFloat16
(
reinterpret_cast
<
float
*>
(
input_fp32
->
Data
()),
reinterpret_cast
<
float16_t
*>
(
input_
),
input_fp32
->
ElementsNum
());
}
else
{
input_
=
in_tensors_
.
at
(
0
)
->
Data
();
output_
=
out_tensors_
.
at
(
0
)
->
Data
();
}
ret
=
LiteBackendParallelLaunch
(
BatchNormRun
,
this
,
op_parameter_
->
thread_num_
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"BatchnormRun error error_code["
<<
ret
<<
"]"
;
}
if
(
isFloat32Tensor_
)
{
Float16ToFloat32
(
reinterpret_cast
<
float16_t
*>
(
output_
),
reinterpret_cast
<
float
*>
(
output_fp32
->
Data
()),
output_fp32
->
ElementsNum
());
FreeInputAndOutput
();
}
return
ret
;
}
BatchNormFp16
(
input_fp16
,
mean_fp16
,
variance_fp16
,
param
,
task_id
,
output_fp16
);
int
BatchnormFp16CPUKernel
::
DoExecute
(
int
task_id
)
{
auto
param
=
reinterpret_cast
<
BatchNormParameter
*>
(
op_parameter_
);
BatchNormFp16
(
input_
,
mean_
,
variance_
,
param
,
task_id
,
output_
);
return
mindspore
::
lite
::
RET_OK
;
}
Float16ToFloat32
(
reinterpret_cast
<
float16_t
*>
(
output_fp16
),
reinterpret_cast
<
float
*>
(
output
),
output
->
ElementsNum
());
context_
->
allocator
->
Free
(
input_fp16
);
context_
->
allocator
->
Free
(
mean_fp16
);
context_
->
allocator
->
Free
(
variance_fp16
);
context_
->
allocator
->
Free
(
output_fp16
);
return
mindspore
::
lite
::
RET_OK
;
void
BatchnormFp16CPUKernel
::
FreeInputAndOutput
()
{
if
(
input_
!=
nullptr
)
{
context_
->
allocator
->
Free
(
input_
);
input_
=
nullptr
;
}
if
(
output_
!=
nullptr
)
{
context_
->
allocator
->
Free
(
output_
);
output_
=
nullptr
;
}
BatchNormFp16
(
in_tensors_
.
at
(
0
)
->
Data
(),
mean_
,
variance_
,
param
,
task_id
,
out_tensors_
.
at
(
0
)
->
Data
());
return
mindspore
::
lite
::
RET_OK
;
}
kernel
::
LiteKernel
*
CpuBatchnormFp16KernelCreator
(
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
...
...
@@ -83,5 +114,5 @@ kernel::LiteKernel *CpuBatchnormFp16KernelCreator(const std::vector<lite::tensor
return
kernel
;
}
//
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_BatchNorm, CpuBatchnormFp16KernelCreator)
REG_KERNEL
(
kCPU
,
kNumberTypeFloat16
,
PrimitiveType_BatchNorm
,
CpuBatchnormFp16KernelCreator
)
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h
浏览文件 @
816ed8d8
...
...
@@ -29,7 +29,15 @@ class BatchnormFp16CPUKernel : public BatchnormCPUKernel {
:
BatchnormCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
,
primitive
)
{}
virtual
~
BatchnormFp16CPUKernel
()
{}
virtual
int
DoExecute
(
int
task_id
);
int
Run
()
override
;
int
InitConstTensor
()
override
;
int
DoExecute
(
int
task_id
)
override
;
private:
void
FreeInputAndOutput
();
bool
isFloat32Tensor_
=
false
;
void
*
input_
=
nullptr
;
void
*
output_
=
nullptr
;
};
}
// namespace mindspore::kernel
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录