Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
dc13718c
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
dc13718c
编写于
8月 23, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 23, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4974 [MS][LITE][Develop]fp16 conv1x1 bug
Merge pull request !4974 from ling/sr
上级
7dbe9f70
0fac817a
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
59 addition
and
27 deletion
+59
-27
mindspore/lite/nnacl/fp16/matmul_fp16.c
mindspore/lite/nnacl/fp16/matmul_fp16.c
+20
-6
mindspore/lite/nnacl/fp16/matmul_fp16.h
mindspore/lite/nnacl/fp16/matmul_fp16.h
+1
-1
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
.../lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
+25
-13
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
...e/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
+2
-4
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
...lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
+9
-0
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h
.../lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h
+1
-1
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
...pore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
+1
-2
未找到文件。
mindspore/lite/nnacl/fp16/matmul_fp16.c
浏览文件 @
dc13718c
...
...
@@ -15,14 +15,28 @@
*/
#include "nnacl/fp16/matmul_fp16.h"
void
ColMajor2Row8MajorFp16
(
float16_t
*
src_ptr
,
float16_t
*
dst_ptr
,
size_t
row
,
size_t
col
)
{
for
(
int
r
=
0
;
r
<
row
;
r
++
)
{
for
(
int
c
=
0
;
c
<
col
;
c
++
)
{
int
cd8
=
c
/
8
;
int
cm8
=
c
%
8
;
dst_ptr
[
cd8
*
8
*
row
+
r
*
8
+
cm8
]
=
src_ptr
[
c
*
row
+
r
];
/*
 * Repack a column-major matrix into an fp16 layout tiled by 8 columns.
 *
 * src_ptr     : source matrix, read as src[c * row + r]. Its elements are
 *               float16_t when src_float16 is true and float otherwise.
 * dst_ptr     : destination buffer. Element (r, c) is stored at
 *               (c / 8) * 8 * row + r * 8 + (c % 8). Only positions that map to
 *               an existing source element are written; when col is not a
 *               multiple of 8 the remaining lanes of the last tile are left
 *               untouched.
 * row, col    : dimensions of the source matrix.
 * src_float16 : selects how the memory behind src_ptr is interpreted.
 *
 * Loop counters are size_t so they match the type of the bounds; the previous
 * int counters produced signed/unsigned comparisons and could overflow for
 * very large dimensions.
 */
void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16) {
  if (src_float16) {
    const float16_t *src = (const float16_t *)src_ptr;
    for (size_t r = 0; r < row; r++) {
      for (size_t c = 0; c < col; c++) {
        size_t tile = c / 8; /* which 8-column tile the element belongs to */
        size_t lane = c % 8; /* position inside that tile */
        dst_ptr[tile * 8 * row + r * 8 + lane] = (float16_t)(src[c * row + r]);
      }
    }
  } else {
    const float *src = (const float *)src_ptr;
    for (size_t r = 0; r < row; r++) {
      for (size_t c = 0; c < col; c++) {
        size_t tile = c / 8;
        size_t lane = c % 8;
        /* float -> float16_t narrowing happens in this cast */
        dst_ptr[tile * 8 * row + r * 8 + lane] = (float16_t)(src[c * row + r]);
      }
    }
  }
}
void
MatMul16x8
(
const
float16_t
*
a
,
const
float16_t
*
b
,
float16_t
*
dst
,
const
float16_t
*
bias
,
ActType
act_type
,
...
...
mindspore/lite/nnacl/fp16/matmul_fp16.h
浏览文件 @
dc13718c
...
...
@@ -32,7 +32,7 @@ extern "C" {
void
MatMulFp16
(
const
float16_t
*
a
,
const
float16_t
*
b
,
float16_t
*
c
,
const
float16_t
*
bias
,
ActType
act_type
,
int
depth
,
int
row
,
int
col
,
int
stride
,
bool
write_nhwc
);
void
ColMajor2Row8MajorFp16
(
float16_t
*
src_ptr
,
float16_t
*
dst_ptr
,
size_t
row
,
size_t
col
);
void
ColMajor2Row8MajorFp16
(
void
*
src_ptr
,
float16_t
*
dst_ptr
,
size_t
row
,
size_t
col
,
bool
src_float16
);
void
RowMajor2Col16MajorFp16
(
float16_t
*
src_ptr
,
float16_t
*
dst_ptr
,
size_t
row
,
size_t
col
);
...
...
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
浏览文件 @
dc13718c
...
...
@@ -74,31 +74,36 @@ int Convolution1x1FP16CPUKernel::InitConv1x1Param() {
}
int
Convolution1x1FP16CPUKernel
::
InitWeightBias
()
{
auto
ret
=
ConvolutionBaseFP16CPUKernel
::
GetExecuteFilter
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Get Execute filter failed."
;
return
ret
;
}
auto
bias_tensor
=
in_tensors_
.
at
(
kBiasIndex
);
auto
weight_tensor
=
in_tensors_
.
at
(
kWeightIndex
);
auto
input_channel
=
weight_tensor
->
Channel
();
auto
output_channel
=
weight_tensor
->
Batch
();
bias_data_
=
malloc
(
matmul_param_
->
col_8_
*
sizeof
(
float16_t
));
size_t
size
=
UP_ROUND
(
output_channel
,
C8NUM
)
*
sizeof
(
float16_t
);
bias_data_
=
malloc
(
size
);
if
(
bias_data_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc bias_ptr_ error!"
;
return
RET_ERROR
;
}
memset
(
bias_data_
,
0
,
matmul_param_
->
col_8_
*
sizeof
(
float16_t
)
);
memset
(
bias_data_
,
0
,
size
);
if
(
in_tensors_
.
size
()
==
3
)
{
Float32ToFloat16
(
reinterpret_cast
<
float
*>
(
in_tensors_
[
2
]
->
Data
()),
reinterpret_cast
<
float16_t
*>
(
bias_data_
),
conv_param_
->
output_channel_
);
if
(
bias_tensor
->
data_type
()
==
kNumberTypeFloat16
)
{
memcpy
(
bias_data_
,
bias_tensor
->
Data
(),
output_channel
*
sizeof
(
float16_t
));
}
else
{
Float32ToFloat16
(
reinterpret_cast
<
float
*>
(
bias_tensor
->
Data
()),
reinterpret_cast
<
float16_t
*>
(
bias_data_
),
output_channel
);
}
}
weight_ptr_
=
reinterpret_cast
<
float16_t
*>
(
malloc
(
matmul_param_
->
deep_
*
matmul_param_
->
col_8_
*
sizeof
(
float16_t
)));
size
=
input_channel
*
UP_ROUND
(
output_channel
,
C8NUM
)
*
sizeof
(
float16_t
);
weight_ptr_
=
reinterpret_cast
<
float16_t
*>
(
malloc
(
size
));
if
(
weight_ptr_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc weight_ptr_ error!"
;
return
RET_ERROR
;
}
memset
(
weight_ptr_
,
0
,
matmul_param_
->
deep_
*
matmul_param_
->
col_8_
*
sizeof
(
float16_t
)
);
ColMajor2Row8MajorFp16
(
reinterpret_cast
<
float16_t
*>
(
execute_weight_
),
weight_ptr_
,
matmul_param_
->
deep_
,
matmul_param_
->
col_
);
memset
(
weight_ptr_
,
0
,
size
);
ColMajor2Row8MajorFp16
(
weight_tensor
->
Data
(),
weight_ptr_
,
input_channel
,
output_channel
,
weight_tensor
->
data_type
()
==
kNumberTypeFloat16
);
return
RET_OK
;
}
...
...
@@ -106,6 +111,13 @@ int Convolution1x1FP16CPUKernel::Init() {
if
(
!
InferShapeDone
())
{
return
RET_OK
;
}
matmul_param_
=
new
(
std
::
nothrow
)
MatMulParameter
();
if
(
matmul_param_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Init matmul_param_ failed."
;
return
RET_ERROR
;
}
int
ret
=
InitWeightBias
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Init weight bias failed."
;
...
...
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
浏览文件 @
dc13718c
...
...
@@ -31,9 +31,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
Convolution1x1FP16CPUKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
Context
*
ctx
,
const
mindspore
::
lite
::
PrimitiveC
*
primitive
)
:
ConvolutionBaseFP16CPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
,
primitive
)
{
matmul_param_
=
new
MatMulParameter
();
}
:
ConvolutionBaseFP16CPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
,
primitive
)
{}
~
Convolution1x1FP16CPUKernel
()
override
;
int
Init
()
override
;
...
...
@@ -50,7 +48,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
private:
bool
pre_trans_input_
=
false
;
int
thread_count_
=
0
;
int
thread_count_
=
1
;
int
thread_stride_
=
0
;
float16_t
*
weight_ptr_
=
nullptr
;
float16_t
*
input_ptr_
=
nullptr
;
...
...
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
浏览文件 @
dc13718c
...
...
@@ -23,6 +23,14 @@
#include "src/runtime/runtime_api.h"
namespace
mindspore
::
kernel
{
// Releases the buffer referenced by fp16_weight_ and clears the pointer.
ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() {
  // free() is a no-op on a null pointer, so no explicit null check is needed.
  free(fp16_weight_);
  fp16_weight_ = nullptr;
}
int
ConvolutionBaseFP16CPUKernel
::
GetExecuteTensor
()
{
// ===================input====================//
auto
input_tensor
=
in_tensors_
.
at
(
kInputIndex
);
...
...
@@ -65,6 +73,7 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() {
}
else
{
auto
*
origin_weight
=
reinterpret_cast
<
float16_t
*>
(
in_tensors_
.
at
(
kWeightIndex
)
->
Data
());
execute_weight_
=
origin_weight
;
fp16_weight_
=
nullptr
;
}
return
RET_OK
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h
浏览文件 @
dc13718c
...
...
@@ -30,7 +30,7 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
Context
*
ctx
,
const
mindspore
::
lite
::
PrimitiveC
*
primitive
)
:
ConvolutionBaseCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
,
primitive
)
{}
~
ConvolutionBaseFP16CPUKernel
()
override
=
default
;
~
ConvolutionBaseFP16CPUKernel
()
override
;
int
Init
()
override
{
return
RET_OK
;
}
int
ReSize
()
override
{
return
RET_OK
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
浏览文件 @
dc13718c
...
...
@@ -244,8 +244,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
if
(
kernel_h
==
3
&&
kernel_w
==
3
&&
stride_h
==
1
&&
stride_w
==
1
&&
dilation_h
==
1
&&
dilation_w
==
1
)
{
kernel
=
new
(
std
::
nothrow
)
kernel
::
Convolution3x3FP16CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
,
primitive
);
}
else
if
(
kernel_h
==
1
&&
kernel_w
==
1
)
{
// kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
kernel
=
new
(
std
::
nothrow
)
kernel
::
ConvolutionFP16CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
,
primitive
);
kernel
=
new
(
std
::
nothrow
)
kernel
::
Convolution1x1FP16CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
,
primitive
);
}
else
{
bool
use_winograd
=
false
;
int
out_unit
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录