Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
正统之独孤求败
mindspore
提交
018c0d40
M
mindspore
项目概览
正统之独孤求败
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
018c0d40
编写于
8月 24, 2020
作者:
L
ling
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MS][LITE][Develop] fp16 1x1 bug && matmul bug
上级
33a562de
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
81 addition
and
81 deletion
+81
-81
mindspore/lite/src/ops/deconv2d.cc
mindspore/lite/src/ops/deconv2d.cc
+14
-0
mindspore/lite/src/ops/deconv2d.h
mindspore/lite/src/ops/deconv2d.h
+4
-0
mindspore/lite/src/populate_parameter.cc
mindspore/lite/src/populate_parameter.cc
+2
-20
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
.../lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
+13
-13
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
+45
-47
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
+3
-1
未找到文件。
mindspore/lite/src/ops/deconv2d.cc
浏览文件 @
018c0d40
...
...
@@ -124,6 +124,20 @@ int DeConv2D::InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vecto
}
std
::
vector
<
int
>
out_shape
=
{
output_n
,
output_h
,
output_w
,
output_c
};
output
->
set_shape
(
out_shape
);
if
(
pad_mode
==
schema
::
PadMode_SAME
)
{
pad_h_
=
((
input_h
-
1
)
*
stride_h
+
(
kernel_h
-
1
)
*
dilate_h
+
1
-
output_h
)
/
2
;
pad_w_
=
((
input_w
-
1
)
*
stride_w
+
(
kernel_w
-
1
)
*
dilate_w
+
1
-
output_w
)
/
2
;
}
else
if
(
pad_mode
==
schema
::
PadMode_VALID
)
{
pad_h_
=
0
;
pad_w_
=
0
;
}
else
if
(
pad_mode
==
schema
::
PadMode_CAFFE
)
{
pad_h_
=
pad_u_
;
pad_w_
=
pad_l_
;
}
else
{
MS_LOG
(
ERROR
)
<<
"unsupported pad mode for deconv"
;
}
return
0
;
}
}
// namespace lite
...
...
mindspore/lite/src/ops/deconv2d.h
浏览文件 @
018c0d40
...
...
@@ -74,12 +74,16 @@ class DeConv2D : public PrimitiveC {
int
PadDown
()
const
{
return
this
->
pad_d_
;
}
int
PadLeft
()
const
{
return
this
->
pad_l_
;
}
int
PadRight
()
const
{
return
this
->
pad_r_
;
}
int
PadH
()
const
{
return
this
->
pad_h_
;
}
int
PadW
()
const
{
return
this
->
pad_w_
;
}
protected:
int
pad_u_
=
0
;
int
pad_d_
=
0
;
int
pad_l_
=
0
;
int
pad_r_
=
0
;
int
pad_h_
=
0
;
int
pad_w_
=
0
;
};
}
// namespace lite
}
// namespace mindspore
...
...
mindspore/lite/src/populate_parameter.cc
浏览文件 @
018c0d40
...
...
@@ -506,6 +506,8 @@ OpParameter *PopulateDeconvParameter(const mindspore::lite::PrimitiveC *primitiv
conv_param
->
pad_d_
=
deconv_lite_primitive
->
PadDown
();
conv_param
->
pad_l_
=
deconv_lite_primitive
->
PadLeft
();
conv_param
->
pad_r_
=
deconv_lite_primitive
->
PadRight
();
conv_param
->
pad_h_
=
deconv_lite_primitive
->
PadH
();
conv_param
->
pad_w_
=
deconv_lite_primitive
->
PadW
();
conv_param
->
dilation_h_
=
conv_primitive
->
GetDilateH
();
conv_param
->
dilation_w_
=
conv_primitive
->
GetDilateW
();
auto
act_type
=
conv_primitive
->
GetActivationType
();
...
...
@@ -523,26 +525,6 @@ OpParameter *PopulateDeconvParameter(const mindspore::lite::PrimitiveC *primitiv
conv_param
->
is_relu6_
=
false
;
break
;
}
auto
pad_mode
=
conv_primitive
->
GetPadMode
();
switch
(
pad_mode
)
{
case
schema
::
PadMode_SAME
:
conv_param
->
pad_h_
=
(
conv_param
->
kernel_h_
-
1
)
/
2
;
conv_param
->
pad_w_
=
(
conv_param
->
kernel_w_
-
1
)
/
2
;
break
;
case
schema
::
PadMode_VALID
:
conv_param
->
pad_h_
=
0
;
conv_param
->
pad_w_
=
0
;
break
;
case
schema
::
PadMode_CAFFE
:
conv_param
->
pad_h_
=
conv_param
->
pad_u_
;
conv_param
->
pad_w_
=
conv_param
->
pad_l_
;
break
;
default:
MS_LOG
(
ERROR
)
<<
"invalid pad mode!"
;
return
nullptr
;
}
return
reinterpret_cast
<
OpParameter
*>
(
conv_param
);
}
...
...
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
浏览文件 @
018c0d40
...
...
@@ -70,6 +70,15 @@ int Convolution1x1FP16CPUKernel::InitConv1x1Param() {
return
RET_MEMORY_FAILED
;
}
memset
(
pack_input_
,
0
,
matmul_param_
->
row_16_
*
matmul_param_
->
deep_
*
sizeof
(
float16_t
));
if
(
pre_trans_input_
)
{
input_ptr_
=
reinterpret_cast
<
float16_t
*>
(
malloc
(
matmul_param_
->
row_
*
matmul_param_
->
deep_
*
sizeof
(
float16_t
)));
if
(
input_ptr_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc input_ptr_ error!"
;
return
RET_MEMORY_FAILED
;
}
memset
(
input_ptr_
,
0
,
matmul_param_
->
row_
*
matmul_param_
->
deep_
*
sizeof
(
float16_t
));
}
return
RET_OK
;
}
...
...
@@ -131,6 +140,10 @@ void Convolution1x1FP16CPUKernel::FreeTmpBuffer() {
free
(
pack_input_
);
pack_input_
=
nullptr
;
}
if
(
pre_trans_input_
&&
input_ptr_
!=
nullptr
)
{
free
(
input_ptr_
);
input_ptr_
=
nullptr
;
}
return
;
}
...
...
@@ -205,15 +218,6 @@ int Convolution1x1FP16CPUKernel::Run() {
return
ret
;
}
if
(
pre_trans_input_
)
{
input_ptr_
=
reinterpret_cast
<
float16_t
*>
(
ctx_
->
allocator
->
Malloc
(
matmul_param_
->
row_
*
matmul_param_
->
deep_
*
sizeof
(
float16_t
)));
if
(
input_ptr_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc input_ptr_ error!"
;
return
RET_MEMORY_FAILED
;
}
}
for
(
int
batch_index
=
0
;
batch_index
<
conv_param_
->
input_batch_
;
batch_index
++
)
{
Pre1x1Trans
(
execute_input_
+
batch_index
*
conv_param_
->
input_h_
*
conv_param_
->
input_w_
*
conv_param_
->
input_channel_
,
...
...
@@ -229,10 +233,6 @@ int Convolution1x1FP16CPUKernel::Run() {
ConvolutionBaseFP16CPUKernel
::
IfCastOutput
();
ConvolutionBaseFP16CPUKernel
::
FreeTmpBuffer
();
if
(
pre_trans_input_
&&
input_ptr_
!=
nullptr
)
{
ctx_
->
allocator
->
Free
(
input_ptr_
);
input_ptr_
=
nullptr
;
}
return
RET_OK
;
}
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
浏览文件 @
018c0d40
...
...
@@ -29,15 +29,15 @@ MatmulCPUKernel::~MatmulCPUKernel() { FreeTmpBuffer(); }
void
MatmulCPUKernel
::
FreeTmpBuffer
()
{
if
(
a_c12_ptr_
!=
nullptr
)
{
ctx_
->
allocator
->
F
ree
(
a_c12_ptr_
);
f
ree
(
a_c12_ptr_
);
a_c12_ptr_
=
nullptr
;
}
if
(
b_r8_ptr_
!=
nullptr
)
{
ctx_
->
allocator
->
F
ree
(
b_r8_ptr_
);
f
ree
(
b_r8_ptr_
);
b_r8_ptr_
=
nullptr
;
}
if
(
bias_ptr_
!=
nullptr
)
{
ctx_
->
allocator
->
F
ree
(
bias_ptr_
);
f
ree
(
bias_ptr_
);
bias_ptr_
=
nullptr
;
}
}
...
...
@@ -67,23 +67,28 @@ int MatmulCPUKernel::ReSize() {
thread_count_
=
MSMIN
(
thread_count_
,
UP_DIV
(
params_
->
col_8_
,
8
));
thread_stride_
=
UP_DIV
(
UP_DIV
(
params_
->
col_8_
,
8
),
thread_count_
);
a_c12_ptr_
=
reinterpret_cast
<
float
*>
(
ctx_
->
allocator
->
Malloc
(
params_
->
row_12_
*
params_
->
deep_
*
sizeof
(
float
)));
a_c12_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
params_
->
batch
*
params_
->
row_12_
*
params_
->
deep_
*
sizeof
(
float
)));
if
(
a_c12_ptr_
==
nullptr
)
{
FreeTmpBuffer
();
return
RET_MEMORY_FAILED
;
}
memset
(
a_c12_ptr_
,
0
,
params_
->
row_12_
*
params_
->
deep_
*
sizeof
(
float
));
b_r8_ptr_
=
reinterpret_cast
<
float
*>
(
ctx_
->
allocator
->
Malloc
(
params_
->
col_8_
*
params_
->
deep_
*
sizeof
(
float
)));
b_r8_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
params_
->
batch
*
params_
->
col_8_
*
params_
->
deep_
*
sizeof
(
float
)));
if
(
b_r8_ptr_
==
nullptr
)
{
FreeTmpBuffer
();
return
RET_MEMORY_FAILED
;
}
memset
(
b_r8_ptr_
,
0
,
params_
->
col_8_
*
params_
->
deep_
*
sizeof
(
float
));
params_
->
a_const_
=
false
;
params_
->
b_const_
=
false
;
InitMatrixA
(
reinterpret_cast
<
float
*>
(
in_tensors_
[
0
]
->
Data
()),
a_c12_ptr_
);
InitMatrixB
(
reinterpret_cast
<
float
*>
(
in_tensors_
[
1
]
->
Data
()),
b_r8_ptr_
);
params_
->
a_const_
=
(
in_tensors_
[
0
]
->
Data
()
!=
nullptr
);
params_
->
b_const_
=
(
in_tensors_
[
1
]
->
Data
()
!=
nullptr
);
if
(
params_
->
a_const_
==
true
)
{
InitMatrixA
(
reinterpret_cast
<
float
*>
(
in_tensors_
[
0
]
->
Data
()),
a_c12_ptr_
);
}
if
(
params_
->
b_const_
==
true
)
{
InitMatrixB
(
reinterpret_cast
<
float
*>
(
in_tensors_
[
1
]
->
Data
()),
b_r8_ptr_
);
}
bias_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
params_
->
col_8_
*
sizeof
(
float
)));
if
(
bias_ptr_
==
nullptr
)
{
...
...
@@ -99,35 +104,27 @@ int MatmulCPUKernel::ReSize() {
}
void
MatmulCPUKernel
::
InitMatrixA
(
float
*
src_ptr
,
float
*
dst_ptr
)
{
if
(
params_
->
a_const_
==
true
)
{
return
;
}
if
(
src_ptr
==
nullptr
)
{
return
;
}
params_
->
a_const_
=
true
;
if
(
params_
->
a_transpose_
)
{
RowMajor2Row12Major
(
src_ptr
,
dst_ptr
,
params_
->
deep_
,
params_
->
row_
);
}
else
{
RowMajor2Col12Major
(
src_ptr
,
dst_ptr
,
params_
->
row_
,
params_
->
deep_
);
for
(
int
i
=
0
;
i
<
params_
->
batch
;
i
++
)
{
float
*
src
=
src_ptr
+
i
*
params_
->
deep_
*
params_
->
row_
;
float
*
dst
=
dst_ptr
+
i
*
params_
->
deep_
*
params_
->
row_12_
;
if
(
params_
->
a_transpose_
)
{
RowMajor2Row12Major
(
src
,
dst
,
params_
->
deep_
,
params_
->
row_
);
}
else
{
RowMajor2Col12Major
(
src
,
dst
,
params_
->
row_
,
params_
->
deep_
);
}
}
return
;
}
void
MatmulCPUKernel
::
InitMatrixB
(
float
*
src_ptr
,
float
*
dst_ptr
)
{
if
(
params_
->
b_const_
==
true
)
{
return
;
}
if
(
src_ptr
==
nullptr
)
{
return
;
}
params_
->
b_const_
=
true
;
if
(
params_
->
b_transpose_
)
{
RowMajor2Col8Major
(
src_ptr
,
dst_ptr
,
params_
->
col_
,
params_
->
deep_
);
}
else
{
RowMajor2Row8Major
(
src_ptr
,
dst_ptr
,
params_
->
deep_
,
params_
->
col_
);
for
(
int
i
=
0
;
i
<
params_
->
batch
;
i
++
)
{
float
*
src
=
src_ptr
+
i
*
params_
->
deep_
*
params_
->
col_
;
float
*
dst
=
dst_ptr
+
i
*
params_
->
deep_
*
params_
->
col_8_
;
if
(
params_
->
b_transpose_
)
{
RowMajor2Col8Major
(
src
,
dst
,
params_
->
col_
,
params_
->
deep_
);
}
else
{
RowMajor2Row8Major
(
src
,
dst
,
params_
->
deep_
,
params_
->
col_
);
}
}
return
;
}
...
...
@@ -144,8 +141,8 @@ int MatmulCPUKernel::RunImpl(int task_id) {
if
(
cur_oc
<=
0
)
{
return
RET_OK
;
}
MatMulOpt
(
a_
c12_ptr_
,
b_r8
_ptr_
+
task_id
*
thread_stride_
*
C8NUM
*
params_
->
deep_
,
c_
r_
ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
bias_ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
ActType_No
,
MatMulOpt
(
a_
ptr_
,
b
_ptr_
+
task_id
*
thread_stride_
*
C8NUM
*
params_
->
deep_
,
c_ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
bias_ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
ActType_No
,
params_
->
deep_
,
params_
->
row_
,
cur_oc
,
params_
->
col_
,
OutType_Nhwc
);
return
RET_OK
;
}
...
...
@@ -166,20 +163,21 @@ int MatmulCPUKernel::Run() {
MS_LOG
(
ERROR
)
<<
"Prepare fail!ret: "
<<
prepare_ret
;
return
prepare_ret
;
}
auto
a_ptr
=
reinterpret_cast
<
float
*>
(
in_tensors_
[
0
]
->
Data
());
auto
b_ptr
=
reinterpret_cast
<
float
*>
(
in_tensors_
[
1
]
->
Data
());
auto
c_ptr
=
reinterpret_cast
<
float
*>
(
out_tensors_
[
0
]
->
Data
());
auto
a_stride
=
params_
->
row_
*
params_
->
deep_
;
auto
b_stride
=
params_
->
deep_
*
params_
->
col_
;
auto
c_stride
=
params_
->
row_
*
params_
->
col_
;
for
(
int
i
=
0
;
i
<
params_
->
batch
;
++
i
)
{
auto
cur_a_ptr
=
a_ptr
+
i
*
a_stride
;
auto
cur_b_ptr
=
b_ptr
+
i
*
b_stride
;
c_r_ptr_
=
c_ptr
+
i
*
c_stride
;
auto
a_src
=
reinterpret_cast
<
float
*>
(
in_tensors_
[
0
]
->
Data
());
auto
b_src
=
reinterpret_cast
<
float
*>
(
in_tensors_
[
1
]
->
Data
());
auto
c_src
=
reinterpret_cast
<
float
*>
(
out_tensors_
[
0
]
->
Data
());
InitMatrixA
(
cur_a_ptr
,
a_c12_ptr_
);
InitMatrixB
(
cur_b_ptr
,
b_r8_ptr_
);
if
(
params_
->
a_const_
==
false
)
{
InitMatrixA
(
a_src
,
a_c12_ptr_
);
}
if
(
params_
->
b_const_
==
false
)
{
InitMatrixB
(
b_src
,
b_r8_ptr_
);
}
for
(
int
i
=
0
;
i
<
params_
->
batch
;
++
i
)
{
a_ptr_
=
a_c12_ptr_
+
i
*
params_
->
row_12_
*
params_
->
deep_
;
b_ptr_
=
b_r8_ptr_
+
i
*
params_
->
deep_
*
params_
->
col_8_
;
c_ptr_
=
c_src
+
i
*
params_
->
row_
*
params_
->
col_
;
LiteBackendParallelLaunch
(
MatmulFloatRun
,
this
,
thread_count_
);
}
return
RET_OK
;
...
...
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
浏览文件 @
018c0d40
...
...
@@ -43,8 +43,10 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel {
private:
float
*
a_c12_ptr_
=
nullptr
;
float
*
b_r8_ptr_
=
nullptr
;
float
*
c_r_ptr_
=
nullptr
;
float
*
bias_ptr_
=
nullptr
;
float
*
a_ptr_
=
nullptr
;
float
*
b_ptr_
=
nullptr
;
float
*
c_ptr_
=
nullptr
;
};
}
// namespace mindspore::kernel
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录