Commit c0e7f422
Authored on Aug 03, 2020 by mindspore-ci-bot
Committed by Gitee on Aug 03, 2020
!3878 optimize lite arm cpu op: conv_depthwise, deconv_depthwise
Merge pull request !3878 from yangruoqi713/lite
Parents: df6707bd, 88a44a0e
Showing 12 changed files with 276 additions and 148 deletions (+276, -148).
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc    +0   -1
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc         +67  -39
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h          +3   -4
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc       +69  -40
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h        +8   -6
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc    +50  -22
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h     +1   -0
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc  +61  -31
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h   +1   -0
mindspore/lite/src/runtime/kernel/arm/opclib/fp16/conv_depthwise_fp16.cc    +6   -1
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/conv_depthwise.cc         +8   -2
mindspore/lite/src/runtime/kernel/arm/opclib/int8/conv_depthwise_int8.cc    +2   -2
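Across these twelve files the change follows one pattern: the monolithic Init() of each depthwise kernel is split into InitWeightBias() (one-time weight/bias packing) and InitBuffer() (shape-dependent packed input/output buffers), ReSize() frees and re-allocates only those buffers, and the LayoutTransform/convert_func_ path is replaced by an explicit need_align_ check on channel % C4NUM. The following is a minimal, self-contained sketch of that lifecycle; the class, fields, and helpers are simplified stand-ins for illustration, not the MindSpore Lite API.

// Hypothetical sketch of the Init/ReSize split applied in this commit.
// All names here are simplified stand-ins, not MindSpore Lite types.
#include <cstdio>
#include <cstdlib>
#include <cstring>

constexpr int C4NUM = 4;
inline int UpDiv(int x, int y) { return (x + y - 1) / y; }

struct ConvParam {
  int input_batch, input_h, input_w, input_channel;
  int output_batch, output_h, output_w, output_channel;
};

class DepthwiseKernelSketch {
 public:
  explicit DepthwiseKernelSketch(const ConvParam &p) : param_(p) {}
  ~DepthwiseKernelSketch() { FreeBuffers(); }

  // One-time work: pack weights and bias (omitted in this sketch).
  int InitWeightBias() { return 0; }

  // Shape-dependent work: allocate packed buffers only when the channel
  // count is not already a multiple of C4NUM.
  int InitBuffer() {
    if (param_.input_channel % C4NUM != 0) {
      need_align_ = true;
      size_t in_size = static_cast<size_t>(param_.input_batch) * param_.input_h * param_.input_w * C4NUM *
                       UpDiv(param_.input_channel, C4NUM) * sizeof(float);
      packed_input_ = static_cast<float *>(malloc(in_size));
      if (packed_input_ == nullptr) return -1;
      memset(packed_input_, 0, in_size);
    }
    if (param_.output_channel % C4NUM != 0) {
      need_align_ = true;
      size_t out_size = static_cast<size_t>(param_.output_batch) * param_.output_h * param_.output_w * C4NUM *
                        UpDiv(param_.output_channel, C4NUM) * sizeof(float);
      packed_output_ = static_cast<float *>(malloc(out_size));
      if (packed_output_ == nullptr) return -1;
      memset(packed_output_, 0, out_size);
    }
    return 0;
  }

  int Init() {
    if (InitWeightBias() != 0) return -1;
    return InitBuffer();
  }

  // ReSize re-allocates only the shape-dependent buffers.
  int ReSize(const ConvParam &p) {
    FreeBuffers();
    param_ = p;
    return InitBuffer();
  }

 private:
  void FreeBuffers() {
    free(packed_input_);
    free(packed_output_);
    packed_input_ = nullptr;
    packed_output_ = nullptr;
    need_align_ = false;
  }

  ConvParam param_;
  float *packed_input_ = nullptr;
  float *packed_output_ = nullptr;
  bool need_align_ = false;
};

int main() {
  ConvParam p{1, 16, 16, 3, 1, 16, 16, 3};  // 3 channels: not 4-aligned, so packing is needed
  DepthwiseKernelSketch kernel(p);
  printf("Init: %d\n", kernel.Init());
  printf("ReSize: %d\n", kernel.ReSize(p));
  return 0;
}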
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
...
@@ -46,7 +46,6 @@ int ConvolutionDepthwiseFp16CPUKernel::InitBuffer() {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(packed_output_, 0, pack_output_size * sizeof(float16_t));
  return RET_OK;
}
...
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
...
@@ -27,27 +27,7 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DepthwiseConv2D;

namespace mindspore::kernel {
int ConvolutionDepthwiseCPUKernel::Init() {
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  // init sliding window param
  sliding_ = new SlidingWindowParam;
  InitSlidingParam(sliding_, conv_param_, C4NUM);

  // pack input function: convert_func_
  auto input_tensor = inputs_[kInputIndex];
  auto data_type = input_tensor->data_type();
  auto input_format = input_tensor->GetFormat();
  schema::Format execute_format = schema::Format_NHWC4;
  if (input_format != execute_format) {
    convert_func_ = LayoutTransform(data_type, input_format, execute_format);
    if (convert_func_ == nullptr) {
      MS_LOG(ERROR) << "layout convert func is nullptr.";
      return RET_ERROR;
    }
  }
int ConvolutionDepthwiseCPUKernel::InitWeightBias() {
  // init weight: o, h, w, i; o == group, i == 1
  auto weight_tensor = inputs_[kWeightIndex];
  auto origin_weight = reinterpret_cast<float *>(weight_tensor->Data());
...
@@ -55,42 +35,93 @@ int ConvolutionDepthwiseCPUKernel::Init() {
  int pack_weight_size = C4NUM * OC4 * conv_param_->kernel_h_ * conv_param_->kernel_w_;

  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
  if (packed_weight_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(packed_weight_, 0, pack_weight_size * sizeof(float));
  PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, conv_param_->kernel_h_ * conv_param_->kernel_w_,
                       conv_param_->output_channel_);

  // init bias
  bias_data_ = reinterpret_cast<float *>(malloc(C4NUM * OC4 * sizeof(float)));
  if (bias_data_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(bias_data_, 0, C4NUM * OC4 * sizeof(float));
  if (inputs_.size() == kInputSize2) {
    auto ori_bias = reinterpret_cast<float *>(inputs_.at(kBiasIndex)->Data());
    memcpy(bias_data_, ori_bias, conv_param_->output_channel_ * sizeof(float));
  } else {
    MS_ASSERT(inputs_.size() == kInputSize1);
  }

  // init threadNum;
  conv_param_->thread_num_ = MSMIN(thread_count_, OC4);

  ReSize();
  return RET_OK;
}

int ConvolutionDepthwiseCPUKernel::ReSize() {
  // malloc pack input buffer
  if (convert_func_ != nullptr) {
int ConvolutionDepthwiseCPUKernel::InitBuffer() {
  // malloc pack input and output buffer
  if (conv_param_->input_channel_ % C4NUM != 0) {
    need_align_ = true;
    int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
    int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4;
    packed_input_ = reinterpret_cast<float *>(malloc(pack_input_size * sizeof(float)));
    if (packed_input_ == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
    memset(packed_input_, 0, pack_input_size * sizeof(float));
  }

  // malloc tmp output buffer
  if (conv_param_->output_channel_ % C4NUM != 0) {
    need_align_ = true;
    int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
    int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4;
    packed_output_ = reinterpret_cast<float *>(malloc(pack_output_size * sizeof(float)));
    memset(packed_output_, 0, pack_output_size * sizeof(float));
    if (packed_output_ == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
  }
  return RET_OK;
}

int ConvolutionDepthwiseCPUKernel::Init() {
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  // init sliding window param
  sliding_ = new SlidingWindowParam;
  InitSlidingParam(sliding_, conv_param_, C4NUM);

  auto ret = InitWeightBias();
  if (ret != 0) {
    MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed.";
    return RET_ERROR;
  }
  ret = InitBuffer();
  if (ret != 0) {
    MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

int ConvolutionDepthwiseCPUKernel::ReSize() {
  if (need_align_) {
    free(packed_input_);
    free(packed_output_);
  }
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  // init sliding window param
  sliding_ = new SlidingWindowParam;
  InitSlidingParam(sliding_, conv_param_, C4NUM);

  auto ret = InitBuffer();
  if (ret != 0) {
    MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
    return RET_ERROR;
  }
  return RET_OK;
}
...
@@ -120,15 +151,14 @@ int ConvolutionDepthwiseCPUKernel::Run() {
  auto input_addr = reinterpret_cast<float *>(input_tensor->Data());

  // pack input: to nhwc4
  if (convert_func_ != nullptr) {
    convert_func_(input_addr, packed_input_, conv_param_->input_batch_,
                  conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
  if (need_align_) {
    PackNHWCToNHWC4Fp32(input_addr, packed_input_, conv_param_->input_batch_,
                        conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
  } else {
    packed_input_ = input_addr;
  }

  output_addr = reinterpret_cast<float *>(outputs_.at(kOutputIndex)->Data());
  memset(output_addr, 0, outputs_.at(kOutputIndex)->ElementsNum() * sizeof(float));
  auto output_addr = reinterpret_cast<float *>(outputs_.at(kOutputIndex)->Data());
  if (!need_align_) {
    packed_output_ = output_addr;
  }
...
@@ -146,7 +176,6 @@ int ConvolutionDepthwiseCPUKernel::Run() {
  return RET_OK;
}

kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const Context *ctx,
...
@@ -170,4 +199,3 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::T
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DepthwiseConv2D, CpuConvDwFp32KernelCreator)
}  // namespace mindspore::kernel
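The buffer sizes in InitBuffer() above come straight from the NHWC4 layout: the channel dimension is padded up to a multiple of C4NUM (4), so a packed tensor holds batch * h * w * C4NUM * UP_DIV(channel, C4NUM) elements, and packing is only needed when channel % C4NUM != 0. A standalone sketch of that arithmetic, with illustrative values that are not taken from the commit:

// Standalone sketch of the NHWC4 padding arithmetic used by InitBuffer().
// The parameter values below are illustrative only.
#include <cstdio>

constexpr int C4NUM = 4;
inline int UpDiv(int x, int y) { return (x + y - 1) / y; }  // same role as UP_DIV in opclib

int main() {
  int batch = 1, h = 32, w = 32, channel = 3;  // 3 is not a multiple of 4, so need_align is true
  bool need_align = (channel % C4NUM) != 0;
  int c4 = UpDiv(channel, C4NUM);                 // one block of 4 channels
  int packed_elems = batch * h * w * C4NUM * c4;  // elements in the packed NHWC4 buffer
  int plain_elems = batch * h * w * channel;      // elements in the unpacked NHWC tensor
  printf("need_align=%d packed=%d plain=%d\n", need_align, packed_elems, plain_elems);
  return 0;
}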
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h
...
@@ -31,10 +31,8 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  ~ConvolutionDepthwiseCPUKernel() override {
    delete sliding_;
    free(packed_weight_);
    if (convert_func_ != nullptr) {
      free(packed_input_);
    }
    if (need_align_) {
      free(packed_input_);
      free(packed_output_);
    }
  };
...
@@ -43,6 +41,8 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  int ReSize() override;
  int Run() override;
  int InitBuffer();
  int InitWeightBias();
  int Execute(int task_id);

 private:
...
@@ -50,7 +50,6 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  float *packed_weight_;
  float *packed_input_;
  float *packed_output_;
  float *output_addr;
  bool need_align_ = false;
};
}  // namespace mindspore::kernel
...
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc
...
@@ -43,24 +43,7 @@ int DeconvolutionDepthwiseCPUKernel::InitSlideParam() {
  return RET_OK;
}

int DeconvolutionDepthwiseCPUKernel::Init() {
  InitSlideParam();
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  // pack input function: convert_func_
  auto input_tensor = inputs_[kInputIndex];
  auto data_type = input_tensor->data_type();
  auto input_format = input_tensor->GetFormat();
  schema::Format execute_format = schema::Format_NHWC4;
  if (input_format != execute_format) {
    convert_func_ = LayoutTransform(data_type, input_format, execute_format);
    if (convert_func_ == nullptr) {
      MS_LOG(ERROR) << "layout convert func is nullptr.";
      return RET_ERROR;
    }
  }
int DeconvolutionDepthwiseCPUKernel::InitWeightBias() {
  // init weight: o, h, w, i; o == group, i == 1
  auto weight_tensor = inputs_[kWeightIndex];
  auto origin_weight = reinterpret_cast<float *>(weight_tensor->Data());
...
@@ -68,55 +51,102 @@ int DeconvolutionDepthwiseCPUKernel::Init() {
  int pack_weight_size = C4NUM * OC4 * conv_param_->kernel_h_ * conv_param_->kernel_w_;

  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
  if (packed_weight_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(packed_weight_, 0, pack_weight_size * sizeof(float));
  PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, conv_param_->kernel_h_ * conv_param_->kernel_w_,
                       conv_param_->output_channel_);

  // init bias
  bias_data_ = reinterpret_cast<float *>(malloc(C4NUM * OC4 * sizeof(float)));
  if (bias_data_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(bias_data_, 0, C4NUM * OC4 * sizeof(float));
  if (inputs_.size() == kInputSize2) {
    auto ori_bias = reinterpret_cast<float *>(inputs_.at(kBiasIndex)->Data());
    memcpy(bias_data_, ori_bias, conv_param_->output_channel_ * sizeof(float));
  } else {
    MS_ASSERT(inputs_.size() == kInputSize1);
  }

  // init threadNum;
  conv_param_->thread_num_ = MSMIN(conv_param_->thread_num_, OC4);

  ReSize();
  return RET_OK;
}

int DeconvolutionDepthwiseCPUKernel::ReSize() {
  // malloc pack input buffer
  if (convert_func_ != nullptr) {
int DeconvolutionDepthwiseCPUKernel::InitBuffer() {
  // malloc pack input and output buffer
  if (conv_param_->input_channel_ % C4NUM != 0) {
    need_align_ = true;
    int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
    int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4;
    packed_input_ = reinterpret_cast<float *>(malloc(pack_input_size * sizeof(float)));
    if (packed_input_ == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
    memset(packed_input_, 0, pack_input_size * sizeof(float));
  }

  // malloc tmp output buffer
  if (conv_param_->output_channel_ % C4NUM != 0) {
    need_pack_ = true;
    int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
    int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4;
    packed_output_ = reinterpret_cast<float *>(malloc(pack_output_size * sizeof(float)));
    if (packed_output_ == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
    memset(packed_output_, 0, pack_output_size * sizeof(float));
  }
  return RET_OK;
}

int DeconvolutionDepthwiseCPUKernel::DoExcute(int task_id) {
int DeconvolutionDepthwiseCPUKernel::Init() {
  InitSlideParam();
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  auto ret = InitWeightBias();
  if (ret != 0) {
    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitWeightBias failed.";
    return RET_ERROR;
  }
  ret = InitBuffer();
  if (ret != 0) {
    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

int DeconvolutionDepthwiseCPUKernel::ReSize() {
  if (need_align_) {
    free(packed_input_);
    free(packed_output_);
  }
  InitSlideParam();
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  auto ret = InitBuffer();
  if (ret != 0) {
    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) {
  DeconvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
                 sliding_, task_id);
  return RET_OK;
}

int DeconvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto conv_dw = reinterpret_cast<DeconvolutionDepthwiseCPUKernel *>(cdata);
  auto ret = conv_dw->DoExcute(task_id);
  auto deconv_dw = reinterpret_cast<DeconvolutionDepthwiseCPUKernel *>(cdata);
  auto ret = deconv_dw->Execute(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "DeconvolutionDepthwiseRun error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
...
@@ -133,26 +163,26 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
  auto input_addr = reinterpret_cast<float *>(input_tensor->Data());

  // pack input: to nhwc4
  if (convert_func_ != nullptr) {
    convert_func_(input_addr, packed_input_, conv_param_->input_batch_,
                  conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
  if (need_align_) {
    PackNHWCToNHWC4Fp32(input_addr, packed_input_, conv_param_->input_batch_,
                        conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
  } else {
    packed_input_ = input_addr;
  }

  output_addr = reinterpret_cast<float *>(outputs_.at(kOutputIndex)->Data());
  memset(output_addr, 0, outputs_.at(kOutputIndex)->ElementsNum() * sizeof(float));
  if (!need_pack_) {
  auto output_addr = reinterpret_cast<float *>(outputs_.at(kOutputIndex)->Data());
  if (!need_align_) {
    memset(output_addr, 0, outputs_.at(kOutputIndex)->ElementsNum() * sizeof(float));
    packed_output_ = output_addr;
  }

  auto ret = LiteBackendParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
    MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
    return RET_ERROR;
  }

  if (need_pack_) {
  if (need_align_) {
    PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_,
                        conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
  }
...
@@ -182,4 +212,3 @@ kernel::LiteKernel *CpuDeconvDwFp32KernelCreator(const std::vector<lite::tensor:
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DeDepthwiseConv2D, CpuDeconvDwFp32KernelCreator)
}  // namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h
...
@@ -31,8 +31,10 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  ~DeconvolutionDepthwiseCPUKernel() override {
    delete sliding_;
    free(packed_weight_);
    free(packed_input_);
    free(packed_output_);
    if (need_align_) {
      free(packed_input_);
      free(packed_output_);
    }
  };

  int Init() override;
...
@@ -40,17 +42,17 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  int ReSize() override;
  int Run() override;
  int DoExcute(int task_id);
  int InitBuffer();
  int InitWeightBias();
  int Execute(int task_id);

 private:
  SlidingWindowParam *sliding_;
  float *packed_weight_;
  float *packed_input_;
  float *packed_output_;
  float *output_addr;
  bool need_pack_ = false;
  bool need_align_ = false;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_DEPTHWISE_H_
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
...
@@ -35,11 +35,19 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
  int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
  int pack_weight_size = C4NUM * OC4 * conv_param_->kernel_h_ * conv_param_->kernel_w_;
  packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
  if (packed_weight_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(packed_weight_, 0, pack_weight_size * sizeof(int16_t));
  PackDepthwiseInt8Weight(origin_weight, packed_weight_, conv_param_);

  // init bias, add output zp
  bias_data_ = reinterpret_cast<int32_t *>(malloc(C4NUM * OC4 * sizeof(int32_t)));
  if (bias_data_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(bias_data_, 0, C4NUM * OC4 * sizeof(int32_t));
  if (inputs_.size() == kInputSize2) {
    auto ori_bias = reinterpret_cast<int32_t *>(inputs_.at(kBiasIndex)->Data());
...
@@ -48,6 +56,30 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
  return RET_OK;
}

int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() {
  // malloc packed input buffer
  int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM *
                        UP_DIV(conv_param_->input_channel_, 4);
  packed_input_ = reinterpret_cast<int16_t *>(malloc(pack_input_size * sizeof(int16_t)));
  memset(packed_input_, 0, pack_input_size * sizeof(int16_t));
  if (packed_input_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }

  if (conv_param_->input_channel_ % C4NUM != 0) {
    need_align_ = true;
    int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM *
                           UP_DIV(conv_param_->output_channel_, C4NUM);
    packed_output_ = reinterpret_cast<int8_t *>(malloc(pack_output_size * sizeof(int8_t)));
    if (packed_input_ == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
  }
  return RET_OK;
}

int ConvolutionDepthwiseInt8CPUKernel::Init() {
  // conv base init
  ConvolutionBaseCPUKernel::Init();
...
@@ -66,7 +98,7 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() {
    return ret;
  }

  ret = ReSize();
  ret = InitBuffer();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
    return ret;
...
@@ -75,26 +107,23 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() {
}

int ConvolutionDepthwiseInt8CPUKernel::ReSize() {
  // malloc packed input buffer
  int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM *
                        UP_DIV(conv_param_->input_channel_, 4);
  packed_input_ = reinterpret_cast<int16_t *>(malloc(pack_input_size * sizeof(int16_t)));
  memset(packed_input_, 0, pack_input_size * sizeof(int16_t));
  if (packed_input_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  free(packed_input_);
  if (need_align_) {
    free(packed_output_);
  }
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  if (conv_param_->input_channel_ % C4NUM != 0) {
    need_align_ = true;
    int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM *
                           (conv_param_->output_channel_, C4NUM);
    packed_output_ = reinterpret_cast<int8_t *>(malloc(pack_output_size * sizeof(int8_t)));
    if (packed_input_ == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
    memset(packed_output_, 0, pack_output_size * sizeof(int8_t));

  // init sliding window param
  InitSlidingParam(sliding, conv_param_, C4NUM);

  // init quant param
  ConvolutionBaseCPUKernel::SetQuantParam();
  auto ret = InitBuffer();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
    return ret;
  }
  }
  return RET_OK;
}
...
@@ -106,8 +135,8 @@ int ConvolutionDepthwiseInt8CPUKernel::Execute(int task_id) {
}

int ConvDwInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto conv_dw = reinterpret_cast<ConvolutionDepthwiseInt8CPUKernel *>(cdata);
  auto ret = conv_dw->Execute(task_id);
  auto conv_dw_int8 = reinterpret_cast<ConvolutionDepthwiseInt8CPUKernel *>(cdata);
  auto ret = conv_dw_int8->Execute(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvolutionDepthwiseInt8Run error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
...
@@ -127,7 +156,6 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() {
  PackDepthwiseInt8Input(input_addr, packed_input_, conv_param_);

  auto output_addr = reinterpret_cast<int8_t *>(outputs_.at(kOutputIndex)->Data());
  memset(output_addr, 0, outputs_.at(kOutputIndex)->ElementsNum() * sizeof(int8_t));
  if (!need_align_) {
    packed_output_ = output_addr;
  }
...
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h
...
@@ -42,6 +42,7 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel {
  int Run() override;
  int InitWeightBias();
  int InitBuffer();
  int Execute(int task_id);

 private:
...
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc
...
@@ -35,11 +35,19 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
  int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
  int pack_weight_size = C4NUM * OC4 * conv_param_->kernel_h_ * conv_param_->kernel_w_;
  packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
  if (packed_weight_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(packed_weight_, 0, pack_weight_size * sizeof(int16_t));
  PackDepthwiseInt8Weight(origin_weight, packed_weight_, conv_param_);

  // init bias, add output zp
  bias_data_ = reinterpret_cast<int32_t *>(malloc(C4NUM * OC4 * sizeof(int32_t)));
  if (bias_data_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(bias_data_, 0, C4NUM * OC4 * sizeof(int32_t));
  if (inputs_.size() == kInputSize2) {
    auto ori_bias = reinterpret_cast<int32_t *>(inputs_.at(kBiasIndex)->Data());
...
@@ -59,7 +67,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitSlideParam() {
  conv_param_->output_channel_ = inputs_.front()->shape().at(kNHWC_C);

  // init sliding window param
  sliding = new SlidingWindowParam;
  InitSlidingParam(sliding, conv_param_, C4NUM);

  sliding->in_h_step_ = conv_param_->input_w_ * C4NUM;
...
@@ -70,31 +77,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitSlideParam() {
  return RET_OK;
}

int DeconvolutionDepthwiseInt8CPUKernel::Init() {
  InitSlideParam();
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  // init quant param
  ConvolutionBaseCPUKernel::SetQuantParam();

  // init weight and bias
  auto ret = InitWeightBias();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Deconv Depthwise int8 InitWeightBias error!";
    return ret;
  }

  ret = ReSize();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Deconv Depthwise int8 ReSize error!";
    return ret;
  }
  return RET_OK;
}

int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() {
  // malloc packed input buffer
  int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM *
                        UP_DIV(conv_param_->input_channel_, 4);
...
@@ -108,9 +91,9 @@ int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
  if (conv_param_->input_channel_ % C4NUM != 0) {
    need_align_ = true;
    int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM *
                           (conv_param_->output_channel_, C4NUM);
                           UP_DIV(conv_param_->output_channel_, C4NUM);
    packed_output_ = reinterpret_cast<int8_t *>(malloc(pack_output_size * sizeof(int8_t)));
    if (packed_input_ == nullptr) {
    if (packed_output_ == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
...
@@ -120,6 +103,10 @@ int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
  // malloc tmp buffer for int32 output
  output_buffer =
    reinterpret_cast<int32_t *>(malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t)));
  if (output_buffer == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  if (packed_input_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
...
@@ -127,6 +114,49 @@ int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
  return RET_OK;
}

int DeconvolutionDepthwiseInt8CPUKernel::Init() {
  sliding = new SlidingWindowParam;
  InitSlideParam();
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  // init quant param
  ConvolutionBaseCPUKernel::SetQuantParam();

  // init weight and bias
  auto ret = InitWeightBias();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Deconv Depthwise int8 InitWeightBias error!";
    return ret;
  }

  ret = InitBuffer();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
    return ret;
  }
  return RET_OK;
}

int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
  free(packed_input_);
  if (need_align_) {
    free(packed_output_);
  }
  InitSlideParam();
  // conv base init
  ConvolutionBaseCPUKernel::Init();

  auto ret = InitBuffer();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
    return ret;
  }
  return RET_OK;
}

int DeconvolutionDepthwiseInt8CPUKernel::Execute(int task_id) {
  DeconvDwInt8(packed_output_, output_buffer, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_),
               conv_param_, sliding, task_id);
...
@@ -134,8 +164,8 @@ int DeconvolutionDepthwiseInt8CPUKernel::Execute(int task_id) {
}

int DeconvDwInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto deconv_dw = reinterpret_cast<DeconvolutionDepthwiseInt8CPUKernel *>(cdata);
  auto ret = deconv_dw->Execute(task_id);
  auto deconv_dw_int8 = reinterpret_cast<DeconvolutionDepthwiseInt8CPUKernel *>(cdata);
  auto ret = deconv_dw_int8->Execute(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "DeconvolutionDepthwiseInt8Run error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
...
@@ -155,8 +185,8 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
  PackDepthwiseInt8Input(input_addr, packed_input_, conv_param_);

  auto output_addr = reinterpret_cast<int8_t *>(outputs_.at(kOutputIndex)->Data());
  memset(output_addr, 0, outputs_.at(kOutputIndex)->ElementsNum() * sizeof(int8_t));
  if (!need_align_) {
    memset(output_addr, 0, outputs_.at(kOutputIndex)->ElementsNum() * sizeof(int8_t));
    packed_output_ = output_addr;
  }
...
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h
...
@@ -43,6 +43,7 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel {
  int InitSlideParam();
  int InitWeightBias();
  int InitBuffer();
  int Execute(int task_id);

 private:
...
mindspore/lite/src/runtime/kernel/arm/opclib/fp16/conv_depthwise_fp16.cc
...
@@ -21,6 +21,9 @@
void DepthwiseBorderPixelFp16(float16_t *dst, const float16_t *src, const float16_t *weight, const float16_t *bias,
                              int height, int width, int in_kh_step, int in_kw_step, int kernel_w, bool is_relu,
                              bool is_relu6) {
  for (int c = 0; c < C8NUM; c++) {
    dst[c] = 0;
  }
  const float16_t *src_kh = src;
  const float16_t *weight_kh = weight;
  for (int kh = 0; kh < height; kh++) {
...
@@ -87,6 +90,9 @@ void DepthwiseCenterFp16(float16_t *dst, const float16_t *src, const float16_t *
    for (int ow = 0; ow < width; ow++) {
      const float16_t *src_kh = src_w;
      const float16_t *weight_kh = weight;
      for (int c = 0; c < C8NUM; c++) {
        dst_w[c] = 0;
      }
      for (int kh = 0; kh < kernel_h; kh++) {
        const float16_t *src_kw = src_kh;
        const float16_t *weight_kw = weight_kh;
...
@@ -297,4 +303,3 @@ void DeconvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const f
  // output nchwc8
}
/*deconv depthwise fp16 end*/
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/conv_depthwise.cc
...
@@ -63,6 +63,9 @@ void DepthwiseBorderPixel(float *dst, const float *src, const float *weight, con
                          int in_kh_step, int in_kw_step, int kernel_w, bool is_relu, bool is_relu6) {
  const float *src_kh = src;
  const float *weight_kh = weight;
  for (int c = 0; c < C4NUM; c++) {
    dst[c] = 0;
  }
  for (int kh = 0; kh < height; kh++) {
    const float *src_kw = src_kh;
    const float *weight_kw = weight_kh;
...
@@ -132,6 +135,9 @@ void DepthwiseCenter(float *dst, const float *src, const float *weight, const fl
    for (int ow = 0; ow < width; ow++) {
      const float *src_kh = src_w;
      const float *weight_kh = weight;
      for (int c = 0; c < C4NUM; c++) {
        dst_w[c] = 0;
      }
      for (int kh = 0; kh < kernel_h; kh++) {
        const float *src_kw = src_kh;
        const float *weight_kw = weight_kh;
...
@@ -202,7 +208,7 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig
    src += sliding->in_step_;
    dst += sliding->out_step_;
  }  // batch loop
  // output nc4hwc4
  // output nhwc4
}
/*conv depthwise fp32 end*/
...
@@ -350,6 +356,6 @@ void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *we
    src += sliding->in_step_;
    dst += sliding->out_step_;
  }  // batch loop
  // output nc4hwc4
  // output nhwc4
}
/*deconv depthwise fp32 end*/
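The opclib hunks above add an explicit zero-initialization of the per-pixel channel block before the kernel-window accumulation in the border and center paths. Below is a simplified scalar sketch of that structure; it mirrors the loop nesting and step names visible in the diff, but the accumulation body, bias add, and ReLU handling are assumptions of this sketch, and the production code works on C4/C8 channel blocks (possibly via NEON).

// Simplified scalar sketch of the "zero-init, then accumulate" pattern added
// in DepthwiseBorderPixel. Not the production implementation.
#include <algorithm>

constexpr int C4NUM = 4;

void DepthwiseBorderPixelSketch(float *dst, const float *src, const float *weight, const float *bias, int height,
                                int width, int in_kh_step, int in_kw_step, int kernel_w, bool is_relu) {
  for (int c = 0; c < C4NUM; c++) {
    dst[c] = 0;  // start the output block from zero, as in the added loop
  }
  const float *src_kh = src;
  const float *weight_kh = weight;
  for (int kh = 0; kh < height; kh++) {
    const float *src_kw = src_kh;
    const float *weight_kw = weight_kh;
    for (int kw = 0; kw < width; kw++) {
      for (int c = 0; c < C4NUM; c++) {
        dst[c] += src_kw[c] * weight_kw[c];  // multiply-accumulate over the kernel window
      }
      src_kw += in_kw_step;
      weight_kw += C4NUM;
    }
    src_kh += in_kh_step;
    weight_kh += kernel_w * C4NUM;
  }
  // Assumed post-processing: bias and optional ReLU applied after accumulation.
  for (int c = 0; c < C4NUM; c++) {
    dst[c] += bias[c];
    if (is_relu) dst[c] = std::max(dst[c], 0.0f);
  }
}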
mindspore/lite/src/runtime/kernel/arm/opclib/int8/conv_depthwise_int8.cc
...
@@ -171,7 +171,7 @@ void ConvDwInt8(int8_t *output_data, const int16_t *input_data, const int16_t *w
    src += sliding->in_step_;
    dst += sliding->out_step_;
  }  // batch loop
  // output nc4hwc4
  // output nhwc4
}
/*conv depthwise int8 end*/
...
@@ -317,6 +317,6 @@ void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *in
    src += sliding->in_step_;
    dst += sliding->out_step_;
  }  // batch loop
  // output nc4hwc4
  // output nhwc4
}
/*deconv depthwise int8 end*/