Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
8beb1b0f
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8beb1b0f
编写于
8月 03, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 03, 2020
浏览文件
操作
浏览文件
下载
差异文件
!3799 conv1x1 & deconv change
Merge pull request !3799 from ling/conv1x1
上级
8ff7c0b6
fa86096f
变更
31
展开全部
隐藏空白更改
内联
并排
Showing
31 changed file
with
2261 addition
and
300 deletion
+2261
-300
mindspore/lite/src/populate_parameter.cc
mindspore/lite/src/populate_parameter.cc
+2
-4
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
...pore/lite/src/runtime/kernel/arm/base/convolution_base.cc
+4
-22
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
...spore/lite/src/runtime/kernel/arm/base/convolution_base.h
+1
-2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc
...spore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc
+68
-99
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h
+11
-12
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc
+72
-84
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h
+13
-10
mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc
mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc
+2
-2
mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc
...e/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc
+3
-3
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/common_func.cc
...re/lite/src/runtime/kernel/arm/opclib/fp32/common_func.cc
+19
-8
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/common_func.h
...ore/lite/src/runtime/kernel/arm/opclib/fp32/common_func.h
+4
-3
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/deconv.cc
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/deconv.cc
+48
-5
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/deconv.h
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/deconv.h
+4
-3
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc
+14
-7
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.h
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.h
+2
-2
mindspore/lite/src/runtime/kernel/arm/opclib/int8/deconv.cc
mindspore/lite/src/runtime/kernel/arm/opclib/int8/deconv.cc
+8
-5
mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h
mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h
+3
-3
mindspore/lite/src/runtime/kernel/arm/opclib/pack.cc
mindspore/lite/src/runtime/kernel/arm/opclib/pack.cc
+28
-15
mindspore/lite/src/runtime/kernel/arm/opclib/pack.h
mindspore/lite/src/runtime/kernel/arm/opclib/pack.h
+2
-0
mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
...ite/src/runtime/kernel/arm/opclib/quantization/quantize.h
+17
-8
mindspore/lite/test/run_test.sh
mindspore/lite/test/run_test.sh
+10
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
...test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
+395
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc
...t/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc
+548
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc
.../src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc
+145
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strassen_fp32_tests.cc
...est/ut/src/runtime/kernel/arm/fp32/strassen_fp32_tests.cc
+369
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc
.../test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc
+266
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc
.../src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc
+2
-3
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc
...ite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc
+201
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/conv1x1fp32_output1_nhwc.bin
...me/kernel/arm/test_data/conv/conv1x1fp32_output1_nhwc.bin
+0
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconv_fp32_nchw_output1.bin
.../kernel/arm/test_data/deconv/deconv_fp32_nchw_output1.bin
+0
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconv_fp32_nhwc_input1.bin
...e/kernel/arm/test_data/deconv/deconv_fp32_nhwc_input1.bin
+0
-0
未找到文件。
mindspore/lite/src/populate_parameter.cc
浏览文件 @
8beb1b0f
...
...
@@ -165,8 +165,7 @@ OpParameter *PopulateFullconnectionParameter(const lite::Primitive *primitive) {
matmul_param
->
b_transpose_
=
true
;
matmul_param
->
a_transpose_
=
false
;
matmul_param
->
has_bias_
=
param
->
hasBias
();
matmul_param
->
minf_
=
-
FLT_MAX
;
matmul_param
->
maxf_
=
FLT_MAX
;
matmul_param
->
act_type_
=
ActType_No
;
return
reinterpret_cast
<
OpParameter
*>
(
matmul_param
);
}
...
...
@@ -181,8 +180,7 @@ OpParameter *PopulateMatMulParameter(const lite::Primitive *primitive) {
matmul_param
->
b_transpose_
=
param
->
transposeB
();
matmul_param
->
a_transpose_
=
param
->
transposeA
();
matmul_param
->
has_bias_
=
false
;
matmul_param
->
minf_
=
-
FLT_MAX
;
matmul_param
->
maxf_
=
FLT_MAX
;
matmul_param
->
act_type_
=
ActType_No
;
return
reinterpret_cast
<
OpParameter
*>
(
matmul_param
);
}
...
...
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
浏览文件 @
8beb1b0f
...
...
@@ -146,28 +146,10 @@ int ConvolutionBaseCPUKernel::SetQuantParam() {
QuantizeRoundParameter
(
real_multiplier
,
&
conv_quant_arg_
->
quant_multiplier_
[
0
],
&
conv_quant_arg_
->
left_shift_
[
0
],
&
conv_quant_arg_
->
right_shift_
[
0
]);
ComputeQuantOutRange
(
conv_param_
);
CalculateActivationRangeQuantized
(
conv_param_
->
is_relu_
,
conv_param_
->
is_relu6_
,
conv_param_
->
conv_quant_arg_
.
quant_args_
[
2
][
0
].
zp_
,
conv_param_
->
conv_quant_arg_
.
quant_args_
[
2
][
0
].
scale_
,
&
conv_param_
->
conv_quant_arg_
.
out_act_min_
[
0
],
&
conv_param_
->
conv_quant_arg_
.
out_act_max_
[
0
]);
return
RET_OK
;
}
void
ComputeQuantOutRange
(
ConvParameter
*
conv_param
)
{
int32_t
min
=
std
::
numeric_limits
<
int8_t
>::
min
();
int32_t
max
=
std
::
numeric_limits
<
int8_t
>::
max
();
float
scale
=
conv_param
->
conv_quant_arg_
.
quant_args_
[
2
][
0
].
scale_
;
int32_t
zp
=
conv_param
->
conv_quant_arg_
.
quant_args_
[
2
][
0
].
zp_
;
bool
is_relu
=
conv_param
->
is_relu_
;
bool
is_relu6
=
conv_param
->
is_relu6_
;
int32_t
quantized_zero
=
QuantizeToInt8
(
0
,
scale
,
zp
);
int32_t
quantized_six
=
QuantizeToInt8
(
6
,
scale
,
zp
);
if
(
is_relu
)
{
min
=
min
>
quantized_zero
?
min
:
quantized_zero
;
}
else
if
(
is_relu6
)
{
min
=
min
>
quantized_zero
?
min
:
quantized_zero
;
max
=
max
<
quantized_six
?
max
:
quantized_six
;
}
else
{
// do nothing
}
conv_param
->
conv_quant_arg_
.
out_act_min_
[
0
]
=
min
;
conv_param
->
conv_quant_arg_
.
out_act_max_
[
0
]
=
max
;
}
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
浏览文件 @
8beb1b0f
...
...
@@ -38,7 +38,7 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
public:
ConvolutionBaseCPUKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
Context
*
ctx
)
:
LiteKernel
(
parameter
,
inputs
,
outputs
),
ctx_
(
ctx
),
thread_count_
(
ctx
->
threadNum
)
{
:
LiteKernel
(
parameter
,
inputs
,
outputs
),
ctx_
(
ctx
),
thread_count_
(
ctx
->
threadNum
)
{
opParameter
->
thread_num_
=
ctx
->
threadNum
;
conv_param_
=
reinterpret_cast
<
ConvParameter
*>
(
opParameter
);
}
...
...
@@ -60,7 +60,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
ConvParameter
*
conv_param_
;
LayoutConvertor
convert_func_
;
};
void
ComputeQuantOutRange
(
ConvParameter
*
conv_param
);
bool
CheckSupportFP16
();
}
// namespace mindspore::kernel
...
...
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc
浏览文件 @
8beb1b0f
...
...
@@ -23,62 +23,71 @@ using mindspore::lite::RET_OK;
namespace
mindspore
::
kernel
{
Convolution1x1CPUKernel
::~
Convolution1x1CPUKernel
()
{
if
(
c4_output
_
!=
nullptr
)
{
free
(
c4_output
_
);
c4_output
_
=
nullptr
;
if
(
weight_ptr
_
!=
nullptr
)
{
free
(
weight_ptr
_
);
weight_ptr
_
=
nullptr
;
}
if
(
c4
_input_
!=
nullptr
)
{
free
(
c4
_input_
);
c4
_input_
=
nullptr
;
if
(
pack
_input_
!=
nullptr
)
{
free
(
pack
_input_
);
pack
_input_
=
nullptr
;
}
if
(
pre_trans_input_
)
{
if
(
pack_output_
!=
nullptr
)
{
free
(
pack_output_
);
pack_output_
=
nullptr
;
}
if
(
pre_trans_input_
&&
input_ptr_
!=
nullptr
)
{
free
(
input_ptr_
);
input_ptr_
=
nullptr
;
}
if
(
tmp_ptr_
!=
nullptr
)
{
free
(
tmp_ptr_
);
tmp_ptr_
=
nullptr
;
}
if
(
weight_ptr_
!=
nullptr
)
{
free
(
weight_ptr_
);
weight_ptr_
=
nullptr
;
}
delete
matmul_param_
;
}
int
Convolution1x1CPUKernel
::
ReSize
()
{
return
RET_OK
;
}
int
Convolution1x1CPUKernel
::
ReSize
()
{
if
(
pack_input_
!=
nullptr
)
{
free
(
pack_input_
);
pack_input_
=
nullptr
;
}
if
(
pre_trans_input_
&&
input_ptr_
!=
nullptr
)
{
free
(
input_ptr_
);
input_ptr_
=
nullptr
;
}
InitConv1x1MatmulParam
();
InitConv1x1Param
();
return
RET_OK
;
}
void
Convolution1x1CPUKernel
::
InitConv1x1MatmulParam
()
{
matmul_param_
=
new
StrassenMatMulParameter
();
matmul_param_
->
row_
=
conv_param_
->
output_h_
*
conv_param_
->
output_w_
;
matmul_param_
->
col_
=
UP_DIV
(
conv_param_
->
output_channel_
,
FP32_STRASSEN_UINT
);
matmul_param_
->
deep_
=
UP_DIV
(
conv_param_
->
input_channel_
,
FP32_STRASSEN_UINT
);
matmul_param_
->
a_stride_
=
matmul_param_
->
row_
*
FP32_STRASSEN_UINT
;
matmul_param_
->
b_stride_
=
matmul_param_
->
deep_
*
FP32_STRASSEN_WEIGHT_UINT
;
matmul_param_
->
c_stride_
=
matmul_param_
->
row_
*
FP32_STRASSEN_UINT
;
matmul_param_
->
col_
=
conv_param_
->
output_channel_
;
matmul_param_
->
deep_
=
conv_param_
->
input_channel_
;
matmul_param_
->
row_8_
=
UP_ROUND
(
matmul_param_
->
row_
,
C8NUM
);
matmul_param_
->
col_8_
=
UP_ROUND
(
matmul_param_
->
col_
,
C8NUM
);
matmul_param_
->
act_type_
=
(
conv_param_
->
is_relu6_
)
?
ActType_Relu6
:
ActType_No
;
matmul_param_
->
act_type_
=
(
conv_param_
->
is_relu_
)
?
ActType_Relu
:
matmul_param_
->
act_type_
;
return
;
}
int
Convolution1x1CPUKernel
::
InitConv1x1BiasWeight
()
{
if
(
inputs_
.
size
()
==
3
)
{
bias_data_
=
malloc
(
matmul_param_
->
col_
*
C4NUM
*
sizeof
(
float
));
bias_data_
=
malloc
(
matmul_param_
->
col_
8_
*
sizeof
(
float
));
if
(
bias_data_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc bias_ptr_ error!"
;
return
RET_ERROR
;
}
memset
(
bias_data_
,
0
,
matmul_param_
->
col_
*
C4NUM
*
sizeof
(
float
));
memset
(
bias_data_
,
0
,
matmul_param_
->
col_
8_
*
sizeof
(
float
));
memcpy
(
bias_data_
,
inputs_
[
2
]
->
Data
(),
conv_param_
->
output_channel_
*
sizeof
(
float
));
}
else
{
bias_data_
=
nullptr
;
}
weight_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
col_
*
matmul_param_
->
deep_
*
FP32_STRASSEN_WEIGHT_UINT
*
sizeof
(
float
)));
weight_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
row_8_
*
matmul_param_
->
col_8_
*
sizeof
(
float
)));
if
(
weight_ptr_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc weight_ptr_ error!"
;
return
RET_ERROR
;
}
memset
(
weight_ptr_
,
0
,
matmul_param_
->
col_
*
matmul_param_
->
deep_
*
FP32_STRASSEN_WEIGHT_UINT
*
sizeof
(
float
));
Pack1x1WeightFp32
(
reinterpret_cast
<
float
*>
(
inputs_
[
1
]
->
Data
()),
weight_ptr_
,
conv_param_
);
memset
(
weight_ptr_
,
0
,
matmul_param_
->
row_8_
*
matmul_param_
->
col_8_
*
sizeof
(
float
));
RowMajor2Col8Major
(
reinterpret_cast
<
float
*>
(
inputs_
[
1
]
->
Data
()),
weight_ptr_
,
matmul_param_
->
col_
,
matmul_param_
->
deep_
);
return
RET_OK
;
}
...
...
@@ -86,52 +95,43 @@ int Convolution1x1CPUKernel::InitConv1x1Param() {
pre_trans_input_
=
(
conv_param_
->
pad_h_
!=
0
||
conv_param_
->
pad_w_
!=
0
||
conv_param_
->
stride_h_
!=
1
||
conv_param_
->
stride_w_
!=
1
);
if
(
pre_trans_input_
)
{
input_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
a_stride
_
*
matmul_param_
->
deep_
*
sizeof
(
float
)));
input_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
row
_
*
matmul_param_
->
deep_
*
sizeof
(
float
)));
if
(
input_ptr_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc input_ptr_ error!"
;
return
RET_MEMORY_FAILED
;
}
memset
(
input_ptr_
,
0
,
matmul_param_
->
a_stride
_
*
matmul_param_
->
deep_
*
sizeof
(
float
));
memset
(
input_ptr_
,
0
,
matmul_param_
->
row
_
*
matmul_param_
->
deep_
*
sizeof
(
float
));
}
thread_hw_count_
=
MSMIN
(
opParameter
->
thread_num_
,
matmul_param_
->
row_
);
thread_hw_stride_
=
UP_DIV
(
matmul_param_
->
row_
,
thread_hw_count_
);
thread_oc4_count_
=
MSMIN
(
opParameter
->
thread_num_
,
matmul_param_
->
col_
);
thread_oc_stride_
=
UP_DIV
(
matmul_param_
->
col_
,
thread_oc4_count_
)
*
C4NUM
;
thread_count_
=
MSMIN
(
opParameter
->
thread_num_
,
UP_DIV
(
matmul_param_
->
col_
,
C8NUM
));
thread_stride_
=
UP_DIV
(
UP_DIV
(
matmul_param_
->
col_
,
C8NUM
),
thread_count_
)
*
C8NUM
;
tmp_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
a_stride_
*
matmul_param_
->
deep_
*
sizeof
(
float
)));
if
(
tmp_ptr_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc tmp_ptr_ error!"
;
return
RET_MEMORY_FAILED
;
}
c4_output_
=
reinterpret_cast
<
float
*>
(
malloc
(
outputs_
[
0
]
->
ElementsC4Num
()
/
conv_param_
->
output_batch_
*
sizeof
(
float
)));
if
(
c4_output_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc c4_output_ error!"
;
pack_input_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
row_8_
*
matmul_param_
->
deep_
*
sizeof
(
float
)));
if
(
pack_input_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc pack_input_ error!"
;
return
RET_MEMORY_FAILED
;
}
memset
(
pack_input_
,
0
,
matmul_param_
->
row_8_
*
matmul_param_
->
deep_
*
sizeof
(
float
));
c4_input_
=
reinterpret_cast
<
float
*>
(
malloc
(
inputs_
[
0
]
->
ElementsC4Num
()
/
conv_param_
->
input_batch_
*
sizeof
(
float
)));
if
(
c4_input_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc c4_input_ error!"
;
pack_output_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
row_8_
*
matmul_param_
->
col_8_
*
sizeof
(
float
)));
if
(
pack_output_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc pack_output_ error!"
;
return
RET_MEMORY_FAILED
;
}
memset
(
pack_output_
,
0
,
matmul_param_
->
row_8_
*
matmul_param_
->
col_8_
*
sizeof
(
float
));
return
RET_OK
;
}
void
Convolution1x1CPUKernel
::
Pre1x1Trans
(
float
*
src_input
,
float
*
src_output
)
{
output_ptr_
=
src_output
;
PackNHWCToNC4HW4Fp32
(
src_input
,
c4_input_
,
1
,
conv_param_
->
input_h_
*
conv_param_
->
input_w_
,
conv_param_
->
input_channel_
);
if
(
!
pre_trans_input_
)
{
input_ptr_
=
c4_input_
;
return
;
if
(
pre_trans_input_
)
{
Conv1x1InputPackFp32
(
src_input
,
input_ptr_
,
conv_param_
);
}
else
{
input_ptr_
=
src_input
;
}
Conv1x1InputPackFp32
(
c4_input_
,
input_ptr_
,
conv_param
_
);
RowMajor2Col8Major
(
input_ptr_
,
pack_input_
,
matmul_param_
->
row_
,
matmul_param_
->
deep
_
);
return
;
}
...
...
@@ -152,53 +152,26 @@ int Convolution1x1CPUKernel::Init() {
return
RET_OK
;
}
int
Convolution1x1CPUKernel
::
DoStrassen
(
int
task_id
)
{
matmul_param_
->
row_
=
MSMIN
(
thread_hw_stride_
,
matmul_param_
->
row_
-
task_id
*
thread_hw_stride_
);
if
(
matmul_param_
->
row_
<=
0
)
{
return
RET_OK
;
}
auto
error_code
=
Conv1x1Fp32
(
input_ptr_
+
task_id
*
thread_hw_stride_
*
C4NUM
,
weight_ptr_
,
c4_output_
+
task_id
*
thread_hw_stride_
*
C4NUM
,
tmp_ptr_
+
task_id
*
thread_hw_stride_
*
matmul_param_
->
deep_
*
C4NUM
,
*
matmul_param_
);
if
(
error_code
!=
0
)
{
MS_LOG
(
ERROR
)
<<
"DoStrassen error task_id["
<<
task_id
<<
"] error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
matmul_param_
->
row_
=
conv_param_
->
output_h_
*
conv_param_
->
output_w_
;
return
RET_OK
;
}
int
Convolution1x1StrassenRun
(
int
task_id
,
LiteParallelGroupEnv
*
penv
,
void
*
cdata
)
{
auto
conv1x1
=
reinterpret_cast
<
Convolution1x1CPUKernel
*>
(
cdata
);
auto
error_code
=
conv1x1
->
DoStrassen
(
task_id
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Convolution1x1StrassenRun error task_id["
<<
task_id
<<
"] error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
return
RET_OK
;
}
int
Convolution1x1CPUKernel
::
DoPostFunc
(
int
task_id
)
{
int
cur_oc
=
MSMIN
(
thread_oc_stride_
,
conv_param_
->
output_channel_
-
task_id
*
thread_oc_stride_
);
int
Convolution1x1CPUKernel
::
DoConv1x1
(
int
task_id
)
{
int
cur_oc
=
MSMIN
(
thread_stride_
,
matmul_param_
->
col_8_
-
task_id
*
thread_stride_
);
if
(
cur_oc
<=
0
)
{
return
RET_OK
;
}
float
*
cur_bias
=
(
bias_data_
==
nullptr
)
?
nullptr
:
reinterpret_cast
<
float
*>
(
bias_data_
)
+
task_id
*
thread_oc_stride_
;
auto
bias
=
(
bias_data_
==
nullptr
)
?
nullptr
:
reinterpret_cast
<
float
*>
(
bias_data_
)
+
thread_stride_
*
task_id
;
MatMul
(
pack_input_
,
weight_ptr_
+
task_id
*
thread_stride_
*
matmul_param_
->
deep_
,
pack_output_
+
task_id
*
thread_stride_
*
matmul_param_
->
row_8_
,
bias
,
matmul_param_
->
act_type_
,
matmul_param_
->
deep_
,
matmul_param_
->
row_8_
,
cur_oc
);
PostConvFuncFp32
(
c4_output_
+
matmul_param_
->
row_
*
thread_oc_stride_
*
task_id
,
output_ptr_
+
task_id
*
thread_oc_stride_
,
cur_bias
,
cur_oc
,
matmul_param_
->
row_
,
conv_param_
->
output_channel_
,
conv_param_
->
is_relu_
,
conv_param_
->
is_relu6_
);
return
RET_OK
;
}
int
Convolution1x1
PostFunc
Run
(
int
task_id
,
LiteParallelGroupEnv
*
penv
,
void
*
cdata
)
{
int
Convolution1x1Run
(
int
task_id
,
LiteParallelGroupEnv
*
penv
,
void
*
cdata
)
{
auto
conv1x1
=
reinterpret_cast
<
Convolution1x1CPUKernel
*>
(
cdata
);
auto
error_code
=
conv1x1
->
Do
PostFunc
(
task_id
);
auto
error_code
=
conv1x1
->
Do
Conv1x1
(
task_id
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Convolution1x1
PostFunc
Run error task_id["
<<
task_id
<<
"] error_code["
<<
error_code
<<
"]"
;
MS_LOG
(
ERROR
)
<<
"Convolution1x1Run error task_id["
<<
task_id
<<
"] error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
return
RET_OK
;
...
...
@@ -209,20 +182,16 @@ int Convolution1x1CPUKernel::Run() {
auto
src_out
=
reinterpret_cast
<
float
*>
(
outputs_
[
0
]
->
Data
());
for
(
int
batch_index
=
0
;
batch_index
<
conv_param_
->
input_batch_
;
batch_index
++
)
{
Pre1x1Trans
(
src_in
+
batch_index
*
matmul_param_
->
deep_
*
matmul_param_
->
a_stride
_
,
src_out
+
batch_index
*
matmul_param_
->
col_
*
matmul_param_
->
c_stride
_
);
Pre1x1Trans
(
src_in
+
batch_index
*
conv_param_
->
input_h_
*
conv_param_
->
input_w_
*
conv_param_
->
input_channel
_
,
src_out
+
batch_index
*
matmul_param_
->
row_
*
matmul_param_
->
col
_
);
int
error_code
=
LiteBackendParallelLaunch
(
Convolution1x1
StrassenRun
,
this
,
thread_hw
_count_
);
int
error_code
=
LiteBackendParallelLaunch
(
Convolution1x1
Run
,
this
,
thread
_count_
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"conv1x1 strassen error error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
error_code
=
LiteBackendParallelLaunch
(
Convolution1x1PostFuncRun
,
this
,
thread_oc4_count_
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"conv1x1 post function error error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
Row8x8Major2RowMajor
(
pack_output_
,
output_ptr_
,
matmul_param_
->
row_
,
matmul_param_
->
col_
);
}
return
RET_OK
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h
浏览文件 @
8beb1b0f
...
...
@@ -17,6 +17,7 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_
#include <float.h>
#include <vector>
#include "src/lite_kernel.h"
#include "include/errorcode.h"
...
...
@@ -26,21 +27,24 @@
#include "src/runtime/kernel/arm/base/layout_transform.h"
#include "src/runtime/kernel/arm/opclib/fp32/conv.h"
#include "src/runtime/kernel/arm/opclib/fp32/common_func.h"
#include "src/runtime/kernel/arm/opclib/matmul.h"
#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
namespace
mindspore
::
kernel
{
class
Convolution1x1CPUKernel
:
public
ConvolutionBaseCPUKernel
{
public:
Convolution1x1CPUKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
Context
*
ctx
)
:
ConvolutionBaseCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
)
{}
:
ConvolutionBaseCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
)
{
matmul_param_
=
new
MatMulParameter
();
}
~
Convolution1x1CPUKernel
();
int
Init
()
override
;
int
Run
()
override
;
int
ReSize
()
override
;
public:
int
DoStrassen
(
int
task_id
);
int
DoPostFunc
(
int
task_id
);
int
DoConv1x1
(
int
task_id
);
private:
int
InitConv1x1Param
();
...
...
@@ -49,20 +53,15 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
void
Pre1x1Trans
(
float
*
src_input
,
float
*
src_output
);
private:
Strassen
MatMulParameter
*
matmul_param_
=
nullptr
;
MatMulParameter
*
matmul_param_
=
nullptr
;
bool
pre_trans_input_
=
false
;
int
thread_count_
=
0
;
int
thread_hw_count_
=
0
;
int
thread_hw_stride_
=
0
;
int
thread_oc4_count_
=
0
;
int
thread_oc_stride_
=
0
;
int
thread_stride_
=
0
;
float
*
weight_ptr_
=
nullptr
;
float
*
tmp_ptr_
=
nullptr
;
float
*
c4_input_
=
nullptr
;
float
*
c4_output_
=
nullptr
;
float
*
pack_input_
=
nullptr
;
float
*
pack_output_
=
nullptr
;
float
*
input_ptr_
=
nullptr
;
float
*
output_ptr_
=
nullptr
;
};
}
// namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc
浏览文件 @
8beb1b0f
...
...
@@ -30,27 +30,38 @@ DeConvolutionCPUKernel::~DeConvolutionCPUKernel() {
free
(
weight_ptr_
);
weight_ptr_
=
nullptr
;
}
if
(
tmp_output_
!=
nullptr
)
{
free
(
tmp_output_
);
tmp_output_
=
nullptr
;
}
if
(
tmp_buffer_
!=
nullptr
)
{
free
(
tmp_buffer_
);
tmp_buffer_
=
nullptr
;
}
if
(
c4
_input_
!=
nullptr
)
{
free
(
c4
_input_
);
c4
_input_
=
nullptr
;
if
(
pack
_input_
!=
nullptr
)
{
free
(
pack
_input_
);
pack
_input_
=
nullptr
;
}
if
(
c4
_output_
!=
nullptr
)
{
free
(
c4
_output_
);
c4
_output_
=
nullptr
;
if
(
pack
_output_
!=
nullptr
)
{
free
(
pack
_output_
);
pack
_output_
=
nullptr
;
}
return
;
}
int
DeConvolutionCPUKernel
::
ReSize
()
{
return
0
;
}
int
DeConvolutionCPUKernel
::
ReSize
()
{
if
(
tmp_buffer_
!=
nullptr
)
{
free
(
tmp_buffer_
);
tmp_buffer_
=
nullptr
;
}
if
(
pack_input_
!=
nullptr
)
{
free
(
pack_input_
);
pack_input_
=
nullptr
;
}
if
(
pack_output_
!=
nullptr
)
{
free
(
pack_output_
);
pack_output_
=
nullptr
;
}
InitParam
();
return
RET_OK
;
}
int
DeConvolutionCPUKernel
::
InitWeightBias
()
{
if
(
inputs_
.
size
()
==
3
)
{
...
...
@@ -65,60 +76,50 @@ int DeConvolutionCPUKernel::InitWeightBias() {
bias_data_
=
nullptr
;
}
size_t
weight_pack_size
=
conv_param_
->
kernel_w_
*
conv_param_
->
kernel_h_
*
UP_ROUND
(
conv_param_
->
output_channel_
,
C4NUM
)
*
UP_ROUND
(
conv_param_
->
input_channel_
,
C4NUM
)
*
sizeof
(
float
);
size_t
weight_pack_size
=
conv_param_
->
input_channel_
*
conv_param_
->
kernel_w_
*
conv_param_
->
kernel_h_
*
UP_ROUND
(
conv_param_
->
output_channel_
,
C8NUM
)
*
sizeof
(
float
);
weight_ptr_
=
reinterpret_cast
<
float
*>
(
malloc
(
weight_pack_size
));
if
(
weight_ptr_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"deconv malloc weight_ptr_ error!"
;
return
RET_ERROR
;
}
memset
(
weight_ptr_
,
0
,
weight_pack_size
);
Pack
DeConvWeight
Fp32
(
reinterpret_cast
<
float
*>
(
inputs_
[
1
]
->
Data
()),
weight_ptr_
,
conv_param_
->
input_channel_
,
conv_param_
->
output_channel_
,
conv_param_
->
kernel_w_
*
conv_param_
->
kernel_h
_
);
Pack
NHWCToC8HWN8
Fp32
(
reinterpret_cast
<
float
*>
(
inputs_
[
1
]
->
Data
()),
weight_ptr_
,
conv_param_
->
input_channel_
,
kernel_plane_
,
conv_param_
->
output_channel
_
);
return
RET_OK
;
}
int
DeConvolutionCPUKernel
::
InitParam
()
{
matmul_param_
=
new
StrassenMatMulParameter
();
matmul_param_
->
row_
=
conv_param_
->
input_h_
*
conv_param_
->
input_w_
;
matmul_param_
->
deep_
=
UP_DIV
(
conv_param_
->
input_channel_
,
C4NUM
);
matmul_param_
->
col_
=
UP_DIV
(
conv_param_
->
output_channel_
,
4
)
*
conv_param_
->
kernel_w_
*
conv_param_
->
kernel_h_
;
matmul_param_
->
a_stride_
=
matmul_param_
->
row_
*
C4NUM
;
matmul_param_
->
b_stride_
=
matmul_param_
->
deep_
*
C4NUM
*
C4NUM
;
matmul_param_
->
c_stride_
=
matmul_param_
->
row_
*
C4NUM
;
thread_hw_count_
=
MSMIN
(
opParameter
->
thread_num_
,
matmul_param_
->
row_
);
thread_hw_stride_
=
UP_DIV
(
matmul_param_
->
row_
,
thread_hw_count_
);
thread_co4_count_
=
MSMIN
(
opParameter
->
thread_num_
,
UP_DIV
(
conv_param_
->
output_channel_
,
C4NUM
));
thread_co_stride_
=
UP_DIV
(
UP_DIV
(
conv_param_
->
output_channel_
,
C4NUM
),
thread_co4_count_
)
*
C4NUM
;
tmp_buffer_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
a_stride_
*
matmul_param_
->
deep_
*
C4NUM
*
sizeof
(
float
)));
if
(
tmp_buffer_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc tmp_buffer_ error!"
;
input_plane_
=
conv_param_
->
input_h_
*
conv_param_
->
input_w_
;
kernel_plane_
=
conv_param_
->
kernel_w_
*
conv_param_
->
kernel_h_
;
output_plane_
=
conv_param_
->
output_h_
*
conv_param_
->
output_w_
;
matmul_param_
->
row_
=
input_plane_
;
matmul_param_
->
deep_
=
conv_param_
->
input_channel_
;
matmul_param_
->
col_
=
conv_param_
->
output_channel_
*
kernel_plane_
;
matmul_param_
->
row_8_
=
UP_ROUND
(
matmul_param_
->
row_
,
C8NUM
);
matmul_param_
->
col_8_
=
UP_ROUND
(
conv_param_
->
output_channel_
,
C8NUM
)
*
kernel_plane_
;
thread_count_
=
MSMIN
(
opParameter
->
thread_num_
,
UP_DIV
(
conv_param_
->
output_channel_
,
C8NUM
));
thread_stride_
=
UP_DIV
(
UP_DIV
(
conv_param_
->
output_channel_
,
C8NUM
),
thread_count_
);
pack_input_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
row_8_
*
matmul_param_
->
deep_
*
sizeof
(
float
)));
if
(
pack_input_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"deconv Malloc pack_input_ error!"
;
return
RET_ERROR
;
}
tmp_output_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
row_
*
matmul_param_
->
col_
*
C4NUM
*
sizeof
(
float
)));
if
(
tmp_output_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc tmp_output_ error!"
;
return
RET_ERROR
;
}
c4_input_
=
reinterpret_cast
<
float
*>
(
malloc
(
inputs_
[
0
]
->
ElementsC4Num
()
/
conv_param_
->
input_batch_
*
sizeof
(
float
)));
if
(
c4_input_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc c4_input_ error!"
;
pack_output_
=
reinterpret_cast
<
float
*>
(
malloc
(
UP_ROUND
(
conv_param_
->
output_channel_
,
C8NUM
)
*
output_plane_
*
sizeof
(
float
)));
if
(
pack_output_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"deconv Malloc pack_output_ error!"
;
return
RET_NULL_PTR
;
}
c4_output_
=
reinterpret_cast
<
float
*>
(
malloc
(
outputs_
[
0
]
->
ElementsC4Num
()
/
conv_param_
->
output_batch_
*
sizeof
(
float
)));
if
(
c4_output_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc c4_output_ error!"
;
return
RET_NULL_PTR
;
tmp_buffer_
=
reinterpret_cast
<
float
*>
(
malloc
(
matmul_param_
->
row_8_
*
matmul_param_
->
col_8_
*
sizeof
(
float
)));
if
(
tmp_buffer_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 Malloc tmp_buffer_ error!"
;
return
RET_ERROR
;
}
return
RET_OK
;
}
...
...
@@ -132,6 +133,7 @@ int DeConvFp32Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}
return
RET_OK
;
}
int
DeConvFp32PostRun
(
int
task_id
,
LiteParallelGroupEnv
*
penv
,
void
*
cdata
)
{
auto
deconv
=
reinterpret_cast
<
DeConvolutionCPUKernel
*>
(
cdata
);
auto
error_code
=
deconv
->
DoPostFunc
(
task_id
);
...
...
@@ -141,51 +143,39 @@ int DeConvFp32PostRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}
return
RET_OK
;
}
int
DeConvolutionCPUKernel
::
DoDeconv
(
int
task_id
)
{
matmul_param_
->
row_
=
MSMIN
(
thread_hw_stride_
,
matmul_param_
->
row_
-
task_id
*
thread_hw
_stride_
);
if
(
matmul_param_
->
row_
<=
0
)
{
int
oc
=
MSMIN
(
thread_stride_
,
UP_DIV
(
conv_param_
->
output_channel_
,
C8NUM
)
-
task_id
*
thread
_stride_
);
if
(
oc
<=
0
)
{
return
RET_OK
;
}
int
error_code
=
DeConvFp32
(
c4_input_
+
task_id
*
thread_hw_stride_
*
C4NUM
,
weight_ptr_
,
tmp_output_
+
task_id
*
thread_hw_stride_
*
C4NUM
,
tmp_buffer_
+
task_id
*
thread_hw_stride_
*
matmul_param_
->
deep_
*
C4NUM
,
*
matmul_param_
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"DeConvFp32 error! error code: "
<<
error_code
;
return
error_code
;
}
MatMul
(
pack_input_
,
weight_ptr_
+
task_id
*
thread_stride_
*
C8NUM
*
kernel_plane_
*
matmul_param_
->
deep_
,
tmp_buffer_
+
task_id
*
thread_stride_
*
C8NUM
*
kernel_plane_
*
matmul_param_
->
row_8_
,
nullptr
,
ActType_No
,
matmul_param_
->
deep_
,
matmul_param_
->
row_8_
,
oc
*
C8NUM
*
kernel_plane_
);
matmul_param_
->
row_
=
conv_param_
->
input_h_
*
conv_param_
->
input_w_
;
return
RET_OK
;
}
int
DeConvolutionCPUKernel
::
DoPostFunc
(
int
task_id
)
{
int
input_plane
=
conv_param_
->
input_h_
*
conv_param_
->
input_w_
;
int
kernel_plane
=
conv_param_
->
kernel_w_
*
conv_param_
->
kernel_h_
;
int
output_plane
=
conv_param_
->
output_h_
*
conv_param_
->
output_w_
;
int
cur_oc
=
MSMIN
(
thread_co_stride_
,
conv_param_
->
output_channel_
-
task_id
*
thread_co_stride_
);
if
(
cur_oc
<=
0
)
{
int
oc
=
MSMIN
(
thread_stride_
*
C8NUM
,
conv_param_
->
output_channel_
-
task_id
*
thread_stride_
*
C8NUM
);
if
(
oc
<=
0
)
{
return
RET_OK
;
}
float
*
cur_
bias
=
(
bias_data_
==
nullptr
)
?
nullptr
:
reinterpret_cast
<
float
*>
(
bias_data_
)
+
thread_
co_stride_
*
task_id
;
float
*
bias
=
(
bias_data_
==
nullptr
)
?
nullptr
:
reinterpret_cast
<
float
*>
(
bias_data_
)
+
thread_
stride_
*
task_id
*
C8NUM
;
DeConvPostFp32
(
tmp_output_
+
thread_co_stride_
*
task_id
*
input_plane
*
kernel_plane
,
c4_output_
+
thread_co_stride_
*
task_id
*
output_plane
,
output_ptr_
+
thread_co_stride_
*
task_id
,
cur_bias
,
cur_oc
,
input_plane
,
kernel_plane
,
output_plane
,
conv_param_
);
DeConvPostFp32
C8x8
(
tmp_buffer_
+
task_id
*
thread_stride_
*
C8NUM
*
kernel_plane_
*
matmul_param_
->
row_8_
,
pack_output_
+
task_id
*
thread_stride_
*
C8NUM
*
output_plane_
,
bias
,
output_ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
oc
,
conv_param_
);
return
RET_OK
;
}
int
DeConvolutionCPUKernel
::
Init
()
{
int
error_code
=
ConvolutionBaseCPUKernel
::
Init
();
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Conv base init error!"
;
return
error_code
;
}
ConvolutionBaseCPUKernel
::
Init
();
error_code
=
InitParam
();
int
error_code
=
InitParam
();
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"deconv InitParam error!"
;
return
error_code
;
...
...
@@ -204,20 +194,18 @@ int DeConvolutionCPUKernel::Run() {
float
*
src_out
=
reinterpret_cast
<
float
*>
(
outputs_
[
0
]
->
Data
());
for
(
int
batch_index
=
0
;
batch_index
<
conv_param_
->
input_batch_
;
batch_index
++
)
{
input_ptr_
=
src_in
+
batch_index
*
conv_param_
->
input_w_
*
conv_param_
->
input_h_
*
conv_param_
->
input_channel_
;
output_ptr_
=
src_out
+
batch_index
*
conv_param_
->
output_h_
*
conv_param_
->
output_w_
*
conv_param_
->
output_channel_
;
input_ptr_
=
src_in
+
batch_index
*
input_plane_
*
conv_param_
->
input_channel_
;
output_ptr_
=
src_out
+
batch_index
*
output_plane_
*
conv_param_
->
output_channel_
;
PackNHWCToNC4HW4Fp32
(
input_ptr_
,
c4_input_
,
1
,
conv_param_
->
input_h_
*
conv_param_
->
input_w_
,
conv_param_
->
input_channel_
);
RowMajor2Col8Major
(
input_ptr_
,
pack_input_
,
input_plane_
,
conv_param_
->
input_channel_
);
int
error_code
=
LiteBackendParallelLaunch
(
DeConvFp32Run
,
this
,
thread_
hw_
count_
);
int
error_code
=
LiteBackendParallelLaunch
(
DeConvFp32Run
,
this
,
thread_count_
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"deconv fp32 run error! error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
error_code
=
LiteBackendParallelLaunch
(
DeConvFp32PostRun
,
this
,
thread_co
4_co
unt_
);
error_code
=
LiteBackendParallelLaunch
(
DeConvFp32PostRun
,
this
,
thread_count_
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"deconv fp32 postrun error! error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
...
...
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h
浏览文件 @
8beb1b0f
...
...
@@ -17,6 +17,7 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_
#include <float.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/kernel_registry.h"
...
...
@@ -24,13 +25,16 @@
#include "schema/model_generated.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "src/runtime/kernel/arm/opclib/fp32/deconv.h"
#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
namespace
mindspore
::
kernel
{
class
DeConvolutionCPUKernel
:
public
ConvolutionBaseCPUKernel
{
public:
DeConvolutionCPUKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
Context
*
ctx
)
:
ConvolutionBaseCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
)
{}
:
ConvolutionBaseCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
)
{
matmul_param_
=
new
MatMulParameter
();
}
~
DeConvolutionCPUKernel
()
override
;
int
Init
()
override
;
int
Run
()
override
;
...
...
@@ -45,19 +49,18 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
int
InitWeightBias
();
private:
StrassenMatMulParameter
*
matmul_param_
;
int
thread_hw_count_
;
int
thread_hw_stride_
;
int
thread_co4_count_
;
int
thread_co_stride_
;
MatMulParameter
*
matmul_param_
;
int
input_plane_
;
int
kernel_plane_
;
int
output_plane_
;
int
thread_count_
;
int
thread_stride_
;
float
*
weight_ptr_
;
float
*
pack_input_
;
float
*
pack_output_
;
float
*
tmp_buffer_
;
float
*
tmp_output_
;
float
*
c4_input_
;
float
*
c4_output_
;
float
*
input_ptr_
;
float
*
output_ptr_
;
};
}
// namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_
mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc
浏览文件 @
8beb1b0f
...
...
@@ -99,8 +99,8 @@ int FullconnectionCPUKernel::DoMatmul(int task_id) {
MatMul
(
a_c8_ptr_
,
b_r8_ptr_
+
task_id
*
thread_stride_
*
C8NUM
*
fc_param_
->
deep_
,
c_r8x8_ptr_
+
task_id
*
thread_stride_
*
C8NUM
*
fc_param_
->
row_8_
,
bias_ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
fc_param_
->
maxf_
,
fc_param_
->
minf_
,
fc_param_
->
deep
_
,
fc_param_
->
row_8_
,
cur_oc
*
8
);
bias_ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
fc_param_
->
act_type_
,
fc_param_
->
deep_
,
fc_param_
->
row_8
_
,
cur_oc
*
8
);
return
RET_OK
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc
浏览文件 @
8beb1b0f
...
...
@@ -82,9 +82,9 @@ int FullconnectionInt8CPUKernel::Init() {
double
real_multiplier
=
quant_params_
.
input
.
scale_
*
quant_params_
.
weight
.
scale_
/
quant_params_
.
output
.
scale_
;
QuantizeRoundParameter
(
real_multiplier
,
&
quant_params_
.
quant_multiplier
,
&
quant_params_
.
left_shift
,
&
quant_params_
.
right_shift
);
CalculateActivationRangeQuantized
(
fc_param_
->
maxf_
,
fc_param_
->
minf_
,
quant_params_
.
output
.
scale_
,
quant_params_
.
output
.
zp_
,
&
quant_params_
.
out_act_max
,
&
quant_params_
.
out_act_min
);
CalculateActivationRangeQuantized
(
fc_param_
->
act_type_
==
ActType_Relu
,
fc_param_
->
act_type_
==
ActType_Relu6
,
quant_params_
.
output
.
zp_
,
quant_params_
.
output
.
scale_
,
&
quant_params_
.
out_act_max
,
&
quant_params_
.
out_act_min
);
return
RET_OK
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/common_func.cc
浏览文件 @
8beb1b0f
...
...
@@ -63,23 +63,29 @@ void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr
return
;
}
void
PostConvFuncFp32
(
const
float
*
c4_out_ptr
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
)
{
#ifndef ENABLE_ARM64
void
PostConvFuncComm
(
const
float
*
src_ptr_
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
,
int
size
)
{
for
(
int
oc
=
0
;
oc
<
output_channel
;
oc
++
)
{
int
oc
4div
=
oc
/
4
,
oc4mod
=
oc
%
4
;
int
oc
_div
=
oc
/
size
,
oc_mod
=
oc
%
size
;
for
(
int
hw
=
0
;
hw
<
plane_size
;
hw
++
)
{
int
src_index
=
oc
4div
*
4
*
plane_size
+
hw
*
4
+
oc4
mod
;
int
src_index
=
oc
_div
*
size
*
plane_size
+
hw
*
size
+
oc_
mod
;
int
dst_index
=
hw
*
stride
+
oc
;
float
value
=
c4_out_ptr
[
src_index
];
float
value
=
src_ptr_
[
src_index
];
if
(
bias_ptr
!=
nullptr
)
{
value
=
value
+
bias_ptr
[
oc
];
}
value
=
(
is_relu
)
?
(
MSMAX
(
0
,
value
))
:
(
value
);
value
=
(
is_relu6
)
?
(
MSMIN
(
6
,
MSMAX
(
0
,
value
)
))
:
(
value
);
value
=
(
is_relu
||
is_relu6
)
?
(
MSMAX
(
0.
f
,
value
))
:
(
value
);
value
=
(
is_relu6
)
?
(
MSMIN
(
6
.
f
,
value
))
:
(
value
);
out_ptr
[
dst_index
]
=
value
;
}
}
return
;
}
void
PostConvFuncFp32C4
(
const
float
*
c4_out_ptr
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
)
{
#ifndef ENABLE_ARM64
PostConvFuncComm
(
c4_out_ptr
,
out_ptr
,
bias_ptr
,
output_channel
,
plane_size
,
stride
,
is_relu
,
is_relu6
,
C4NUM
);
#else
if
(
bias_ptr
!=
nullptr
)
{
if
(
is_relu
)
{
...
...
@@ -102,3 +108,8 @@ void PostConvFuncFp32(const float *c4_out_ptr, float *out_ptr, const float *bias
return
;
}
void
PostConvFuncFp32C8
(
const
float
*
c8_out_ptr
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
)
{
PostConvFuncComm
(
c8_out_ptr
,
out_ptr
,
bias_ptr
,
output_channel
,
plane_size
,
stride
,
is_relu
,
is_relu6
,
C8NUM
);
return
;
}
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/common_func.h
浏览文件 @
8beb1b0f
...
...
@@ -27,8 +27,10 @@
extern
"C"
{
#endif
void
PostConvFuncFp32
(
const
float
*
c4_out_ptr
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
);
void
PostConvFuncFp32C4
(
const
float
*
c4_out_ptr
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
);
void
PostConvFuncFp32C8
(
const
float
*
c8_out_ptr
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
);
void
MatrixAdd
(
const
float
*
a_ptr
,
const
float
*
b_ptr
,
float
*
dst
,
size_t
a_stride
,
size_t
b_stride
,
size_t
c_stride
,
size_t
row
,
size_t
col
);
void
MatrixSub
(
const
float
*
a_ptr
,
const
float
*
b_ptr
,
float
*
dst
,
size_t
a_stride
,
size_t
b_stride
,
size_t
c_stride
,
...
...
@@ -60,4 +62,3 @@ void DeconvDwFp32Center(float *dst, const float *src, const float *weight, size_
#endif
#endif
/* MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_COMMON_FUNC_H_ */
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/deconv.cc
浏览文件 @
8beb1b0f
...
...
@@ -38,8 +38,52 @@ int DeConvFp32(const float *input, const float *weight, float *output, float *tm
return
StrassenMatmul
(
input
,
weight
,
output
,
&
matmul_param
,
FP32_STRASSEN_MAX_RECURSION
,
0
,
tmp_buffer
);
}
int
DeConvPostFp32
(
const
float
*
src
,
float
*
tmp_c4
,
float
*
dst
,
const
float
*
bias
,
int
output_channel
,
int
input_plane
,
int
kernel_plane
,
int
output_plane
,
ConvParameter
*
conv_param
)
{
int
DeConvPostFp32C8x8
(
const
float
*
src
,
float
*
tmp
,
const
float
*
bias
,
float
*
dst
,
int
output_channel
,
ConvParameter
*
conv_param
)
{
/* row8x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */
size_t
input_plane
=
conv_param
->
input_w_
*
conv_param
->
input_h_
;
size_t
kernel_plane
=
conv_param
->
kernel_w_
*
conv_param
->
kernel_h_
;
size_t
output_plane
=
conv_param
->
output_w_
*
conv_param
->
output_h_
;
int
oc8
=
UP_DIV
(
output_channel
,
C8NUM
);
int
in_plane8
=
UP_ROUND
(
input_plane
,
C8NUM
);
for
(
int
c
=
0
;
c
<
oc8
;
c
++
)
{
float
*
dst_ptr
=
tmp
+
c
*
output_plane
*
C8NUM
;
const
float
*
src_ptr
=
src
+
c
*
in_plane8
*
kernel_plane
*
C8NUM
;
memset
(
dst_ptr
,
0
,
output_plane
*
C8NUM
*
sizeof
(
int32_t
));
for
(
int
ih
=
0
;
ih
<
conv_param
->
input_h_
;
ih
++
)
{
for
(
int
iw
=
0
;
iw
<
conv_param
->
input_w_
;
iw
++
)
{
int
oh
=
ih
*
conv_param
->
stride_h_
-
conv_param
->
pad_h_
;
int
ow
=
iw
*
conv_param
->
stride_w_
-
conv_param
->
pad_w_
;
int
kh_start
=
MSMAX
(
0
,
UP_DIV
(
-
oh
,
conv_param
->
dilation_h_
));
int
kh_end
=
MSMIN
(
conv_param
->
kernel_h_
,
UP_DIV
(
conv_param
->
output_h_
-
oh
,
conv_param
->
dilation_h_
));
int
kw_start
=
MSMAX
(
0
,
UP_DIV
(
-
ow
,
conv_param
->
dilation_w_
));
int
kw_end
=
MSMIN
(
conv_param
->
kernel_w_
,
UP_DIV
(
conv_param
->
output_w_
-
ow
,
conv_param
->
dilation_w_
));
for
(
int
kh
=
kh_start
;
kh
<
kh_end
;
kh
++
)
{
for
(
int
kw
=
kw_start
;
kw
<
kw_end
;
kw
++
)
{
int
src_index
=
ih
*
conv_param
->
input_w_
*
C8NUM
+
iw
*
C8NUM
+
kh
*
in_plane8
*
conv_param
->
kernel_w_
*
C8NUM
+
kw
*
in_plane8
*
C8NUM
;
int
dst_index
=
oh
*
conv_param
->
output_w_
*
C8NUM
+
ow
*
C8NUM
+
kh
*
conv_param
->
dilation_h_
*
conv_param
->
output_w_
*
C8NUM
+
kw
*
conv_param
->
dilation_w_
*
C8NUM
;
for
(
int
i
=
0
;
i
<
C8NUM
;
i
++
)
{
dst_ptr
[
dst_index
+
i
]
+=
src_ptr
[
src_index
+
i
];
}
}
/*kw*/
}
/*kh*/
}
/*iw*/
}
/*ih*/
}
/*oc8*/
PostConvFuncFp32C8
(
tmp
,
dst
,
bias
,
output_channel
,
output_plane
,
conv_param
->
output_channel_
,
conv_param
->
is_relu_
,
conv_param
->
is_relu6_
);
return
OPCLIB_OK
;
}
int
DeConvPostFp32C4
(
const
float
*
src
,
float
*
tmp_c4
,
float
*
dst
,
const
float
*
bias
,
int
output_channel
,
int
input_plane
,
int
kernel_plane
,
int
output_plane
,
ConvParameter
*
conv_param
)
{
int
oc4
=
UP_DIV
(
output_channel
,
C4NUM
);
for
(
int
c
=
0
;
c
<
oc4
;
c
++
)
{
float
*
dst_ptr
=
tmp_c4
+
c
*
output_plane
*
C4NUM
;
...
...
@@ -71,8 +115,7 @@ int DeConvPostFp32(const float *src, float *tmp_c4, float *dst, const float *bia
}
/*ih*/
}
/*oc4*/
PostConvFuncFp32
(
tmp_c4
,
dst
,
bias
,
output_channel
,
output_plane
,
conv_param
->
output_channel_
,
conv_param
->
is_relu_
,
conv_param
->
is_relu6_
);
PostConvFuncFp32
C4
(
tmp_c4
,
dst
,
bias
,
output_channel
,
output_plane
,
conv_param
->
output_channel_
,
conv_param
->
is_relu_
,
conv_param
->
is_relu6_
);
return
OPCLIB_OK
;
}
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/deconv.h
浏览文件 @
8beb1b0f
...
...
@@ -26,8 +26,9 @@ void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, in
int
DeConvFp32
(
const
float
*
input
,
const
float
*
weight
,
float
*
output
,
float
*
tmp_buffer
,
StrassenMatMulParameter
matmul_param
);
int
DeConvPostFp32
(
const
float
*
src
,
float
*
tmp_c4
,
float
*
dst
,
const
float
*
bias
,
int
output_channel
,
int
input_plane
,
int
kernel_plane
,
int
output_plane
,
ConvParameter
*
conv_param
);
int
DeConvPostFp32C4
(
const
float
*
src
,
float
*
tmp_c4
,
float
*
dst
,
const
float
*
bias
,
int
output_channel
,
int
input_plane
,
int
kernel_plane
,
int
output_plane
,
ConvParameter
*
conv_param
);
int
DeConvPostFp32C8x8
(
const
float
*
src
,
float
*
tmp_out
,
const
float
*
bias
,
float
*
dst
,
int
output_channel
,
ConvParameter
*
conv_param
);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_DECONV_H_
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc
浏览文件 @
8beb1b0f
...
...
@@ -48,10 +48,11 @@ void Row8x8Major2RowMajor(float *src_ptr, float *dst_ptr, int row, int col) {
dst_ptr
[
r
*
col
+
c
]
=
src_ptr
[
cd8
*
row8
*
8
+
r
*
8
+
cm8
];
}
}
return
;
}
void
MatMul8x8
(
const
float
*
a
,
const
float
*
b
,
float
*
c
,
const
float
*
bias
,
float
maxf
,
float
minf
,
int
deep
,
int
row_8_
,
int
col_8_
)
{
void
MatMul8x8
(
const
float
*
a
,
const
float
*
b
,
float
*
c
,
const
float
*
bias
,
ActType
act_type
,
int
deep
,
int
row_8_
,
int
col_8_
)
{
/* col8-major * row8-major => col8x8-major */
for
(
int
row
=
0
;
row
<
row_8_
;
row
++
)
{
for
(
int
col
=
0
;
col
<
col_8_
;
col
++
)
{
...
...
@@ -64,19 +65,25 @@ void MatMul8x8(const float *a, const float *b, float *c, const float *bias, floa
size_t
bi
=
c8div
*
deep
*
8
+
d
*
8
+
c8mod
;
value
=
value
+
a
[
ai
]
*
b
[
bi
];
}
value
+=
bias
[
col
];
value
=
MSMIN
(
maxf
,
value
);
value
=
MSMAX
(
minf
,
value
);
if
(
bias
!=
nullptr
)
{
value
+=
bias
[
col
];
}
if
(
act_type
==
ActType_Relu6
)
value
=
MSMIN
(
6.0
f
,
value
);
if
(
act_type
!=
ActType_No
)
value
=
MSMAX
(
0.0
f
,
value
);
c
[
ci
]
=
value
;
}
}
return
;
}
void
MatMul
(
const
float
*
a
,
const
float
*
b
,
float
*
c
,
const
float
*
bias
,
float
maxf
,
float
minf
,
int
deep
,
int
row_8_
,
void
MatMul
(
const
float
*
a
,
const
float
*
b
,
float
*
c
,
const
float
*
bias
,
ActType
act_type
,
int
deep
,
int
row_8_
,
int
col_8_
)
{
#ifdef __aarch64__
float
minf
=
(
act_type
==
ActType_No
)
?
FLT_MIN
:
0.
f
;
float
maxf
=
(
act_type
==
ActType_Relu6
)
?
6.0
f
:
FLT_MAX
;
MatMulFloatNeon64
(
a
,
b
,
c
,
bias
,
maxf
,
minf
,
deep
,
row_8_
,
col_8_
);
#else
MatMul8x8
(
a
,
b
,
c
,
bias
,
maxf
,
minf
,
deep
,
row_8_
,
col_8_
);
MatMul8x8
(
a
,
b
,
c
,
bias
,
act_type
,
deep
,
row_8_
,
col_8_
);
#endif
return
;
}
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.h
浏览文件 @
8beb1b0f
...
...
@@ -17,12 +17,12 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_MATMUL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_MATMUL_H_
#include <float.h>
#include "src/runtime/kernel/arm/opclib/errorcode.h"
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/matmul.h"
void
MatMul
(
const
float
*
a
,
const
float
*
b
,
float
*
c
,
const
float
*
bias
,
float
maxf
,
float
minf
,
int
depth
,
int
row
,
int
col
);
void
MatMul
(
const
float
*
a
,
const
float
*
b
,
float
*
c
,
const
float
*
bias
,
ActType
act_type
,
int
depth
,
int
row
,
int
col
);
void
RowMajor2Row8Major
(
float
*
src_ptr
,
float
*
dst_ptr
,
int
row
,
int
col
);
void
RowMajor2Col8Major
(
float
*
src_ptr
,
float
*
dst_ptr
,
int
row
,
int
col
);
void
Row8x8Major2RowMajor
(
float
*
src_ptr
,
float
*
dst_ptr
,
int
row
,
int
col
);
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/int8/deconv.cc
浏览文件 @
8beb1b0f
...
...
@@ -25,15 +25,18 @@ int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, size_
int
DeConvPostInt8
(
const
int32_t
*
src
,
const
int32_t
*
bias
,
int32_t
*
tmp
,
int8_t
*
out
,
int
output_channel
,
ConvParameter
*
conv_param
)
{
int
oc8
=
UP_DIV
(
output_channel
,
C8NUM
);
/* row8x8-major(ih*iw x oc*kh*kw) -> row8x8-major(oh*ow x oc) */
size_t
input_plane
=
conv_param
->
input_w_
*
conv_param
->
input_h_
;
size_t
kernel_plane
=
conv_param
->
kernel_w_
*
conv_param
->
kernel_h_
;
size_t
output_plane
=
conv_param
->
output_w_
*
conv_param
->
output_h_
;
int
oc8
=
UP_DIV
(
output_channel
,
C8NUM
);
int
in_plane8
=
UP_ROUND
(
input_plane
,
8
);
int
out_plane8
=
UP_ROUND
(
output_plane
,
8
);
for
(
int
c
=
0
;
c
<
oc8
;
c
++
)
{
int32_t
*
dst_ptr
=
tmp
+
c
*
out
put_plane
*
C8NUM
;
const
int32_t
*
src_ptr
=
src
+
c
*
in
put_plane
*
kernel_plane
*
C8NUM
;
memset
(
dst_ptr
,
0
,
out
put_plane
*
C8NUM
*
sizeof
(
int32_t
));
int32_t
*
dst_ptr
=
tmp
+
c
*
out
_plane8
*
C8NUM
;
const
int32_t
*
src_ptr
=
src
+
c
*
in
_plane8
*
kernel_plane
*
C8NUM
;
memset
(
dst_ptr
,
0
,
out
_plane8
*
C8NUM
*
sizeof
(
int32_t
));
for
(
int
ih
=
0
;
ih
<
conv_param
->
input_h_
;
ih
++
)
{
for
(
int
iw
=
0
;
iw
<
conv_param
->
input_w_
;
iw
++
)
{
...
...
@@ -60,7 +63,7 @@ int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t
}
/*ih*/
}
/*oc8*/
PostFuncInt8
(
tmp
,
bias
,
out
,
output_channel
,
output_plane
,
UP_ROUND
(
output_plane
,
8
)
,
PostFuncInt8
(
tmp
,
bias
,
out
,
output_channel
,
output_plane
,
out_plane8
,
conv_param
->
conv_quant_arg_
.
quant_multiplier_
[
0
],
conv_param
->
conv_quant_arg_
.
left_shift_
[
0
],
conv_param
->
conv_quant_arg_
.
right_shift_
[
0
],
conv_param
->
conv_quant_arg_
.
quant_args_
[
2
][
0
].
zp_
,
conv_param
->
conv_quant_arg_
.
out_act_min_
[
0
],
conv_param
->
conv_quant_arg_
.
out_act_max_
[
0
]);
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h
浏览文件 @
8beb1b0f
...
...
@@ -19,6 +19,8 @@
#include "src/runtime/kernel/arm/opclib/op_base.h"
enum
ActType
{
ActType_No
,
ActType_Relu
,
ActType_Relu6
};
struct
MatMulParameter
{
OpParameter
op_parameter_
;
int
row_
;
...
...
@@ -26,12 +28,10 @@ struct MatMulParameter {
int
row_8_
;
int
col_8_
;
int
deep_
;
float
minf_
;
float
maxf_
;
bool
has_bias_
;
bool
a_transpose_
;
/* false : row-major */
bool
b_transpose_
;
/* true : col-major */
ActType
act_type_
;
};
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_MATMUL_H_
mindspore/lite/src/runtime/kernel/arm/opclib/pack.cc
浏览文件 @
8beb1b0f
...
...
@@ -150,23 +150,21 @@ void PackWeightInt8Opt(int8_t *weight_data, ConvParameter *conv_param, int8_t *p
}
void
Conv1x1InputPackFp32
(
const
float
*
src
,
float
*
dst
,
ConvParameter
*
conv_param
)
{
for
(
int
c
=
0
;
c
<
UP_DIV
(
conv_param
->
input_channel_
,
C4NUM
);
c
++
)
{
const
float
*
src_c_ptr
=
src
+
c
*
conv_param
->
input_h_
*
conv_param
->
input_w_
*
C4NUM
;
float
*
dst_c_ptr
=
dst
+
c
*
conv_param
->
output_h_
*
conv_param
->
output_w_
*
C4NUM
;
for
(
int
dst_h
=
0
;
dst_h
<
conv_param
->
output_h_
;
dst_h
++
)
{
int
src_h
=
dst_h
*
conv_param
->
stride_h_
-
conv_param
->
pad_h_
;
if
(
src_h
<
0
||
src_h
>=
conv_param
->
input_h_
)
{
/* support nhwc */
for
(
int
dst_h
=
0
;
dst_h
<
conv_param
->
output_h_
;
dst_h
++
)
{
int
src_h
=
dst_h
*
conv_param
->
stride_h_
-
conv_param
->
pad_h_
;
if
(
src_h
<
0
||
src_h
>=
conv_param
->
input_h_
)
{
continue
;
}
const
float
*
src_h_ptr
=
src
+
src_h
*
conv_param
->
input_w_
*
conv_param
->
input_channel_
;
float
*
dst_h_ptr
=
dst
+
dst_h
*
conv_param
->
output_w_
*
conv_param
->
input_channel_
;
for
(
int
dst_w
=
0
;
dst_w
<
conv_param
->
output_w_
;
dst_w
++
)
{
int
src_w
=
dst_w
*
conv_param
->
stride_w_
-
conv_param
->
pad_w_
;
if
(
src_w
<
0
||
src_w
>=
conv_param
->
input_w_
)
{
continue
;
}
const
float
*
src_h_ptr
=
src_c_ptr
+
src_h
*
conv_param
->
input_w_
*
C4NUM
;
float
*
dst_h_ptr
=
dst_c_ptr
+
dst_h
*
conv_param
->
output_w_
*
C4NUM
;
for
(
int
dst_w
=
0
;
dst_w
<
conv_param
->
output_w_
;
dst_w
++
)
{
int
src_w
=
dst_w
*
conv_param
->
stride_w_
-
conv_param
->
pad_w_
;
if
(
src_w
<
0
||
src_w
>=
conv_param
->
input_w_
)
{
continue
;
}
memcpy
(
dst_h_ptr
+
dst_w
*
C4NUM
,
src_h_ptr
+
src_w
*
C4NUM
,
C4NUM
*
sizeof
(
float
));
}
memcpy
(
dst_h_ptr
+
dst_w
*
conv_param
->
input_channel_
,
src_h_ptr
+
src_w
*
conv_param
->
input_channel_
,
conv_param
->
input_channel_
*
sizeof
(
float
));
}
}
return
;
...
...
@@ -572,6 +570,21 @@ void PackNC4HW4ToNCHWFp32(const void *src, void *dst, int batch, int plane, int
}
}
void
PackNHWCToC8HWN8Fp32
(
const
void
*
src
,
void
*
dst
,
int
batch
,
int
plane
,
int
channel
)
{
for
(
int
n
=
0
;
n
<
batch
;
n
++
)
{
for
(
int
hw
=
0
;
hw
<
plane
;
hw
++
)
{
for
(
int
c
=
0
;
c
<
channel
;
c
++
)
{
int
c8div
=
c
/
C8NUM
;
int
c8mod
=
c
%
C8NUM
;
int
src_index
=
n
*
plane
*
channel
+
hw
*
channel
+
c
;
int
dst_index
=
c8div
*
batch
*
plane
*
C8NUM
+
hw
*
batch
*
C8NUM
+
n
*
C8NUM
+
c8mod
;
((
float
*
)
dst
)[
dst_index
]
=
((
float
*
)
src
)[
src_index
];
}
}
}
return
;
}
void
PackNHWCToNHWC4Int8
(
const
void
*
src
,
void
*
dst
,
int
batch
,
int
plane
,
int
channel
)
{
int
c4
=
UP_DIV
(
channel
,
C4NUM
);
int
nhwc4_batch_unit_offset
=
c4
*
C4NUM
*
plane
;
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/pack.h
浏览文件 @
8beb1b0f
...
...
@@ -69,6 +69,8 @@ void PackNC4HW4ToNHWCFp32(const void *src, void *dst, int batch, int plane, int
void
PackNC4HW4ToNCHWFp32
(
const
void
*
src
,
void
*
dst
,
int
batch
,
int
plane
,
int
channel
);
void
PackNHWCToC8HWN8Fp32
(
const
void
*
src
,
void
*
dst
,
int
batch
,
int
plane
,
int
channel
);
void
PackNHWCToNHWC4Int8
(
const
void
*
src
,
void
*
dst
,
int
batch
,
int
plane
,
int
channel
);
void
PackNHWC4ToNHWCInt8
(
const
void
*
src
,
void
*
dst
,
int
batch
,
int
plane
,
int
channel
);
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
浏览文件 @
8beb1b0f
...
...
@@ -21,6 +21,7 @@
#include <math.h>
#include <stdlib.h>
#include <limits.h>
#include <limits>
struct
QuantArg
{
double
scale_
;
...
...
@@ -112,13 +113,21 @@ inline uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp) { retu
inline
int32_t
QuantizeToInt8
(
float
real_value
,
float
scale
,
int32_t
zp
)
{
return
round
(
real_value
/
scale
+
zp
);
}
inline
void
CalculateActivationRangeQuantized
(
float
fmax
,
float
fmin
,
float
scale
,
int
zero_point
,
int
*
imax
,
int
*
imin
)
{
int8_t
qmin
=
(
int8_t
)
CHAR_MIN
;
int8_t
qmax
=
(
int8_t
)
CHAR_MAX
;
int8_t
qfmin
=
QuantizeToInt8
(
fmin
,
scale
,
zero_point
);
int8_t
qfmax
=
QuantizeToInt8
(
fmax
,
scale
,
zero_point
);
*
imin
=
qmin
<
qfmin
?
qmin
:
qfmin
;
*
imax
=
qmax
>
qfmax
?
qmax
:
qfmax
;
inline
void
CalculateActivationRangeQuantized
(
bool
is_relu
,
bool
is_relu6
,
int32_t
zp
,
int32_t
scale
,
int
*
mini
,
int
*
maxi
)
{
int32_t
min
=
std
::
numeric_limits
<
int8_t
>::
min
();
int32_t
max
=
std
::
numeric_limits
<
int8_t
>::
max
();
int32_t
quantized_zero
=
QuantizeToInt8
(
0
,
scale
,
zp
);
int32_t
quantized_six
=
QuantizeToInt8
(
6
,
scale
,
zp
);
if
(
is_relu
)
{
min
=
min
>
quantized_zero
?
min
:
quantized_zero
;
}
else
if
(
is_relu6
)
{
min
=
min
>
quantized_zero
?
min
:
quantized_zero
;
max
=
max
<
quantized_six
?
max
:
quantized_six
;
}
else
{
// do nothing
}
*
mini
=
min
;
*
maxi
=
max
;
}
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_QUANTIZATION_QUANTIZE_H_
mindspore/lite/test/run_test.sh
浏览文件 @
8beb1b0f
...
...
@@ -6,5 +6,15 @@ BUILD_DIR=${CUR_DIR}/../build
mkdir
-pv
${
CUR_DIR
}
/do_test
cd
${
CUR_DIR
}
/do_test
cp
${
BUILD_DIR
}
/test/lite-test ./
cp
-r
${
CUR_DIR
}
/ut/src/runtime/kernel/arm/test_data/
*
./
./lite-test
--gtest_filter
=
"*TestHebing*"
./lite-test
--gtest_filter
=
TestFcFp32
*
./lite-test
--gtest_filter
=
TestConv1x1Fp32
*
./lite-test
--gtest_filter
=
TestStrassenFp32
*
./lite-test
--gtest_filter
=
TestDeConvolutionFp32
*
./lite-test
--gtest_filter
=
TestPadInt8
*
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
0 → 100644
浏览文件 @
8beb1b0f
此差异已折叠。
点击以展开。
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc
0 → 100644
浏览文件 @
8beb1b0f
此差异已折叠。
点击以展开。
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc
0 → 100644
浏览文件 @
8beb1b0f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/time.h>
#include <iostream>
#include <memory>
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "src/runtime/kernel/arm/fp32/fullconnection.h"
#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
namespace
mindspore
{
using
mindspore
::
lite
::
tensor
::
Tensor
;
class
TestFcFp32
:
public
mindspore
::
Common
{
public:
TestFcFp32
()
{}
};
int
FcTestInit1
(
std
::
vector
<
lite
::
tensor
::
Tensor
*>
*
inputs_
,
std
::
vector
<
lite
::
tensor
::
Tensor
*>
*
outputs_
,
MatMulParameter
*
matmal_param
,
float
**
correct
)
{
Tensor
*
in_t
=
new
Tensor
(
kNumberTypeFloat
,
{
2
,
2
,
2
,
2
},
schema
::
Format_NHWC
,
static_cast
<
schema
::
NodeType
>
(
1
));
in_t
->
MallocData
();
float
in
[]
=
{
-
3.2366564
,
-
4.7733846
,
-
7.8329225
,
16.146885
,
5.060793
,
-
6.1471
,
-
1.7680453
,
-
6.5721383
,
17.87506
,
-
5.1192183
,
10.742863
,
1.4536934
,
19.693445
,
19.45783
,
5.063163
,
0.5234792
};
memcpy
(
in_t
->
Data
(),
in
,
sizeof
(
float
)
*
in_t
->
ElementsNum
());
inputs_
->
push_back
(
in_t
);
Tensor
*
weight_t
=
new
Tensor
(
kNumberTypeFloat
,
{
3
,
8
},
schema
::
Format_NHWC
,
static_cast
<
schema
::
NodeType
>
(
1
));
weight_t
->
MallocData
();
float
weight
[]
=
{
-
0.0024438887
,
0.0006738146
,
-
0.008169129
,
0.0021510671
,
-
0.012470592
,
-
0.0053063435
,
0.006050155
,
0.008656233
,
0.012911413
,
-
0.0028635843
,
-
0.00034080597
,
-
0.0010622552
,
-
0.012254699
,
-
0.01312836
,
0.0025241964
,
-
0.004706142
,
0.002451482
,
-
0.009558459
,
0.004481974
,
0.0033251503
,
-
0.011705584
,
-
0.001720293
,
-
0.0039410214
,
-
0.0073637343
};
memcpy
(
weight_t
->
Data
(),
weight
,
sizeof
(
float
)
*
weight_t
->
ElementsNum
());
inputs_
->
push_back
(
weight_t
);
Tensor
*
bias_t
=
new
Tensor
(
kNumberTypeFloat
,
{
3
},
schema
::
Format_NHWC
,
static_cast
<
schema
::
NodeType
>
(
1
));
bias_t
->
MallocData
();
float
bias
[]
=
{
1.6103756
,
-
0.9872417
,
0.546849
};
memcpy
(
bias_t
->
Data
(),
bias
,
sizeof
(
float
)
*
bias_t
->
ElementsNum
());
inputs_
->
push_back
(
bias_t
);
Tensor
*
out_t
=
new
Tensor
(
kNumberTypeFloat
,
{
2
,
3
},
schema
::
Format_NHWC
,
static_cast
<
schema
::
NodeType
>
(
1
));
out_t
->
MallocData
();
outputs_
->
push_back
(
out_t
);
*
correct
=
reinterpret_cast
<
float
*>
(
malloc
(
out_t
->
ElementsNum
()
*
sizeof
(
float
)));
float
nchw_co
[]
=
{
1.6157111
,
-
0.98469573
,
0.6098231
,
1.1649342
,
-
1.2334653
,
0.404779
};
memcpy
(
*
correct
,
nchw_co
,
out_t
->
ElementsNum
()
*
sizeof
(
float
));
matmal_param
->
b_transpose_
=
true
;
matmal_param
->
a_transpose_
=
false
;
matmal_param
->
has_bias_
=
true
;
matmal_param
->
act_type_
=
ActType_No
;
return
out_t
->
ElementsNum
();
}
TEST_F
(
TestFcFp32
,
FcTest1
)
{
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs_
;
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs_
;
auto
matmul_param
=
new
MatMulParameter
();
float
*
correct
;
int
total_size
=
FcTestInit1
(
&
inputs_
,
&
outputs_
,
matmul_param
,
&
correct
);
lite
::
Context
*
ctx
=
new
lite
::
Context
;
ctx
->
threadNum
=
2
;
kernel
::
FullconnectionCPUKernel
*
fc
=
new
kernel
::
FullconnectionCPUKernel
(
reinterpret_cast
<
OpParameter
*>
(
matmul_param
),
inputs_
,
outputs_
,
ctx
);
fc
->
Init
();
fc
->
Run
();
CompareOutputData
(
reinterpret_cast
<
float
*>
(
outputs_
[
0
]
->
Data
()),
correct
,
total_size
,
0.0001
);
}
int
FcTestInit2
(
std
::
vector
<
lite
::
tensor
::
Tensor
*>
*
inputs_
,
std
::
vector
<
lite
::
tensor
::
Tensor
*>
*
outputs_
,
MatMulParameter
*
matmal_param
,
float
**
correct
)
{
size_t
buffer_size
;
Tensor
*
in_t
=
new
Tensor
(
kNumberTypeFloat
,
{
20
,
4
,
2
,
10
},
schema
::
Format_NCHW
,
static_cast
<
schema
::
NodeType
>
(
1
));
in_t
->
MallocData
();
std
::
string
in_path
=
"./matmul/FcFp32_input1.bin"
;
auto
in_data
=
mindspore
::
lite
::
ReadFile
(
in_path
.
c_str
(),
&
buffer_size
);
memcpy
(
in_t
->
Data
(),
in_data
,
buffer_size
);
inputs_
->
push_back
(
in_t
);
Tensor
*
weight_t
=
new
Tensor
(
kNumberTypeFloat
,
{
30
,
80
},
schema
::
Format_NCHW
,
static_cast
<
schema
::
NodeType
>
(
1
));
weight_t
->
MallocData
();
std
::
string
weight_path
=
"./matmul/FcFp32_weight1.bin"
;
auto
w_data
=
mindspore
::
lite
::
ReadFile
(
weight_path
.
c_str
(),
&
buffer_size
);
memcpy
(
weight_t
->
Data
(),
w_data
,
buffer_size
);
inputs_
->
push_back
(
weight_t
);
Tensor
*
bias_t
=
new
Tensor
(
kNumberTypeFloat
,
{
30
},
schema
::
Format_NCHW
,
static_cast
<
schema
::
NodeType
>
(
1
));
bias_t
->
MallocData
();
std
::
string
bias_path
=
"./matmul/FcFp32_bias1.bin"
;
auto
bias_data
=
mindspore
::
lite
::
ReadFile
(
bias_path
.
c_str
(),
&
buffer_size
);
memcpy
(
bias_t
->
Data
(),
bias_data
,
buffer_size
);
inputs_
->
push_back
(
bias_t
);
Tensor
*
out_t
=
new
Tensor
(
kNumberTypeFloat
,
{
20
,
30
},
schema
::
Format_NCHW
,
static_cast
<
schema
::
NodeType
>
(
1
));
out_t
->
MallocData
();
outputs_
->
push_back
(
out_t
);
*
correct
=
reinterpret_cast
<
float
*>
(
malloc
(
out_t
->
ElementsNum
()
*
sizeof
(
float
)));
std
::
string
out_path
=
"./matmul/FcFp32_output1.bin"
;
auto
out_data
=
mindspore
::
lite
::
ReadFile
(
out_path
.
c_str
(),
&
buffer_size
);
memcpy
(
*
correct
,
out_data
,
out_t
->
ElementsNum
()
*
sizeof
(
float
));
matmal_param
->
b_transpose_
=
true
;
matmal_param
->
a_transpose_
=
false
;
matmal_param
->
has_bias_
=
true
;
matmal_param
->
act_type_
=
ActType_No
;
return
out_t
->
ElementsNum
();
}
TEST_F
(
TestFcFp32
,
FcTest2
)
{
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs_
;
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs_
;
auto
matmul_param
=
new
MatMulParameter
();
float
*
correct
;
int
total_size
=
FcTestInit2
(
&
inputs_
,
&
outputs_
,
matmul_param
,
&
correct
);
lite
::
Context
*
ctx
=
new
lite
::
Context
;
ctx
->
threadNum
=
1
;
kernel
::
FullconnectionCPUKernel
*
fc
=
new
kernel
::
FullconnectionCPUKernel
(
reinterpret_cast
<
OpParameter
*>
(
matmul_param
),
inputs_
,
outputs_
,
ctx
);
fc
->
Init
();
fc
->
Run
();
CompareOutputData
(
reinterpret_cast
<
float
*>
(
outputs_
[
0
]
->
Data
()),
correct
,
total_size
,
0.0001
);
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strassen_fp32_tests.cc
0 → 100644
浏览文件 @
8beb1b0f
此差异已折叠。
点击以展开。
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc
0 → 100644
浏览文件 @
8beb1b0f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <cmath>
#include <iostream>
#include <memory>
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/pack.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/int8/deconv.h"
#include "mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h"
using
mindspore
::
lite
::
DeviceType
;
namespace
mindspore
{
using
mindspore
::
lite
::
tensor
::
QuantArg
;
using
mindspore
::
lite
::
tensor
::
Tensor
;
using
mindspore
::
schema
::
Format_NHWC
;
using
mindspore
::
schema
::
NodeType_Parameter
;
// Test fixture for the int8 deconvolution kernel and its helper routines
// (packing, int8 matmul, post-processing). Inherits the shared comparison
// utilities (CompareOutputData, ...) from mindspore::Common.
class TestDeconvInt8 : public mindspore::Common {
 public:
  TestDeconvInt8() {}
};
/**
 * Quantizes a float buffer to int8: vq = round(vi / scale + zp), saturated
 * to [INT8_MIN, INT8_MAX].
 *
 * @param fptr   source float values
 * @param iptr   destination int8 buffer, at least `size` elements
 * @param size   number of elements to convert
 * @param zp     quantization zero point
 * @param scale  quantization scale (must be non-zero)
 */
void FloatToInt8(float *fptr, int8_t *iptr, size_t size, int32_t zp, double scale) {
  // size_t index: the original `int i < size` mixed signed and unsigned.
  for (size_t i = 0; i < size; i++) {
    int32_t value = static_cast<int32_t>(std::lround(fptr[i] / scale + zp));
    // Saturate instead of letting the narrowing cast wrap around.
    value = std::min(value, static_cast<int32_t>(INT8_MAX));
    value = std::max(value, static_cast<int32_t>(INT8_MIN));
    iptr[i] = static_cast<int8_t>(value);
  }
}
// Verifies PackNHWCToC8HWN8Int8 on a 5x1x2x6 weight tensor: channels are
// grouped into blocks of 8 (6 real channels + 2 zero pads) and the blocks are
// laid out spatial-position-major, batch-minor, so every run of 8 in the
// expected output is one (hw, n) pair's channel block.
TEST_F(TestDeconvInt8, PackWeight1) {
  // source weights, NHWC with N=5, H=1, W=2, C=6 (60 values)
  int8_t in[] = {-8,   11,  99,  -80, 8,    -12,  37,  -45, 31,  -69, -66, 26,
                 112,  124, -109, 85, -24,  28,   -46, 100, 72,  -36, -82, 64,
                 -110, 37,  -72, 65,  -124, 91,   -43, 99,  3,   100, 19,  51,
                 -14,  -81, 67,  90,  4,    -106, 105, 28,  -61, -79, 55,  -54,
                 47,   -38, 114, 125, -65,  100,  6,   -72, -33, 60,  109, -68};
  // expected C8HWN8 layout (80 values: 1 channel block x 2 spatial x 5 batch x 8),
  // each 6-channel group zero-padded to 8
  int8_t co[] = {-8,   11,  99,  -80, 8,    -12, 0, 0, 112, 124, -109, 85,  -24, 28,   0, 0,
                 -110, 37,  -72, 65,  -124, 91,  0, 0, -14, -81, 67,   90,  4,   -106, 0, 0,
                 47,   -38, 114, 125, -65,  100, 0, 0, 37,  -45, 31,   -69, -66, 26,   0, 0,
                 -46,  100, 72,  -36, -82,  64,  0, 0, -43, 99,  3,    100, 19,  51,   0, 0,
                 105,  28,  -61, -79, 55,   -54, 0, 0, 6,   -72, -33,  60,  109, -68,  0, 0};
  int8_t dst[80] = {0};
  /*5*1*2*6 nhwc*/
  PackNHWCToC8HWN8Int8(in, dst, 5, 2, 6);
  // element-wise comparison, tolerance 1
  CompareOutputData(dst, co, 80, 1);
}
// Verifies PackNHWCToC8HWN8Int8 on a 22x1x1x20 tensor: 20 channels split into
// three c8 blocks (ch 0-7, ch 8-15, ch 16-19 + 4 zero pads); within each block
// the 22 batches appear in order, 8 channel values each, for 3*22*8 = 528
// output values.
TEST_F(TestDeconvInt8, PackWeight2) {
  // source, NHWC with N=22, H=W=1, C=20 (440 values; one batch per line)
  int8_t in[] = {
    40,   24,   94,   122,  67,   34,   -89,  31,   -43,  121,  48,   -54,  44,   -91,  35,   89,   -37,  114,  -8,   103,
    -22,  32,   26,   112,  -92,  -23,  43,   9,    81,   118,  -73,  -54,  65,   -99,  51,   -90,  121,  -62,  119,  -93,
    21,   -92,  -1,   -82,  -71,  -54,  63,   -93,  92,   -93,  99,   122,  -104, -16,  -8,   -32,  90,   -126, 51,   91,
    4,    70,   -7,   116,  99,   81,   -79,  124,  -14,  28,   97,   9,    -97,  99,   88,   -15,  54,   26,   77,   -25,
    113,  119,  119,  -75,  -17,  7,    7,    1,    69,   66,   40,   -13,  80,   -115, -98,  -8,   -17,  31,   88,   65,
    -1,   -15,  -98,  77,   56,   119,  -20,  -32,  -54,  -58,  -16,  52,   121,  126,  -33,  43,   92,   -34,  -17,  -52,
    104,  -52,  -91,  76,   79,   105,  102,  -65,  43,   32,   13,   15,   -38,  95,   -18,  -82,  -7,   118,  -79,  -85,
    120,  -15,  2,    32,   -94,  111,  115,  102,  -18,  121,  -106, 54,   63,   111,  -16,  92,   82,   -23,  111,  53,
    1,    -48,  45,   19,   -4,   -15,  -72,  41,   80,   -51,  116,  31,   94,   101,  -10,  18,   0,    -49,  108,  28,
    -36,  47,   -14,  -2,   -10,  31,   -92,  -84,  74,   -114, -107, 66,   99,   -121, -107, 31,   -38,  56,   -30,  109,
    -7,   28,   -22,  -17,  -3,   -2,   27,   -3,   108,  -84,  -23,  -71,  -54,  20,   -45,  109,  -42,  78,   -79,  98,
    -10,  57,   52,   1,    25,   73,   21,   -78,  46,   121,  66,   92,   24,   55,   4,    -110, -37,  112,  -18,  10,
    -42,  16,   -9,   31,   39,   -70,  108,  -3,   -90,  -60,  -121, 11,   50,   -88,  -104, -29,  -89,  94,   64,   -91,
    -101, -7,   23,   -57,  93,   16,   17,   35,   -48,  -25,  13,   -121, 73,   -68,  -54,  -122, -20,  12,   64,   20,
    -11,  -6,   -71,  -52,  -97,  109,  116,  -107, 117,  -124, 56,   80,   -108, 30,   123,  56,   -80,  39,   -18,  -97,
    -103, 122,  114,  -10,  -31,  97,   -92,  105,  -61,  -25,  10,   -119, -106, 41,   77,   -117, 55,   -83,  -29,  14,
    27,   -106, -86,  41,   43,   23,   11,   -76,  -34,  121,  94,   18,   69,   73,   100,  54,   43,   32,   13,   15,
    -38,  95,   -18,  -82,  -7,   118,  -79,  -85,  120,  -15,  2,    32,   -94,  111,  115,  102,  -18,  121,  -106, 54,
    63,   111,  -16,  92,   82,   -23,  111,  53,   1,    -48,  45,   19,   -4,   -15,  -72,  41,   80,   -51,  116,  31,
    94,   101,  -10,  18,   0,    -49,  108,  28,   -36,  47,   -14,  -2,   -10,  31,   -92,  -84,  74,   -114, -107, 66,
    99,   -121, -107, 31,   -38,  56,   -30,  109,  -7,   28,   -22,  -17,  -3,   -2,   27,   -3,   108,  -84,  -23,  -71,
    -54,  20,   -45,  109,  -42,  78,   -79,  98,   -10,  57,   52,   1,    25,   73,   21,   -78,  46,   121,  66,   92};
  // expected C8HWN8 output: block 0 = channels 0-7 of batches 0..21,
  // block 1 = channels 8-15, block 2 = channels 16-19 followed by 4 zero pads
  int8_t co[] = {
    40,   24,   94,   122,  67,   34,   -89,  31,
    -22,  32,   26,   112,  -92,  -23,  43,   9,
    21,   -92,  -1,   -82,  -71,  -54,  63,   -93,
    4,    70,   -7,   116,  99,   81,   -79,  124,
    113,  119,  119,  -75,  -17,  7,    7,    1,
    -1,   -15,  -98,  77,   56,   119,  -20,  -32,
    104,  -52,  -91,  76,   79,   105,  102,  -65,
    120,  -15,  2,    32,   -94,  111,  115,  102,
    1,    -48,  45,   19,   -4,   -15,  -72,  41,
    -36,  47,   -14,  -2,   -10,  31,   -92,  -84,
    -7,   28,   -22,  -17,  -3,   -2,   27,   -3,
    -10,  57,   52,   1,    25,   73,   21,   -78,
    -42,  16,   -9,   31,   39,   -70,  108,  -3,
    -101, -7,   23,   -57,  93,   16,   17,   35,
    -11,  -6,   -71,  -52,  -97,  109,  116,  -107,
    -103, 122,  114,  -10,  -31,  97,   -92,  105,
    27,   -106, -86,  41,   43,   23,   11,   -76,
    -38,  95,   -18,  -82,  -7,   118,  -79,  -85,
    63,   111,  -16,  92,   82,   -23,  111,  53,
    94,   101,  -10,  18,   0,    -49,  108,  28,
    99,   -121, -107, 31,   -38,  56,   -30,  109,
    -54,  20,   -45,  109,  -42,  78,   -79,  98,
    -43,  121,  48,   -54,  44,   -91,  35,   89,
    81,   118,  -73,  -54,  65,   -99,  51,   -90,
    92,   -93,  99,   122,  -104, -16,  -8,   -32,
    -14,  28,   97,   9,    -97,  99,   88,   -15,
    69,   66,   40,   -13,  80,   -115, -98,  -8,
    -54,  -58,  -16,  52,   121,  126,  -33,  43,
    43,   32,   13,   15,   -38,  95,   -18,  -82,
    -18,  121,  -106, 54,   63,   111,  -16,  92,
    80,   -51,  116,  31,   94,   101,  -10,  18,
    74,   -114, -107, 66,   99,   -121, -107, 31,
    108,  -84,  -23,  -71,  -54,  20,   -45,  109,
    46,   121,  66,   92,   24,   55,   4,    -110,
    -90,  -60,  -121, 11,   50,   -88,  -104, -29,
    -48,  -25,  13,   -121, 73,   -68,  -54,  -122,
    117,  -124, 56,   80,   -108, 30,   123,  56,
    -61,  -25,  10,   -119, -106, 41,   77,   -117,
    -34,  121,  94,   18,   69,   73,   100,  54,
    120,  -15,  2,    32,   -94,  111,  115,  102,
    1,    -48,  45,   19,   -4,   -15,  -72,  41,
    -36,  47,   -14,  -2,   -10,  31,   -92,  -84,
    -7,   28,   -22,  -17,  -3,   -2,   27,   -3,
    -10,  57,   52,   1,    25,   73,   21,   -78,
    -37,  114,  -8,   103,  0,    0,    0,    0,
    121,  -62,  119,  -93,  0,    0,    0,    0,
    90,   -126, 51,   91,   0,    0,    0,    0,
    54,   26,   77,   -25,  0,    0,    0,    0,
    -17,  31,   88,   65,   0,    0,    0,    0,
    92,   -34,  -17,  -52,  0,    0,    0,    0,
    -7,   118,  -79,  -85,  0,    0,    0,    0,
    82,   -23,  111,  53,   0,    0,    0,    0,
    0,    -49,  108,  28,   0,    0,    0,    0,
    -38,  56,   -30,  109,  0,    0,    0,    0,
    -42,  78,   -79,  98,   0,    0,    0,    0,
    -37,  112,  -18,  10,   0,    0,    0,    0,
    -89,  94,   64,   -91,  0,    0,    0,    0,
    -20,  12,   64,   20,   0,    0,    0,    0,
    -80,  39,   -18,  -97,  0,    0,    0,    0,
    55,   -83,  -29,  14,   0,    0,    0,    0,
    43,   32,   13,   15,   0,    0,    0,    0,
    -18,  121,  -106, 54,   0,    0,    0,    0,
    80,   -51,  116,  31,   0,    0,    0,    0,
    74,   -114, -107, 66,   0,    0,    0,    0,
    108,  -84,  -23,  -71,  0,    0,    0,    0,
    46,   121,  66,   92,   0,    0,    0,    0};
  int8_t dst[528] = {0};
  PackNHWCToC8HWN8Int8(in, dst, 22, 1, 20);
  // element-wise comparison, tolerance 1
  CompareOutputData(dst, co, 528, 1);
}
// Verifies the int8 matrix multiply used by the int8 deconvolution:
// A (10x12, row major) is packed into col8-major tiles (rows padded 10 -> 16),
// B (12x18, stored column major, i.e. already transposed) is packed the same
// way (cols padded 18 -> 24), MatMulInt8 produces the zero-point-corrected
// int32 product in an 8x8-tiled buffer, which is then unpacked back to plain
// row major and compared against the precomputed reference product.
TEST_F(TestDeconvInt8, MatMulTest1) {
  int8_t a_row_major_10_12[] = {
    -6,  76,  32,  80,  -73, 8,    -85, -3,  114, 80,  30, 42,
    -41, 117, 62,  -76, -77, -111, 88,  105, 68,  105, -74, 13,
    51,  94,  31,  -52, -92, -4,   -35, -71, 101, -93, 46, -65,
    57,  -41, -51, 77,  1,   9,    73,  -19, -36, 57,  81, -24,
    40,  103, 112, 109, -41, -68,  57,  61,  55,  -20, 3,  2,
    17,  -16, -31, 58,  -4,  67,   -4,  -95, -5,  -72, 81, 15,
    -7,  -16, -47, 112, 114, -26,  -98, 53,  15,  -49, 26, 19,
    19,  8,   -57, -35, -79, 118,  29,  21,  37,  -48, 83, 7,
    124, 113, -5,  15,  -8,  107,  -65, -88, 50,  -47, -80, -84,
    3,   -45, 92,  42,  -20, -101, 106, -10, 89,  67,  55, 10};
  int32_t zp_a = 15;                   // zero point of A
  int8_t a_col8_major[16 * 12] = {0};  // 10 rows padded up to 16 (multiple of 8)
  // B, 12x18 column major (18 columns of 12 values each; one column per line)
  int8_t b_col_major_12_18[] = {
    92,  27,   22,   52,  -112, -20, -57, -2,  89,   32,  93,  -66,
    -25, -54,  94,   -97, -119, -98, 101, -99, 77,   -83, 76,  95,
    59,  97,   8,    40,  -109, -20, 67,  -107, 37,  -6,  -54, -20,
    -30, 36,   -106, -103, -3,  -86, -82, 59,  4,    -75, -50, -106,
    55,  104,  -117, -71, -20,  -85, -77, 16,  -25,  -58, 4,   80,
    -75, 94,   32,   -68, 2,    40,  56,  -103, 11,  -98, -70, -69,
    0,   57,   -6,   82,  66,   -112, -61, 33,  -77, -53, 95,  -38,
    87,  -46,  -3,   81,  -47,  43,  21,  26,  -45,  -57, 50,  -24,
    -82, -114, 61,   46,  -53,  78,  -24, 31,  -7,   37,  29,  38,
    45,  106,  52,   -42, 31,   -6,  -61, -87, 2,    79,  -5,  -42,
    43,  -106, -104, 7,   91,   -63, 58,  97,  -15,  74,  -96, 15,
    -23, -3,   -47,  -97, 100,  -54, 26,  -46, 35,   26,  100, -80,
    34,  -25,  96,   -67, -80,  -27, 66,  41,  41,   -43, -43, -38,
    -4,  -64,  31,   7,   -8,   6,   -2,  39,  -119, 53,  75,  -91,
    -44, 77,   -62,  22,  -44,  78,  -67, -48, -115, -4,  43,  81,
    40,  -20,  -5,   -89, 60,   -62, -4,  -48, 66,   -64, -69, 62,
    17,  -89,  1,    87,  81,   32,  -29, 51,  40,   27,  66,  67,
    11,  -69,  85,   -79, -106, 55,  22,  -23, 62,   69,  -74, 49};
  int32_t zp_b = -20;                 // zero point of B
  int8_t b_row8_major[12 * 24] = {0};  // 18 columns padded up to 24
  // expected 10x18 int32 product (one output row per line)
  int32_t co_row_major_10_18[] = {
    32005,  3597,   16595,  -3458,  6627,   -6663,  818,    -3910,  10228,
    15079,  -19205, -10203, -3178,  -10046, 10374,  -6199,  5330,   12163,
    1819,   20533,  17382,  18283,  9778,   9185,   -12623, -26234, -11987,
    7904,   8144,   -1603,  27611,  -10190, -20053, 4999,   -28389, 21852,
    24680,  25858,  23506,  17944,  11768,  24378,  -6102,  -4675,  -23460,
    10434,  -47579, 1986,   12018,  -19418, -7248,  4938,   -32613, -941,
    8171,   -4788,  3325,   -11310, -8351,  -14786, 6909,   16401,  2017,
    -6456,  11242,  7393,   -9119,  17312,  2646,   -14402, 7201,   -9949,
    23986,  17607,  27461,  -1547,  2783,   7558,   19487,  11158,  -2686,
    6328,   -8225,  -11668, 21858,  -2079,  -8671,  -639,   -1544,  1235,
    1156,   6582,   2829,   -10311, -2692,  5154,   1527,   10870,  106,
    -8189,  -24174, -1846,  -15399, -3598,  14874,  -5591,  -619,   -13667,
    -6053,  -31103, -24499, 13008,  9143,   -17982, 28437,  2176,   -2114,
    -11631, 10779,  -1032,  -24690, -3112,  2125,   432,    20270,  -33859,
    8907,   10063,  1603,   3761,   4805,   4904,   -15594, 10786,  4287,
    -13591, -18777, -1679,  2109,   -2243,  12051,  -8504,  -6558,  4209,
    13606,  -25803, 27922,  12092,  7140,   27142,  -12267, 2339,   -26224,
    23674,  -26579, -11398, -1823,  -18976, 3641,   4415,   -24878, -2045,
    15937,  41465,  12601,  -14513, -17619, -5728,  334,    -424,   8147,
    -1369,  5984,   11000,  19016,  4456,   -25920, 4506,   5930,   15458};
  int32_t c_row8x8_major[16 * 24] = {0};
  int32_t out_row_major[180] = {0};
  RowMajor2Col8MajorInt8(a_row_major_10_12, a_col8_major, 10, 12);
  RowMajor2Col8MajorInt8(b_col_major_12_18, b_row8_major, 18, 12);
  MatMulInt8(a_col8_major, b_row8_major, c_row8x8_major, 16, 24, 12, zp_a, zp_b);
  // NOTE(review): the tiled int32 result is funneled through the float unpack
  // routine via reinterpret_cast. This is only safe if Row8x8Major2RowMajor
  // merely copies 4-byte words (bit patterns preserved) and never does
  // arithmetic on the values -- confirm against its implementation.
  Row8x8Major2RowMajor(reinterpret_cast<float *>(c_row8x8_major), reinterpret_cast<float *>(out_row_major), 10, 18);
  // element-wise comparison, tolerance 1
  CompareOutputData(out_row_major, co_row_major_10_18, 180, 1);
}
// Verifies PostFuncInt8, the deconv int8 epilogue: per-channel bias add,
// requantization (the double multiplier is decomposed into a quantized
// multiplier plus left/right shifts), output zero-point add, and clamping.
// The same int32 input is checked with three clamp windows:
// [-128,127] (no activation), [0,127] (relu), and [0,6] (relu6).
TEST_F(TestDeconvInt8, PostAddTest1) {
  // raw int32 accumulator data: 16 rows (10 channels padded to 2 c8 blocks)
  // of 8 columns (5 pixels padded to 8)
  int32_t in[] = {
    -4956,  -3923, 868,   -8880, -4089, -5179, -4526,  -4527,
    -10464, 99,    -5826, -2995, -4519, -4519, -10509, -2505,
    -11272, 434,   -4522, -4523, -5287, -8936, -878,   373,
    -4528,  -4529, -1960, -6589, 1688,  2287,  -8059,  926,
    -2506,  -6972, -2834, -8281, -8118, -3110, -4526,  -4527,
    -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
    -4520,  -4521, -4522, -4523, -4524, -4525, -4526,  -4527,
    -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
    1578,   2231,  -4522, -4523, -4524, -4525, -4526,  -4527,
    -8449,  -990,  -4519, -4519, -4519, -4519, -4519,  -4519,
    -4303,  -10293, -4522, -4523, -4524, -4525, -4526, -4527,
    -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
    -7025,  924,   -4522, -4523, -4524, -4525, -4526,  -4527,
    -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
    -4520,  -4521, -4522, -4523, -4524, -4525, -4526,  -4527,
    -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519};
  // expected int8 output for the unclamped ([-128,127]) case
  int8_t co[] = {-8,   11,  99,  -80, 8,    -12, 0, 0, 112, 124, -109, 85,  -24, 28,   0, 0,
                 -110, 37,  -72, 65,  -124, 91,  0, 0, -14, -81, 67,   90,  4,   -106, 0, 0,
                 47,   -38, 114, 125, -65,  100, 0, 0, 37,  -45, 31,   -69, -66, 26,   0, 0,
                 -46,  100};
  // per-channel bias for the 10 output channels
  int32_t bias[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  int8_t out[50] = {0};
  double multiplier = 0.0183649725490196;
  int32_t quant_multiplier;
  int32_t left_shift;
  int32_t right_shift;
  // decompose the real multiplier into fixed-point multiplier + shifts
  QuantizeRoundParameter(multiplier, &quant_multiplier, &left_shift, &right_shift);
  int32_t zp = 83;  // output zero point
  PostFuncInt8(in, bias, out, 10, 5, 8, quant_multiplier, left_shift, right_shift, zp, -128, 127);
  CompareOutputData(out, co, 50, 1);

  // same computation clamped to [0,127] (relu)
  int8_t co_relu[] = {0,  11, 99,  0,   8, 0,   0, 0, 112, 124, 0,  85, 0,  28, 0, 0,
                      0,  37, 0,   65,  0, 91,  0, 0, 0,   0,   67, 90, 4,  0,  0, 0,
                      47, 0,  114, 125, 0, 100, 0, 0, 37,  0,   31, 0,  0,  26, 0, 0,
                      0,  100};
  PostFuncInt8(in, bias, out, 10, 5, 8, quant_multiplier, left_shift, right_shift, zp, 0, 127);
  CompareOutputData(out, co_relu, 50, 1);

  // same computation clamped to [0,6] (relu6)
  int8_t co_relu6[] = {0, 6, 6, 0, 6, 0, 0, 0, 6, 6, 0, 6, 0, 6, 0, 0,
                       0, 6, 0, 6, 0, 6, 0, 0, 0, 0, 6, 6, 4, 0, 0, 0,
                       6, 0, 6, 6, 0, 6, 0, 0, 6, 0, 6, 0, 0, 6, 0, 0,
                       0, 6};
  PostFuncInt8(in, bias, out, 10, 5, 8, quant_multiplier, left_shift, right_shift, zp, 0, 6);
  CompareOutputData(out, co_relu6, 50, 1);
}
// Builds the int8 deconvolution fixture: a quantized 1x4x2x3 input, a 3x3
// weight with 3 input / 2 output channels, a quantized 1x7x3x2 output tensor,
// and stride-2 conv parameters. *correct receives a malloc'd NHWC reference
// converted from the NCHW values below. Returns the output element count.
int DeConvInt8TestInit1(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                        ConvParameter *conv_param, int8_t **correct) {
  /* float data from deconv fp32 testcase : DeConvTestInit2 */
  /* vq = (vi - zp) * s vi = vq / s + zp */
  Tensor *in_t = new Tensor(kNumberTypeInt8, {1, 4, 2, 3}, Format_NHWC, NodeType_Parameter);
  in_t->MallocData();
  int8_t in[] = {6,   43, 38, 24,  -8, 12, 41, -24, -20, 41,  -19, -6,
                 -26, -6, 23, -31, 34, 45, 8,  45,  -39, -27, -48, 12};
  memcpy(in_t->Data(), in, sizeof(int8_t) * in_t->ElementsNum());
  // input quantization parameters
  QuantArg *in_quant_arg = new QuantArg();
  in_quant_arg->zeroPoint = -19, in_quant_arg->scale = 0.31228156;
  in_t->AddQuantParam(*in_quant_arg);
  inputs_->push_back(in_t);

  // weight: 3x3x3x2 (kh, kw, cin, cout), quantized
  Tensor *weight_t = new Tensor(kNumberTypeInt8, {3, 3, 3, 2}, Format_NHWC, NodeType_Parameter);
  weight_t->MallocData();
  int8_t weight[] = {66,  89, 98,  74,  95, 86, 125, 95, 105, 83, 116, 94, 90, 80, 86, 59, 72, 92,
                     64,  76, 92,  80,  90, 87, 106, 55, 105, 60, 75,  53, 81, 81, 98, 81, 86, 59,
                     74,  82, 97,  105, 71, 67, 79,  87, 72,  79, 80,  76, 96, 80, 83, 71, 61, 79};
  memcpy(weight_t->Data(), weight, sizeof(int8_t) * weight_t->ElementsNum());
  QuantArg *w_quant_arg = new QuantArg();
  w_quant_arg->zeroPoint = 83, w_quant_arg->scale = 0.023649725490196;
  weight_t->AddQuantParam(*w_quant_arg);
  inputs_->push_back(weight_t);

  // output tensor: 1x7x3x2 NHWC, quantized
  Tensor *out_t = new Tensor(kNumberTypeInt8, {1, 7, 3, 2}, Format_NHWC, NodeType_Parameter);
  out_t->MallocData();
  QuantArg *out_quant_arg = new QuantArg();
  out_quant_arg->zeroPoint = 31, out_quant_arg->scale = 0.3439215686275;
  out_t->AddQuantParam(*out_quant_arg);
  outputs_->push_back(out_t);

  // reference values are authored in NCHW; repack to NHWC into *correct
  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  int8_t co_nchw[] = {57, 76, 49,  71, 8,  61, 57, 127, 56, 46,  -11, 61, 23, 31,
                      34, 50, 59,  49, 78, 17, 6,  -3,  -5, 23,  -11, 6,  -5, 33,
                      64, 30, 21,  18, 25, 21, -15, 0,  4,  31,  36,  2,  17, 43};
  PackNCHWToNHWCInt8(co_nchw, *correct, out_t->Batch(), out_t->Width() * out_t->Height(), out_t->Channel());

  // 3x3 kernel, pad 1, stride 2, dilation 1
  conv_param->kernel_h_ = conv_param->kernel_w_ = 3;
  conv_param->pad_h_ = conv_param->pad_w_ = 1;
  conv_param->stride_h_ = conv_param->stride_w_ = 2;
  conv_param->dilation_h_ = conv_param->dilation_w_ = 1;
  return out_t->ElementsNum();
}
// End-to-end int8 deconvolution: builds the DeConvInt8TestInit1 fixture,
// runs the kernel on two threads, and compares against the reference
// within a tolerance of 3 (quantization error allowance).
TEST_F(TestDeconvInt8, DeConvInt8Test1) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto deconv_param = new ConvParameter();
  lite::Context *ctx = new lite::Context;
  ctx->threadNum = 2;
  int8_t *correct;
  int total_size = DeConvInt8TestInit1(&inputs_, &outputs_, deconv_param, &correct);
  mindspore::kernel::DeConvInt8CPUKernel *deconv = new mindspore::kernel::DeConvInt8CPUKernel(
    reinterpret_cast<OpParameter *>(deconv_param), inputs_, outputs_, ctx);
  deconv->Init();
  deconv->Run();
  CompareOutputData(reinterpret_cast<int8_t *>(outputs_[0]->Data()), correct, total_size, 3);
  delete deconv_param;
  // delete deconv;
  // NOTE(review): the kernel object is deliberately leaked above -- presumably
  // deleting it after deconv_param would double-free shared state. Confirm
  // DeConvInt8CPUKernel's ownership of its OpParameter before enabling.
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
  free(correct);
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc
浏览文件 @
8beb1b0f
...
...
@@ -27,7 +27,7 @@ namespace mindspore {
using
lite
::
tensor
::
Tensor
;
class
TestFcInt8
:
public
mindspore
::
Common
{
public:
TestFcInt8
(){}
TestFcInt8
()
{}
};
void
Quantize
(
float
*
input_data
,
int
length
,
float
scale
,
int
zero_point
,
int8_t
*
output_data
)
{
...
...
@@ -110,8 +110,7 @@ int FcInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lit
matmal_param
->
b_transpose_
=
true
;
matmal_param
->
a_transpose_
=
false
;
matmal_param
->
has_bias_
=
true
;
matmal_param
->
minf_
=
-
FLT_MAX
;
matmal_param
->
maxf_
=
FLT_MAX
;
matmal_param
->
act_type_
=
ActType_No
;
return
out_t
->
ElementsNum
();
}
...
...
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc
0 → 100644
浏览文件 @
8beb1b0f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include "include/context.h"
#include "src/ir/tensor.h"
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "src/runtime/kernel/arm/opclib/pad_parameter.h"
#include "src/runtime/kernel/arm/int8/pad_int8.h"
namespace
mindspore
{
using
mindspore
::
lite
::
tensor
::
QuantArg
;
using
mindspore
::
lite
::
tensor
::
Tensor
;
// Test fixture for the int8 pad kernel; inherits the shared comparison
// utilities (CompareOutputData, ...) from mindspore::Common.
class TestPadInt8 : public mindspore::Common {
 public:
  TestPadInt8() {}
};
// Builds a 1-D pad fixture: input [3] of ones, two padded positions on each
// side of the last axis, expected output [7] where padded slots carry the
// zero-point value 10. Returns the output element count; *correct receives a
// malloc'd copy of the expected data.
int PadInt8TestInit1(std::vector<Tensor *> *inputs_, std::vector<Tensor *> *outputs_, PadParameter *pad_param,
                     int8_t **correct) {
  auto *input_tensor = new Tensor(kNumberTypeInt8, {3}, schema::Format_NHWC, schema::NodeType_Parameter);
  input_tensor->MallocData();
  int8_t in[] = {1, 1, 1};
  memcpy(input_tensor->Data(), in, sizeof(int8_t) * input_tensor->ElementsNum());
  auto *in_quant_arg = new QuantArg();
  in_quant_arg->zeroPoint = 10;
  in_quant_arg->scale = 0.31228156;
  input_tensor->AddQuantParam(*in_quant_arg);
  inputs_->push_back(input_tensor);

  auto *output_tensor = new Tensor(kNumberTypeInt8, {7}, schema::Format_NHWC, schema::NodeType_Parameter);
  output_tensor->MallocData();
  auto *out_quant_arg = new QuantArg();
  out_quant_arg->zeroPoint = 10;
  out_quant_arg->scale = 0.31228156;
  output_tensor->AddQuantParam(*out_quant_arg);
  outputs_->push_back(output_tensor);

  *correct = reinterpret_cast<int8_t *>(malloc(output_tensor->ElementsNum() * sizeof(int8_t)));
  int8_t co[] = {10, 10, 1, 1, 1, 10, 10};
  memcpy(*correct, co, output_tensor->ElementsNum() * sizeof(int8_t));

  // pad 2 before and 2 after the last dimension only
  int padding[] = {0, 0, 0, 0, 0, 0, 2, 2};
  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
  pad_param->constant_value_ = 0;
  return output_tensor->ElementsNum();
}
// Runs the int8 pad kernel on the 1-D fixture and expects a bit-exact match.
TEST_F(TestPadInt8, PadInt8Test1) {
  std::vector<lite::tensor::Tensor *> in_tensors;
  std::vector<lite::tensor::Tensor *> out_tensors;
  auto *param = new PadParameter();
  auto *context = new lite::Context;
  int8_t *expect;
  int element_num = PadInt8TestInit1(&in_tensors, &out_tensors, param, &expect);
  auto *op =
    new kernel::PadInt8CPUKernel(reinterpret_cast<OpParameter *>(param), in_tensors, out_tensors, context);
  op->Init();
  op->Run();
  // tolerance 0: padding must reproduce values exactly
  CompareOutputData(reinterpret_cast<int8_t *>(out_tensors[0]->Data()), expect, element_num, 0);
  delete param;
  delete op;
  for (auto t : in_tensors) delete t;
  for (auto t : out_tensors) delete t;
  free(expect);
}
// Builds a 2-D pad fixture: input [6,2] padded with 3 rows before / 1 after
// and 1 column before / 2 after, producing a [10,5] output whose padded slots
// carry the zero-point value 10. Returns the output element count; *correct
// receives a malloc'd copy of the expected data.
int PadInt8TestInit2(std::vector<Tensor *> *inputs_, std::vector<Tensor *> *outputs_, PadParameter *pad_param,
                     int8_t **correct) {
  Tensor *in_t = new Tensor(kNumberTypeInt8, {6, 2}, schema::Format_NHWC, schema::NodeType_Parameter);
  in_t->MallocData();
  int8_t in[] = {18, 71, 99, -6, 5, -119, 86, 13, 15, -85, -41, -77};
  memcpy(in_t->Data(), in, sizeof(int8_t) * in_t->ElementsNum());
  QuantArg *in_quant_arg = new QuantArg();
  in_quant_arg->zeroPoint = 10, in_quant_arg->scale = 0.31228156;
  in_t->AddQuantParam(*in_quant_arg);
  inputs_->push_back(in_t);

  Tensor *out_t = new Tensor(kNumberTypeInt8, {10, 5}, schema::Format_NHWC, schema::NodeType_Parameter);
  out_t->MallocData();
  QuantArg *out_quant_arg = new QuantArg();
  out_quant_arg->zeroPoint = 10, out_quant_arg->scale = 0.31228156;
  out_t->AddQuantParam(*out_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  // expected 10x5 output: 3 leading pad rows, one pad column left / two right,
  // and a trailing pad row; pads are the zero point (10)
  int8_t co[] = {10, 10,  10,   10, 10,
                 10, 10,  10,   10, 10,
                 10, 10,  10,   10, 10,
                 10, 18,  71,   10, 10,
                 10, 99,  -6,   10, 10,
                 10, 5,   -119, 10, 10,
                 10, 86,  13,   10, 10,
                 10, 15,  -85,  10, 10,
                 10, -41, -77,  10, 10,
                 10, 10,  10,   10, 10};
  memcpy(*correct, co, out_t->ElementsNum() * sizeof(int8_t));

  // paddings (before/after pairs, last two dims): rows 3/1, columns 1/2
  int padding[] = {0, 0, 0, 0, 3, 1, 1, 2};
  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
  pad_param->constant_value_ = 0;
  return out_t->ElementsNum();
}
// Runs the int8 pad kernel on the 2-D fixture and expects a bit-exact match.
TEST_F(TestPadInt8, PadInt8Test2) {
  std::vector<lite::tensor::Tensor *> in_tensors;
  std::vector<lite::tensor::Tensor *> out_tensors;
  auto *param = new PadParameter();
  auto *context = new lite::Context;
  int8_t *expect;
  int element_num = PadInt8TestInit2(&in_tensors, &out_tensors, param, &expect);
  auto *op =
    new kernel::PadInt8CPUKernel(reinterpret_cast<OpParameter *>(param), in_tensors, out_tensors, context);
  op->Init();
  op->Run();
  // tolerance 0: padding must reproduce values exactly
  CompareOutputData(reinterpret_cast<int8_t *>(out_tensors[0]->Data()), expect, element_num, 0);
  delete param;
  delete op;
  for (auto t : in_tensors) delete t;
  for (auto t : out_tensors) delete t;
  free(expect);
}
// Builds a 4-D pad fixture: input [2,3,2,1] with per-dimension (before/after)
// paddings N:3/1, H:1/2, W:2/0, C:1/1, giving a [6,6,4,3] output (432
// values). Padded slots carry the zero-point value 10; the twelve input
// values land at out[n+3][h+1][w+2][1]. Returns the output element count;
// *correct receives a malloc'd copy of the expected data.
int PadInt8TestInit4(std::vector<Tensor *> *inputs_, std::vector<Tensor *> *outputs_, PadParameter *pad_param,
                     int8_t **correct) {
  Tensor *in_t = new Tensor(kNumberTypeInt8, {2, 3, 2, 1}, schema::Format_NHWC, schema::NodeType_Parameter);
  in_t->MallocData();
  int8_t in[] = {73, 24, 7, -31, -109, -2, 69, -64, 51, -45, 38, 53};
  memcpy(in_t->Data(), in, sizeof(int8_t) * in_t->ElementsNum());
  QuantArg *in_quant_arg = new QuantArg();
  in_quant_arg->zeroPoint = 10, in_quant_arg->scale = 0.31228156;
  in_t->AddQuantParam(*in_quant_arg);
  inputs_->push_back(in_t);

  Tensor *out_t = new Tensor(kNumberTypeInt8, {6, 6, 4, 3}, schema::Format_NHWC, schema::NodeType_Parameter);
  out_t->MallocData();
  QuantArg *out_quant_arg = new QuantArg();
  out_quant_arg->zeroPoint = 10, out_quant_arg->scale = 0.31228156;
  out_t->AddQuantParam(*out_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  // expected flattened [6,6,4,3] output; everything is the zero point (10)
  // except the twelve source values at their padded NHWC offsets
  int8_t co[] = {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 73, 10, 10, 24, 10,
                 10, 10, 10, 10, 10, 10, 10, 7, 10, 10, -31, 10,
                 10, 10, 10, 10, 10, 10, 10, -109, 10, 10, -2, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 69, 10, 10, -64, 10,
                 10, 10, 10, 10, 10, 10, 10, 51, 10, 10, -45, 10,
                 10, 10, 10, 10, 10, 10, 10, 38, 10, 10, 53, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10};
  memcpy(*correct, co, out_t->ElementsNum() * sizeof(int8_t));

  // paddings as (before, after) pairs per dimension: N 3/1, H 1/2, W 2/0, C 1/1
  int padding[] = {3, 1, 1, 2, 2, 0, 1, 1};
  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
  pad_param->constant_value_ = 0;
  return out_t->ElementsNum();
}
// Exercises the 4-D padding case built by PadInt8TestInit4 and expects a
// bit-exact match.
TEST_F(TestPadInt8, PadInt8TestInit4) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto pad_param = new PadParameter();
  lite::Context *ctx = new lite::Context;
  int8_t *correct;
  // Fix: the original called PadInt8TestInit2 here (copy/paste slip), which
  // re-ran the 2-D case and left PadInt8TestInit4 dead; this test is meant to
  // cover the 4-D fixture its name refers to.
  int total_size = PadInt8TestInit4(&inputs_, &outputs_, pad_param, &correct);
  kernel::PadInt8CPUKernel *pad =
    new kernel::PadInt8CPUKernel(reinterpret_cast<OpParameter *>(pad_param), inputs_, outputs_, ctx);
  pad->Init();
  pad->Run();
  CompareOutputData(reinterpret_cast<int8_t *>(outputs_[0]->Data()), correct, total_size, 0);
  delete pad_param;
  delete pad;
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
  free(correct);
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/conv1x1fp32_output1_nhwc.bin
浏览文件 @
8beb1b0f
无法预览此类型文件
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconv_fp32_nchw_output1.bin
浏览文件 @
8beb1b0f
无法预览此类型文件
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconv_fp32_nhwc_input1.bin
浏览文件 @
8beb1b0f
无法预览此类型文件
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录