Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2301_77200941
mindspore
提交
6cfcdaab
M
mindspore
项目概览
2301_77200941
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6cfcdaab
编写于
8月 21, 2020
作者:
L
ling
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MS][LITE][Develop]conv1x1 int8
上级
80d570f0
变更
16
显示空白变更内容
内联
并排
Showing
16 changed files
with
839 additions
and
87 deletions
+839
-87
mindspore/lite/nnacl/int8/conv_int8.c
mindspore/lite/nnacl/int8/conv_int8.c
+20
-0
mindspore/lite/nnacl/int8/conv_int8.h
mindspore/lite/nnacl/int8/conv_int8.h
+7
-0
mindspore/lite/nnacl/int8/deconv.c
mindspore/lite/nnacl/int8/deconv.c
+1
-67
mindspore/lite/nnacl/int8/matmul_int8.c
mindspore/lite/nnacl/int8/matmul_int8.c
+45
-2
mindspore/lite/nnacl/int8/matmul_int8.h
mindspore/lite/nnacl/int8/matmul_int8.h
+6
-1
mindspore/lite/nnacl/matmul_parameter.h
mindspore/lite/nnacl/matmul_parameter.h
+5
-0
mindspore/lite/nnacl/opt_op_handler.c
mindspore/lite/nnacl/opt_op_handler.c
+8
-0
mindspore/lite/nnacl/pack.c
mindspore/lite/nnacl/pack.c
+106
-5
mindspore/lite/nnacl/pack.h
mindspore/lite/nnacl/pack.h
+6
-1
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
...pore/lite/src/runtime/kernel/arm/base/convolution_base.cc
+7
-6
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc
...spore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc
+1
-1
mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc
.../lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc
+270
-0
mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h
...e/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h
+68
-0
mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
...pore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
+4
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
...test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
+4
-4
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/conv_1x1_int8_tests.cc
...est/ut/src/runtime/kernel/arm/int8/conv_1x1_int8_tests.cc
+281
-0
未找到文件。
mindspore/lite/nnacl/int8/conv_int8.c
浏览文件 @
6cfcdaab
...
...
@@ -367,6 +367,26 @@ void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight
}
}
/* Int8 1x1-convolution core: one tiled int8 GEMM over pre-packed activations
 * and weights, with re-quantization folded in.
 *   packed_input : activations in 16x4-tiled layout
 *   packed_weight: weights in 4x16-tiled layout
 *   dst          : row-major int8 output (stride = output_channel_)
 *   input_sum    : precomputed per-row (or per-channel) input*zp sums
 *   bias         : per-output-channel int32 bias
 *   matmul_func  : optional runtime-resolved optimized kernel; may be NULL.
 * Falls back to the reference MatMulInt8_16x4_r when no optimized kernel
 * was loaded. Per-channel quantization is signalled by filter_arg_num_ > 1. */
void Conv1x1Int8(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum,
                 const int32_t *bias, int row, int col, int deep16, ConvParameter *conv_param,
                 MATMUL_OPT_R_FUNC matmul_func) {
  /* Choose the kernel once so the long argument list appears a single time. */
  MATMUL_OPT_R_FUNC compute = (matmul_func != NULL) ? matmul_func : MatMulInt8_16x4_r;
  compute(packed_input, packed_weight, dst, row, col, deep16, conv_param->output_channel_, input_sum, bias,
          conv_param->conv_quant_arg_.left_shift_, conv_param->conv_quant_arg_.right_shift_,
          conv_param->conv_quant_arg_.quant_multiplier_, conv_param->conv_quant_arg_.output_quant_args_[0].zp_,
          conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0],
          (conv_param->conv_quant_arg_.filter_arg_num_ > 1));
}
// int8 convolution 3x3
void
Conv3x3Int8
(
int16_t
*
input_data
,
int16_t
*
transed_weight
,
const
int32_t
*
bias_data
,
int8_t
*
output_data
,
int16_t
*
tile_buffer
,
int16_t
*
block_unit_buffer
,
int32_t
*
tmp_dst_buffer
,
int8_t
*
tmp_out
,
...
...
mindspore/lite/nnacl/int8/conv_int8.h
浏览文件 @
6cfcdaab
...
...
@@ -25,6 +25,8 @@
#include "nnacl/conv_parameter.h"
#include "nnacl/winograd_utils.h"
#include "nnacl/quantization/quantize.h"
#include "nnacl/matmul_parameter.h"
#include "nnacl/int8/matmul_int8.h"
typedef
void
(
*
GEMM_FUNC
)(
int8_t
*
dst
,
const
int8_t
*
src
,
const
int8_t
*
weight
,
const
int32_t
*
bias
,
size_t
ksize
,
size_t
ic4
,
size_t
output_channel
,
size_t
offset
,
const
int32_t
*
input_sum
,
size_t
act_min
,
...
...
@@ -51,6 +53,11 @@ void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight
int32_t
*
tmp_dst
,
int8_t
*
tmp_out
,
int8_t
*
output_data
,
int32_t
*
input_sum
,
int
task_id
,
ConvParameter
*
conv_param
,
GEMM_FUNC
gemm_func
);
// int8 convolution 1x1
void
Conv1x1Int8
(
const
int8_t
*
packed_input
,
const
int8_t
*
packed_weight
,
int8_t
*
dst
,
const
int32_t
*
input_sum
,
const
int32_t
*
bias
,
int
row
,
int
col
,
int
deep16
,
ConvParameter
*
conv_param
,
MATMUL_OPT_R_FUNC
matmul_func
);
// int8 convolution 3x3
void
Conv3x3Int8
(
int16_t
*
input_data
,
int16_t
*
transed_weight
,
const
int32_t
*
bias_data
,
int8_t
*
output_data
,
int16_t
*
tile_buffer
,
int16_t
*
block_unit_buffer
,
int32_t
*
tmp_dst_buffer
,
int8_t
*
tmp_out
,
...
...
mindspore/lite/nnacl/int8/deconv.c
浏览文件 @
6cfcdaab
...
...
@@ -172,73 +172,7 @@ void DeConvPackWeightSum(int8_t *weight, int32_t *weight_sum, int32_t input_zp,
void
DeConvPackInputSum
(
const
int8_t
*
src
,
int32_t
*
dst
,
int32_t
filter_zp
,
size_t
row4
,
size_t
col16
,
bool
suppport_opt
)
{
/* optimize normal -> same layout */
#ifdef ENABLE_ARM64
asm
volatile
(
"mov x10, %[src]
\n
"
"mov x11, %[dst]
\n
"
"dup v15.4s, %w[filter_zp]
\n
"
"mov x0, #0
\n
"
"1:
\n
"
"cmp x0, %[row4]
\n
"
"beq 4f
\n
"
"add x0, x0, #4
\n
"
"dup v10.4s, wzr
\n
"
"mov x2, #0
\n
"
"2:
\n
"
"cmp x2, %[col16]
\n
"
"beq 3f
\n
"
"add x2, x2, #16
\n
"
"ld1 {v0.16b}, [x10], #16
\n
"
"ld1 {v1.16b}, [x10], #16
\n
"
"ld1 {v2.16b}, [x10], #16
\n
"
"ld1 {v3.16b}, [x10], #16
\n
"
"saddlp v4.8h, v0.16b
\n
"
"saddlp v5.8h, v1.16b
\n
"
"saddlp v6.8h, v2.16b
\n
"
"saddlp v7.8h, v3.16b
\n
"
"saddlp v0.4S, v4.8h
\n
"
"saddlp v1.4S, v5.8h
\n
"
"saddlp v2.4S, v6.8h
\n
"
"saddlp v3.4S, v7.8h
\n
"
"addv s4, v0.4S
\n
"
"addv s5, v1.4S
\n
"
"addv s6, v2.4S
\n
"
"addv s7, v3.4S
\n
"
"mov v0.s[0], v4.s[0]
\n
"
"mov v0.s[1], v5.s[0]
\n
"
"mov v0.s[2], v6.s[0]
\n
"
"mov v0.s[3], v7.s[0]
\n
"
"add v10.4s, v10.4s, v0.4s
\n
"
"b 2b
\n
"
"3:
\n
"
"mul v10.4s, v10.4s, v15.4s
\n
"
"st1 {v10.4s}, [x11], #16
\n
"
"beq 1b
\n
"
"4:
\n
"
:
:
[
dst
]
"r"
(
dst
),
[
src
]
"r"
(
src
),
[
row4
]
"r"
(
row4
),
[
col16
]
"r"
(
col16
),
[
filter_zp
]
"r"
(
filter_zp
)
:
"x0"
,
"x1"
,
"x2"
,
"x3"
,
"x10"
,
"x11"
,
"v0"
,
"v1"
,
"v2"
,
"v3"
,
"v4"
,
"v5"
,
"v6"
,
"v7"
,
"v10"
,
"v15"
);
#else
for
(
int
r
=
0
;
r
<
row4
;
r
++
)
{
int32_t
tmp_value
=
0
;
for
(
int
c
=
0
;
c
<
col16
;
c
++
)
{
int
r4div
=
r
/
C4NUM
,
r4mod
=
r
%
C4NUM
,
c16div
=
c
/
C16NUM
,
c16mod
=
c
%
C16NUM
;
int
src_index
=
r4div
*
C4NUM
*
col16
+
c16div
*
C16NUM
*
C4NUM
+
r4mod
*
C16NUM
+
c16mod
;
tmp_value
+=
src
[
src_index
];
}
}
#endif
PackInputSum16x4PerLater
(
src
,
dst
,
filter_zp
,
row4
,
col16
);
return
;
}
...
...
mindspore/lite/nnacl/int8/matmul_int8.c
浏览文件 @
6cfcdaab
...
...
@@ -28,6 +28,19 @@ void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col)
}
}
/* Repack a dense row-major [row x col] int8 matrix (e.g. conv1x1 weights,
 * row = output channel, col = input channel) into the 4x16-tiled layout read
 * back by MatMulInt8_16x4_r as its `b` operand:
 *   dst[rd4 * col16 * C4NUM + cd16 * C16NUM * C4NUM + rm4 * C16NUM + cm16]
 * where col16 = UP_ROUND(col, C16NUM). The caller must pre-zero dst so the
 * row/col padding introduced by rounding stays zero. */
void RowMajor2Row4x16MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
  int col16 = UP_ROUND(col, C16NUM);
  for (int r = 0; r < row; r++) {
    int rd4 = r / C4NUM;
    int rm4 = r % C4NUM;
    for (int c = 0; c < col; c++) {
      int cd16 = c / C16NUM;
      int cm16 = c % C16NUM;
      /* BUGFIX (dst): the consumer (MatMulInt8_16x4_r) indexes b as
       *   bi = c4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + c4mod * C16NUM + d16mod,
       * i.e. the 4-row group (rd4) carries the col16*C4NUM stride; the previous
       * code swapped the rd4/cd16 strides. */
      int dst_index = rd4 * col16 * C4NUM + cd16 * C16NUM * C4NUM + rm4 * C16NUM + cm16;
      /* BUGFIX (src): the source matrix is dense row-major with stride `col`;
       * indexing it with the rounded-up stride `col16` read past the end of
       * every row (and of the buffer) whenever col is not a multiple of 16. */
      int src_index = r * col + c;
      dst_ptr[dst_index] = src_ptr[src_index];
    }
  }
}
void
MatrixPack4x16UnitInt8
(
int8_t
*
src
,
int8_t
*
dst
,
int
row
,
int
col
,
int
stride
)
{
for
(
int
r
=
0
;
r
<
row
;
r
++
)
{
int8_t
*
src_r
=
src
+
r
*
stride
;
...
...
@@ -145,7 +158,38 @@ void MatMulInt8_16x4(const int8_t *a, const int8_t *b, int *dst, int row_4, int
return
;
}
#ifdef ENABLE_ARM64
/* Reference (non-asm) int8 GEMM with fused re-quantization.
 * a: 4x16-tiled activations, b: 4x16-tiled weights, dst: row-major int8 with
 * row stride `stride`. deep_16 is the depth rounded up to 16.
 * input_sum holds zp-folded input sums: per-tensor mode uses input_sum[r];
 * per-channel mode uses the oc4-grouped layout written by PackInputSum16x4Int8.
 * left_shift/right_shift/multiplier are per-channel arrays when per_channel,
 * otherwise only element [0] is read. Output is clamped to [mini, maxi] after
 * adding output_zp. */
void MatMulInt8_16x4_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_16,
                       size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
                       int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
                       bool per_channel) {
  /* row4x16-major * row16x4-major => (int8)row-major : per-channel */
  for (int r = 0; r < row; r++) {
    for (int c = 0; c < col; c++) {
      int r4div = r / C4NUM, r4mod = r % C4NUM;
      int c4div = c / C4NUM, c4mod = c % C4NUM;
      size_t ci = r * stride + c;
      int32_t value = 0;
      for (int d = 0; d < deep_16; d++) {
        int d16div = d / C16NUM, d16mod = d % C16NUM;
        size_t ai = r4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + r4mod * C16NUM + d16mod;
        size_t bi = c4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + c4mod * C16NUM + d16mod;
        value = value + a[ai] * b[bi];
      }
      /* BUGFIX: per-channel input sums are stored by PackInputSum16x4Int8 at
       *   dst_index = ci4div * C4NUM * hw4 + ri * C4NUM + ci4mod
       * with hw4 = UP_ROUND(row, C4NUM); the previous read dropped the C4NUM
       * factor on the group stride and so read the wrong sums for c >= 4. */
      int32_t cur_input_sum =
        per_channel ? input_sum[c4div * C4NUM * UP_ROUND(row, C4NUM) + r * C4NUM + c4mod] : input_sum[r];
      value -= cur_input_sum;
      value += bias[c];
      int32_t cur_left_shift = per_channel ? left_shift[c] : left_shift[0];
      int32_t cur_right_shift = per_channel ? right_shift[c] : right_shift[0];
      int32_t cur_multiplier = per_channel ? multiplier[c] : multiplier[0];
      value = MultiplyByQuantizedMultiplier(value, cur_multiplier, cur_left_shift, cur_right_shift) + output_zp;
      value = MSMIN(maxi, value);
      value = MSMAX(mini, value);
      dst[ci] = (int8_t)value;
    }
  }
  return;
}
void
RowMajor2Row4x16Major
(
int8_t
*
src
,
int
row
,
int
col
,
int8_t
*
dst
,
int
col_16
)
{
int
stride
=
sizeof
(
int8_t
)
*
16
*
4
;
for
(
int
r
=
0
;
r
<
row
;
++
r
)
{
...
...
@@ -201,4 +245,3 @@ void Row4x4Major2RowMajor(int8_t *src, int row4, int8_t *dst, int row, int cow)
}
}
}
#endif
mindspore/lite/nnacl/int8/matmul_int8.h
浏览文件 @
6cfcdaab
...
...
@@ -28,17 +28,22 @@ void MatMulInt8(const int8_t *a, const int8_t *b, int *c, const int row8, const
const
int
a_zp
,
const
int
b_zp
);
void
MatMulInt8_16x4
(
const
int8_t
*
a
,
const
int8_t
*
b
,
int
*
dst
,
int
row_4
,
int
col_4
,
int
deep_16
,
const
int
*
input_sum
,
const
int
*
bias
);
void
MatMulInt8_16x4_r
(
const
int8_t
*
a
,
const
int8_t
*
b
,
int8_t
*
dst
,
size_t
row
,
size_t
col
,
size_t
deep_16
,
size_t
stride
,
const
int32_t
*
input_sum
,
const
int32_t
*
bias
,
int32_t
*
left_shift
,
int32_t
*
right_shift
,
int32_t
*
multiplier
,
int32_t
output_zp
,
int32_t
mini
,
int32_t
maxi
,
bool
per_channel
);
void
RowMajor2Row8MajorInt8
(
int8_t
*
src_ptr
,
int8_t
*
dst_ptr
,
int
row
,
int
col
);
void
RowMajor2Row4x16MajorInt8
(
int8_t
*
src_ptr
,
int8_t
*
dst_ptr
,
int
row
,
int
col
);
void
RowMajor2Col8MajorInt8
(
int8_t
*
src_ptr
,
int8_t
*
dst_ptr
,
int
row
,
int
col
);
void
RowMajor2Row16x4MajorInt8
(
void
*
src_ptr
,
void
*
dst_ptr
,
int
row
,
int
col
);
#ifdef ENABLE_ARM64
void
RowMajor2Row4x16Major
(
int8_t
*
src
,
int
row
,
int
col
,
int8_t
*
dst
,
int
col_16
);
void
RowMajor2Col16x4Major
(
int8_t
*
src
,
int
row
,
int
col
,
int8_t
*
dst
,
int
row_16
);
void
RowMajor2Asums
(
int8_t
*
a
,
int
row
,
int
col
,
int
b_zp
,
int
*
dst
);
void
RowMajor2Bbias
(
int8_t
*
b
,
int
row
,
int
col
,
int
a_zp
,
int
b_zp
,
int
*
bias
,
int
*
dst
);
void
Row4x4Major2RowMajor
(
int8_t
*
src
,
int
row4
,
int8_t
*
dst
,
int
row
,
int
cow
);
#ifdef ENABLE_ARM64
// bias = bias + depth * a_zp * b_zp - a_zp * b_sums
void
MatmulInt8Neon64
(
const
int8_t
*
a
,
const
int8_t
*
b
,
int8_t
*
dst
,
int
row4
,
int
col4
,
int
deep16
,
const
int
*
a_sums
,
const
int
*
bias
,
int
act_min
,
int
act_max
,
int
out_zp
,
int
multiplier
,
int
left_shift
,
...
...
mindspore/lite/nnacl/matmul_parameter.h
浏览文件 @
6cfcdaab
...
...
@@ -22,6 +22,11 @@
typedef
void
(
*
MATMUL_OPT_R4_FUNC
)(
const
int8_t
*
a
,
const
int8_t
*
b
,
int
*
dst
,
int
row_4
,
int
col_4
,
int
deep_16
,
const
int
*
input_sum
,
const
int
*
bias
);
typedef
void
(
*
MATMUL_OPT_R_FUNC
)(
const
int8_t
*
a
,
const
int8_t
*
b
,
int8_t
*
dst
,
size_t
row
,
size_t
col
,
size_t
deep_16
,
size_t
stride
,
const
int32_t
*
input_sum
,
const
int32_t
*
bias
,
int32_t
*
left_shift
,
int32_t
*
right_shift
,
int32_t
*
multiplier
,
int32_t
output_zp
,
int32_t
mini
,
int32_t
maxi
,
bool
per_channel
);
typedef
void
(
*
MAT_TRANS_FUNC
)(
void
*
dst
,
void
*
a
,
int
row
,
int
col
);
typedef
enum
ActType
{
ActType_No
,
ActType_Relu
,
ActType_Relu6
}
ActType
;
...
...
mindspore/lite/nnacl/opt_op_handler.c
浏览文件 @
6cfcdaab
...
...
@@ -15,6 +15,7 @@
*/
#include <stdlib.h>
#include <stdbool.h>
#ifdef __cplusplus
extern
"C"
{
...
...
@@ -45,4 +46,11 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
const
int
*
input_sum
,
const
int
*
bias
)
{
return
MatMulOptR4Int8Neon64
(
a
,
b
,
dst
,
row4
,
col4
,
deep16
,
input_sum
,
bias
);
}
void
MatMulRInt8_optimize_handler
(
const
int8_t
*
a
,
const
int8_t
*
b
,
int8_t
*
dst
,
size_t
row
,
size_t
col
,
size_t
deep_16
,
size_t
stride
,
const
int32_t
*
input_sum
,
const
int32_t
*
bias
,
int32_t
*
left_shift
,
int32_t
*
right_shift
,
int32_t
*
multiplier
,
int32_t
output_zp
,
int32_t
mini
,
int32_t
maxi
,
bool
per_channel
)
{
return
;
}
#endif
mindspore/lite/nnacl/pack.c
浏览文件 @
6cfcdaab
...
...
@@ -153,22 +153,24 @@ void PackWeightInt8Opt(int8_t *weight_data, ConvParameter *conv_param, int8_t *p
}
// kernel plane loop
}
void
Conv1x1InputPack
Fp32
(
const
float
*
src
,
float
*
dst
,
ConvParameter
*
conv_param
)
{
void
Conv1x1InputPack
(
const
void
*
src_ptr
,
void
*
dst_ptr
,
ConvParameter
*
conv_param
,
int
data_size
)
{
/* support nhwc */
char
*
src
=
(
char
*
)
src_ptr
;
char
*
dst
=
(
char
*
)
dst_ptr
;
for
(
int
dst_h
=
0
;
dst_h
<
conv_param
->
output_h_
;
dst_h
++
)
{
int
src_h
=
dst_h
*
conv_param
->
stride_h_
-
conv_param
->
pad_h_
;
if
(
src_h
<
0
||
src_h
>=
conv_param
->
input_h_
)
{
continue
;
}
const
float
*
src_h_ptr
=
src
+
src_h
*
conv_param
->
input_w_
*
conv_param
->
input_channel_
;
float
*
dst_h_ptr
=
dst
+
dst_h
*
conv_param
->
output_w_
*
conv_param
->
input_channel_
;
const
char
*
src_h_ptr
=
src
+
src_h
*
conv_param
->
input_w_
*
conv_param
->
input_channel_
*
data_size
;
char
*
dst_h_ptr
=
dst
+
dst_h
*
conv_param
->
output_w_
*
conv_param
->
input_channel_
*
data_size
;
for
(
int
dst_w
=
0
;
dst_w
<
conv_param
->
output_w_
;
dst_w
++
)
{
int
src_w
=
dst_w
*
conv_param
->
stride_w_
-
conv_param
->
pad_w_
;
if
(
src_w
<
0
||
src_w
>=
conv_param
->
input_w_
)
{
continue
;
}
memcpy
(
dst_h_ptr
+
dst_w
*
conv_param
->
input_channel_
,
src_h_ptr
+
src_w
*
conv_param
->
input_channel_
,
conv_param
->
input_channel_
*
sizeof
(
float
)
);
memcpy
(
dst_h_ptr
+
dst_w
*
conv_param
->
input_channel_
*
data_size
,
src_h_ptr
+
src_w
*
conv_param
->
input_channel_
*
data_size
,
conv_param
->
input_channel_
*
data_size
);
}
}
return
;
...
...
@@ -188,6 +190,105 @@ void Pack1x1WeightFp32(const float *weight_data, float *packed_weight, ConvParam
return
;
}
/* Per-tensor variant of the input-sum precomputation:
 *   dst[r] = filter_zp * sum over all (16-aligned) depth entries of row r
 * for input packed in the 16x4-tiled layout (row4 rows rounded to 4, col16
 * depth rounded to 16). On ARM64 an inline NEON kernel processes 4 rows per
 * iteration; elsewhere a scalar fallback walks the tiled indices. */
void PackInputSum16x4PerLater(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size_t col16) {
  /* optimize normal -> same layout */
#ifdef ENABLE_ARM64
  asm volatile(
    "mov x10, %[src]\n"
    "mov x11, %[dst]\n"
    "dup v15.4s, %w[filter_zp]\n"  /* broadcast filter zero-point for the final multiply */
    "mov x0, #0\n"                 /* x0: row counter, steps by 4 */
    "1:\n"
    "cmp x0, %[row4]\n"
    "beq 4f\n"
    "add x0, x0, #4\n"
    "dup v10.4s, wzr\n"            /* v10: running sums for the 4 rows of this tile */
    "mov x2, #0\n"                 /* x2: depth counter, steps by 16 */
    "2:\n"
    "cmp x2, %[col16]\n"
    "beq 3f\n"
    "add x2, x2, #16\n"
    "ld1 {v0.16b}, [x10], #16\n"   /* 16 depth bytes of each of the 4 rows */
    "ld1 {v1.16b}, [x10], #16\n"
    "ld1 {v2.16b}, [x10], #16\n"
    "ld1 {v3.16b}, [x10], #16\n"
    "saddlp v4.8h, v0.16b\n"       /* signed pairwise widen: 16xi8 -> 8xi16 */
    "saddlp v5.8h, v1.16b\n"
    "saddlp v6.8h, v2.16b\n"
    "saddlp v7.8h, v3.16b\n"
    "saddlp v0.4S, v4.8h\n"        /* 8xi16 -> 4xi32 */
    "saddlp v1.4S, v5.8h\n"
    "saddlp v2.4S, v6.8h\n"
    "saddlp v3.4S, v7.8h\n"
    "addv s4, v0.4S\n"             /* horizontal add -> one i32 per row */
    "addv s5, v1.4S\n"
    "addv s6, v2.4S\n"
    "addv s7, v3.4S\n"
    "mov v0.s[0], v4.s[0]\n"       /* gather the 4 row sums into one vector */
    "mov v0.s[1], v5.s[0]\n"
    "mov v0.s[2], v6.s[0]\n"
    "mov v0.s[3], v7.s[0]\n"
    "add v10.4s, v10.4s, v0.4s\n"
    "b 2b\n"
    "3:\n"
    "mul v10.4s, v10.4s, v15.4s\n" /* scale the 4 sums by filter_zp */
    "st1 {v10.4s}, [x11], #16\n"
    "beq 1b\n"                     /* flags still 'equal' from the cmp at 2: */
    "4:\n"
    :
    : [ dst ] "r"(dst), [ src ] "r"(src), [ row4 ] "r"(row4), [ col16 ] "r"(col16), [ filter_zp ] "r"(filter_zp)
    : "x0", "x1", "x2", "x3", "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v10", "v15");
#else
  /* Scalar fallback: walk every (row, depth) element through the tiled index
   * and accumulate per-row, then fold in the filter zero-point. */
  for (int r = 0; r < row4; r++) {
    int32_t tmp_value = 0;
    for (int c = 0; c < col16; c++) {
      int r4div = r / C4NUM, r4mod = r % C4NUM, c16div = c / C16NUM, c16mod = c % C16NUM;
      int src_index = r4div * C4NUM * col16 + c16div * C16NUM * C4NUM + r4mod * C16NUM + c16mod;
      tmp_value += src[src_index];
    }
    dst[r] = tmp_value * filter_zp;
  }
#endif
  return;
}
/* Precompute the zero-point correction sums for the conv1x1 int8 matmul.
 * input_value: activations already packed in the 16x4-tiled layout.
 * Per-tensor quantization (filter_arg_num_ == 1): one sum per spatial row,
 * delegated to PackInputSum16x4PerLater.
 * Per-channel quantization: one sum per (row, output channel) because each
 * output channel has its own filter zero-point; results are stored in the
 * oc4-grouped layout  input_sum[ci4div * C4NUM * hw4 + ri * C4NUM + ci4mod],
 * which is the layout the per-channel matmul reads back. */
void PackInputSum16x4Int8(int8_t *input_value, int32_t *input_sum, size_t input_channel, size_t output_channel,
                          size_t plane_size, ConvParameter *conv_param) {
  size_t hw4 = UP_ROUND(plane_size, C4NUM);
  size_t ic16 = UP_ROUND(input_channel, C16NUM);
  if (conv_param->conv_quant_arg_.filter_arg_num_ == 1) {
    /* Single filter zero-point: cheap per-row sums over the padded layout. */
    PackInputSum16x4PerLater(input_value, input_sum, conv_param->conv_quant_arg_.filter_quant_args_[0].zp_,
                             hw4, ic16);
  } else {
    for (int ri = 0; ri < plane_size; ri++) {
      int ri4div = ri / C4NUM, ri4mod = ri % C4NUM;
      for (int ci = 0; ci < output_channel; ci++) {
        int32_t tmp_sum_value = 0;
        int ci4div = ci / C4NUM, ci4mod = ci % C4NUM;
        int32_t filter_zp = conv_param->conv_quant_arg_.filter_quant_args_[ci].zp_;
        /* Sum row ri over the real (unpadded) depth via the tiled index. */
        for (int di = 0; di < input_channel; di++) {
          size_t di16div = di / C16NUM, di16mod = di % C16NUM;
          int src_index = ri4div * C4NUM * ic16 + di16div * C16NUM * C4NUM + ri4mod * C16NUM + di16mod;
          tmp_sum_value += input_value[src_index];
        }
        int dst_index = ci4div * C4NUM * hw4 + ri * C4NUM + ci4mod;
        input_sum[dst_index] = tmp_sum_value * filter_zp;
      }
    }
  }
  return;
}
void
Im2ColPackUnitFp32
(
const
float
*
input_data
,
ConvParameter
*
conv_param
,
float
*
packed_input
,
int
real_cal_num
,
int
block_index
)
{
// input format : nhwc
...
...
mindspore/lite/nnacl/pack.h
浏览文件 @
6cfcdaab
...
...
@@ -35,10 +35,15 @@ void Im2ColPackUnitInt8(const int8_t *input_data, int8_t *packed_input, int real
void
Im2ColPackUnitInt8Opt
(
const
int8_t
*
input_data
,
int8_t
*
packed_input
,
int
real_cal_num
,
int
block_index
,
int32_t
*
input_sum
,
ConvParameter
*
conv_param
);
void
Conv1x1InputPackFp32
(
const
float
*
src
,
float
*
dst
,
ConvParameter
*
conv_param
);
void
PackInputSum16x4PerLater
(
const
int8_t
*
src
,
int32_t
*
dst
,
int32_t
filter_zp
,
size_t
row4
,
size_t
col16
);
void
Conv1x1InputPack
(
const
void
*
src_ptr
,
void
*
dst_ptr
,
ConvParameter
*
conv_param
,
int
data_size
);
void
Pack1x1WeightFp32
(
const
float
*
weight_data
,
float
*
packed_weight
,
ConvParameter
*
conv_param
);
void
PackInputSum16x4Int8
(
int8_t
*
input_value
,
int32_t
*
input_sum
,
size_t
input_channel
,
size_t
output_channel
,
size_t
plane_size
,
ConvParameter
*
conv_param
);
void
MatrixPack
(
const
float
*
src
,
float
*
dst
,
int
row
,
int
ic4
,
int
stride
);
void
PackInputToC8Int8
(
const
int8_t
*
input_data
,
int16_t
*
packed_input
,
ConvParameter
*
conv_param
);
...
...
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
浏览文件 @
6cfcdaab
...
...
@@ -118,10 +118,13 @@ int ConvolutionBaseCPUKernel::CheckLayout(lite::tensor::Tensor *input_tensor) {
}
int
ConvolutionBaseCPUKernel
::
SetIfPerChannel
()
{
auto
filter_tensor
=
in_tensors_
.
at
(
kWeightIndex
);
auto
input_channel
=
filter_tensor
->
Channel
();
auto
output_channel
=
filter_tensor
->
Batch
();
uint8_t
per_channel
=
0b0
;
if
(
conv_quant_arg_
->
input_arg_num_
!=
kPerTensor
)
{
int
in_channel
=
conv_param_
->
input_channel_
;
if
(
static_cast
<
int
>
(
conv_quant_arg_
->
input_arg_num_
)
!=
in_channel
)
{
if
(
static_cast
<
int
>
(
conv_quant_arg_
->
input_arg_num_
)
!=
input_channel
)
{
MS_LOG
(
ERROR
)
<<
"input per channel quant param length is not equal to input channel."
;
return
RET_ERROR
;
}
...
...
@@ -129,8 +132,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
}
if
(
conv_quant_arg_
->
filter_arg_num_
!=
kPerTensor
)
{
int
filter_num
=
conv_param_
->
output_channel_
;
if
(
static_cast
<
int
>
(
conv_quant_arg_
->
filter_arg_num_
)
!=
filter_num
)
{
if
(
static_cast
<
int
>
(
conv_quant_arg_
->
filter_arg_num_
)
!=
output_channel
)
{
MS_LOG
(
ERROR
)
<<
"weight per channel quant param length is not equal to filter num."
;
return
RET_ERROR
;
}
...
...
@@ -138,8 +140,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
}
if
(
conv_quant_arg_
->
output_arg_num_
!=
kPerTensor
)
{
int
out_channel
=
conv_param_
->
output_channel_
;
if
(
static_cast
<
int
>
(
conv_quant_arg_
->
output_arg_num_
)
!=
out_channel
)
{
if
(
static_cast
<
int
>
(
conv_quant_arg_
->
output_arg_num_
)
!=
output_channel
)
{
MS_LOG
(
ERROR
)
<<
"output per channel quant param length is not equal to output channel."
;
return
RET_ERROR
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc
浏览文件 @
6cfcdaab
...
...
@@ -113,7 +113,7 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) {
output_ptr_
=
src_output
;
if
(
pre_trans_input_
)
{
Conv1x1InputPack
Fp32
(
src_input
,
input_ptr_
,
conv_param_
);
Conv1x1InputPack
(
src_input
,
input_ptr_
,
conv_param_
,
sizeof
(
float
)
);
}
else
{
input_ptr_
=
src_input
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc
0 → 100644
浏览文件 @
6cfcdaab
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/int8/convolution_1x1_int8.h"

#include <cstdlib>

#include "src/runtime/runtime_api.h"
using
mindspore
::
lite
::
RET_ERROR
;
using
mindspore
::
lite
::
RET_MEMORY_FAILED
;
using
mindspore
::
lite
::
RET_OK
;
namespace
mindspore
::
kernel
{
// Releases everything the kernel owns: the matmul descriptor, the packed
// weights, the shape-dependent buffers (FreeResizeBuf) and the quant params
// allocated by the base class (FreeQuantParam).
Convolution1x1Int8CPUKernel::~Convolution1x1Int8CPUKernel() {
  if (matmul_param_ != nullptr) {
    delete matmul_param_;  // allocated with new(std::nothrow) in Init()
    matmul_param_ = nullptr;
  }
  if (packed_weight_ != nullptr) {
    // BUGFIX: packed_weight_ is allocated with malloc() in InitWeightBias(),
    // so it must be released with free(); `delete` on malloc'd memory is UB.
    free(packed_weight_);
    packed_weight_ = nullptr;
  }
  FreeResizeBuf();
  FreeQuantParam();
}
// Frees the buffers whose size depends on the current tensor shapes
// (re-created on every ReSize). Safe to call repeatedly: pointers are
// reset to nullptr after freeing.
void Convolution1x1Int8CPUKernel::FreeResizeBuf() {
  if (packed_input_ != nullptr) {
    free(packed_input_);  // malloc'd in InitParam()
    packed_input_ = nullptr;
  }
  if (input_sum_ != nullptr) {
    free(input_sum_);  // malloc'd in InitParam()
    input_sum_ = nullptr;
  }
  return;
}
// Tries to resolve the optimized int8 matmul kernel from the runtime-loaded
// optimize module (ARM64 only). On success support_optimize_ is set and
// matmul_func_ points at "MatMulRInt8_optimize_handler"; on any failure the
// function pointer is cleared so Conv1x1Int8 falls back to the reference
// implementation.
void Convolution1x1Int8CPUKernel::CheckSupportOptimize() {
  support_optimize_ = false;
  matmul_func_ = MatMulInt8_16x4_r;  // default: portable reference kernel
#ifdef ENABLE_ARM64
  void *optimize_op_handler = OptimizeModule::GetInstance()->optimized_op_handler_;
  if (optimize_op_handler != nullptr) {
    dlerror();  // clear any stale dlerror state before the dlsym call
    *(reinterpret_cast<void **>(&matmul_func_)) = dlsym(optimize_op_handler, "MatMulRInt8_optimize_handler");
    auto dlopen_error = dlerror();
    if (dlopen_error != nullptr) {
      MS_LOG(ERROR) << "load matmul func failed! " << dlopen_error << ".";
      support_optimize_ = false;
      matmul_func_ = nullptr;
    } else {
      support_optimize_ = true;
    }
  } else {
    support_optimize_ = false;
    matmul_func_ = nullptr;
  }
#endif
  // NOTE(review): this unconditional assignment discards whatever the ARM64
  // branch above resolved, so the optimized kernel is never actually used —
  // presumably a temporary override while the asm path was being brought up.
  // Confirm before removing it.
  matmul_func_ = MatMulInt8_16x4_r;
  return;
}
// Packs the weights into the 4x16-tiled layout and folds the quantization
// zero-points into the bias so the inner matmul only needs one subtraction.
// Returns RET_ERROR on allocation failure, RET_OK otherwise.
int Convolution1x1Int8CPUKernel::InitWeightBias() {
  auto filter_tensor = in_tensors_.at(kWeightIndex);
  auto input_channel = filter_tensor->Channel();
  auto output_channel = filter_tensor->Batch();

  /* weight */
  // Packed size: depth rounded to 16, output channels rounded to 4; the
  // buffer is zeroed so the padding contributes nothing to the matmul.
  size_t size = UP_ROUND(input_channel, C16NUM) * UP_ROUND(output_channel, C4NUM) * sizeof(int8_t);
  packed_weight_ = reinterpret_cast<int8_t *>(malloc(size));
  if (packed_weight_ == nullptr) {
    MS_LOG(ERROR) << "Conv1x1 int8 Malloc weight error!";
    return RET_ERROR;
  }
  memset(packed_weight_, 0, size);
  RowMajor2Row4x16MajorInt8(reinterpret_cast<int8_t *>(filter_tensor->Data()), packed_weight_, output_channel,
                            input_channel);

  /* bias = bias - v2 x zp1 + zp1 x zp2 */
  // i.e. bias[oc] += filter_zp * input_zp * depth - (sum of weights[oc]) * input_zp
  int col4 = UP_ROUND(output_channel, C4NUM);
  bias_data_ = malloc(col4 * sizeof(int32_t));
  if (bias_data_ == nullptr) {
    MS_LOG(ERROR) << "Conv1x1 int8 Malloc bias_ptr_ error!";
    return RET_ERROR;
  }
  memset(bias_data_, 0, col4 * sizeof(int32_t));
  // Third input tensor, when present, carries the int32 bias.
  if (in_tensors_.size() == 3) {
    memcpy(bias_data_, in_tensors_[kBiasIndex]->Data(), output_channel * sizeof(int32_t));
  }

  int32_t *bias_data = reinterpret_cast<int32_t *>(bias_data_);
  int8_t *weight = reinterpret_cast<int8_t *>(filter_tensor->Data());
  int32_t input_zp = conv_param_->conv_quant_arg_.input_quant_args_[0].zp_;
  for (int oc = 0; oc < output_channel; oc++) {
    int32_t weight_sum_value = 0;
    // Per-tensor quantization uses a single filter zero-point (index 0);
    // per-channel uses one per output channel.
    int32_t filter_zp = (conv_param_->conv_quant_arg_.filter_arg_num_ == 1)
                          ? conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_
                          : conv_param_->conv_quant_arg_.filter_quant_args_[oc].zp_;
    for (int ic = 0; ic < input_channel; ic++) {
      weight_sum_value += weight[oc * input_channel + ic];
    }
    bias_data[oc] += filter_zp * input_zp * input_channel - weight_sum_value * input_zp;
  }
  return RET_OK;
}
int
Convolution1x1Int8CPUKernel
::
Init
()
{
if
(
!
InferShapeDone
())
{
return
RET_OK
;
}
matmul_param_
=
new
(
std
::
nothrow
)
MatMulParameter
();
if
(
matmul_param_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Init matmul_param_ failed."
;
return
RET_ERROR
;
}
CheckSupportOptimize
();
auto
ret
=
SetQuantParam
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Set quant param failed."
;
return
ret
;
}
ret
=
InitWeightBias
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Init weight bias failed."
;
return
ret
;
}
return
ReSize
();
}
// Shape-dependent setup: derives the matmul dimensions from the conv shapes,
// splits the output channels across threads, and allocates the packed-input
// and input-sum scratch buffers. Returns RET_ERROR on allocation failure.
int Convolution1x1Int8CPUKernel::InitParam() {
  // A 1x1 conv only needs an input re-gather when stride or padding is
  // non-trivial; otherwise the input plane maps 1:1 onto the matmul rows.
  pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 ||
                      conv_param_->stride_w_ != 1);

  // 1x1 conv as GEMM: row = spatial pixels, deep = in channels, col = out channels.
  matmul_param_->row_ = conv_param_->output_h_ * conv_param_->output_w_;
  matmul_param_->deep_ = conv_param_->input_channel_;
  matmul_param_->col_ = conv_param_->output_channel_;

  // Threads split the output channels in groups of C4NUM;
  // thread_stride_ = oc4 groups per thread.
  thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C4NUM));
  thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C4NUM), thread_count_);

  // Packed activations: rows rounded to 4, depth rounded to 16 (zero padded).
  size_t size = UP_ROUND(matmul_param_->row_, C4NUM) * UP_ROUND(matmul_param_->deep_, C16NUM);
  packed_input_ = reinterpret_cast<int8_t *>(malloc(size * sizeof(int8_t)));
  if (packed_input_ == nullptr) {
    MS_LOG(ERROR) << "conv1x1 int8 Malloc packed_input_ error!";
    return RET_ERROR;
  }
  memset(packed_input_, 0, size * sizeof(int8_t));

  // Input-sum buffer: per-channel quantization needs a sum per
  // (row, oc4 group); per-tensor needs only one per row.
  if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
    size = UP_ROUND(conv_param_->output_channel_, C4NUM) * UP_ROUND(matmul_param_->row_, C4NUM);
  } else {
    size = UP_ROUND(matmul_param_->row_, C4NUM);
  }
  input_sum_ = reinterpret_cast<int32_t *>(malloc(size * sizeof(int32_t)));
  if (input_sum_ == nullptr) {
    MS_LOG(ERROR) << "malloc input_sum_ failed.";
    return RET_ERROR;
  }
  memset(input_sum_, 0, size * sizeof(int32_t));
  return RET_OK;
}
int
Convolution1x1Int8CPUKernel
::
ReSize
()
{
FreeResizeBuf
();
ConvolutionBaseCPUKernel
::
Init
();
int
error_code
=
InitParam
();
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Convolution base init failed."
;
return
error_code
;
}
return
RET_OK
;
}
// Prepares one batch for the matmul: records the output pointer, gathers the
// strided/padded input pixels when needed, and packs the activations into the
// 16x4-tiled layout expected by the int8 GEMM.
void Convolution1x1Int8CPUKernel::Pre1x1Trans(int8_t *src_input, int8_t *src_output) {
  output_ptr_ = src_output;
  if (!pre_trans_input_) {
    // Unit stride, no padding: the matmul can read the source buffer directly.
    input_ptr_ = src_input;
  } else {
    // Gather only the pixels the 1x1 conv actually reads into input_ptr_
    // (allocated in Run()).
    Conv1x1InputPack(src_input, input_ptr_, conv_param_, sizeof(int8_t));
  }
  RowMajor2Row16x4MajorInt8(input_ptr_, packed_input_, matmul_param_->row_, matmul_param_->deep_);
}
// Computes this task's slice of the output: each task owns thread_stride_
// groups of C4NUM output channels of the (row_ x col_) matmul.
int Convolution1x1Int8CPUKernel::RunImpl(int task_id) {
  const int stride_oc = static_cast<int>(thread_stride_) * C4NUM;
  const int oc_offset = task_id * stride_oc;
  // Signed arithmetic throughout: the original mixed size_t thread_stride_
  // into the subtraction, which wraps instead of going negative.
  int cur_oc = MSMIN(stride_oc, matmul_param_->col_ - oc_offset);
  if (cur_oc <= 0) {
    return RET_OK;
  }
  // BUGFIX: bias was offset twice — once here and once again at the call site.
  int32_t *bias = reinterpret_cast<int32_t *>(bias_data_) + oc_offset;
  // BUGFIX: the packed weights store UP_ROUND(deep_, C16NUM) bytes per output
  // channel (see the buffer size in InitWeightBias), so the per-task offset
  // must use the rounded depth; the unrounded deep_ pointed tasks > 0 at the
  // wrong tile whenever deep_ % 16 != 0.
  const int deep16 = UP_ROUND(matmul_param_->deep_, C16NUM);
  // NOTE(review): in per-channel mode input_sum_ is laid out per oc4 group but
  // is passed without a per-task offset here — confirm whether the optimized
  // kernel expects the slice base instead.
  Conv1x1Int8(packed_input_, packed_weight_ + oc_offset * deep16, output_ptr_ + oc_offset, input_sum_, bias,
              matmul_param_->row_, cur_oc, deep16, conv_param_, matmul_func_);
  return RET_OK;
}
int
Convolution1x1Int8Impl
(
int
task_id
,
LiteParallelGroupEnv
*
penv
,
void
*
cdata
)
{
auto
conv
=
reinterpret_cast
<
Convolution1x1Int8CPUKernel
*>
(
cdata
);
auto
error_code
=
conv
->
RunImpl
(
task_id
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"conv1x1 Int8 Run error task_id["
<<
task_id
<<
"] error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
return
RET_OK
;
}
int
Convolution1x1Int8CPUKernel
::
Run
()
{
auto
ret
=
Prepare
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Prepare failed."
;
return
RET_ERROR
;
}
if
(
pre_trans_input_
)
{
input_ptr_
=
reinterpret_cast
<
int8_t
*>
(
ctx_
->
allocator
->
Malloc
(
matmul_param_
->
row_
*
matmul_param_
->
deep_
*
sizeof
(
int8_t
)));
if
(
input_ptr_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Conv1x1 int8 Malloc input_ptr_ error!"
;
return
RET_MEMORY_FAILED
;
}
}
int8_t
*
src_in
=
reinterpret_cast
<
int8_t
*>
(
in_tensors_
[
0
]
->
Data
());
int8_t
*
src_out
=
reinterpret_cast
<
int8_t
*>
(
out_tensors_
[
0
]
->
Data
());
for
(
int
batch_index
=
0
;
batch_index
<
conv_param_
->
input_batch_
;
batch_index
++
)
{
Pre1x1Trans
(
src_in
+
batch_index
*
conv_param_
->
input_h_
*
conv_param_
->
input_w_
*
conv_param_
->
input_channel_
,
src_out
+
batch_index
*
matmul_param_
->
row_
*
matmul_param_
->
col_
);
PackInputSum16x4Int8
(
packed_input_
,
input_sum_
,
matmul_param_
->
deep_
,
matmul_param_
->
col_
,
matmul_param_
->
row_
,
conv_param_
);
int
error_code
=
LiteBackendParallelLaunch
(
Convolution1x1Int8Impl
,
this
,
thread_count_
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"conv1x1 fp16 error error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
}
if
(
pre_trans_input_
&&
input_ptr_
!=
nullptr
)
{
ctx_
->
allocator
->
Free
(
input_ptr_
);
input_ptr_
=
nullptr
;
}
return
RET_OK
;
}
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h
0 → 100644
浏览文件 @
6cfcdaab
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_1x1_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_1x1_INT8_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/errorcode.h"
#include "schema/model_generated.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "nnacl/int8/conv_int8.h"
#include "nnacl/int8/matmul_int8.h"
#include "nnacl/matmul_parameter.h"
#include "nnacl/optimized_kernel.h"
namespace mindspore::kernel {
// Int8 1x1 convolution kernel. The 1x1 convolution is executed as a quantized
// matrix multiplication (see matmul_param_); when stride/padding make the
// input pixels non-contiguous, the input is pre-packed first
// (pre_trans_input_ / Pre1x1Trans).
class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel {
 public:
  Convolution1x1Int8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                              const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                              const mindspore::lite::PrimitiveC *primitive)
      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~Convolution1x1Int8CPUKernel() override;

  int Init() override;
  int ReSize() override;
  int Run() override;

 public:
  // Worker entry invoked by the parallel launcher; task_id selects this
  // thread's share of the matmul output (presumably via thread_stride_ --
  // confirm in the .cc file).
  int RunImpl(int task_id);

 private:
  void FreeResizeBuf();  // releases buffers whose size depends on tensor shapes
  int InitParam();       // fills matmul_param_ (row_/col_/deep_) from conv shapes
  int InitWeightBias();  // packs weight/bias into the matmul-friendly layout
  // Copies the input pixels actually read by the 1x1 conv (honouring
  // stride/pad) from src_input into the dense buffer src_output.
  void Pre1x1Trans(int8_t *src_input, int8_t *src_output);
  void CheckSupportOptimize();  // decides support_optimize_ / matmul_func_

 private:
  int32_t *input_sum_ = nullptr; /* per-channel: oc4 format */
  int8_t *packed_weight_ = nullptr;  // weight repacked by InitWeightBias()
  int8_t *packed_input_ = nullptr;   // input repacked for the int8 matmul
  int8_t *input_ptr_ = nullptr;      // pre-trans buffer; allocated only when pre_trans_input_
  int8_t *output_ptr_ = nullptr;
  size_t thread_count_ = 1;
  size_t thread_stride_ = 0;
  bool pre_trans_input_ = false;
  MatMulParameter *matmul_param_ = nullptr;
  MATMUL_OPT_R_FUNC matmul_func_ = nullptr;  // optimized matmul entry; may stay null
  bool support_optimize_ = false;
};
}  // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_1x1_INT8_H_
mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
浏览文件 @
6cfcdaab
...
...
@@ -16,6 +16,7 @@
#include "src/runtime/kernel/arm/int8/convolution_int8.h"
#include "src/runtime/kernel/arm/int8/convolution_3x3_int8.h"
#include "src/runtime/kernel/arm/int8/convolution_1x1_int8.h"
#include "nnacl/int8/conv_int8.h"
#include "src/runtime/kernel/arm/base/layout_transform.h"
#include "schema/model_generated.h"
...
...
@@ -400,6 +401,9 @@ kernel::LiteKernel *CpuConvInt8KernelCreator(const std::vector<lite::tensor::Ten
kernel
::
LiteKernel
*
kernel
;
if
(
kernel_h
==
3
&&
kernel_w
==
3
&&
stride_h
==
1
&&
stride_w
==
1
&&
dilation_h
==
1
&&
dilation_w
==
1
)
{
kernel
=
new
(
std
::
nothrow
)
kernel
::
Convolution3x3Int8CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
,
primitive
);
}
else
if
(
kernel_h
==
1
&&
kernel_w
==
1
)
{
/* Convolution1x1Int8CPUKernel */
kernel
=
new
(
std
::
nothrow
)
kernel
::
ConvolutionInt8CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
,
primitive
);
}
else
{
kernel
=
new
(
std
::
nothrow
)
kernel
::
ConvolutionInt8CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
,
primitive
);
}
...
...
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
浏览文件 @
6cfcdaab
...
...
@@ -54,7 +54,7 @@ TEST_F(TestConv1x1Fp32, Input1x1PrePack1) {
conv_param
->
pad_h_
=
conv_param
->
pad_w_
=
2
;
float
out
[
20
]
=
{
0
};
Conv1x1InputPack
Fp32
(
in
,
out
,
conv_param
);
Conv1x1InputPack
(
in
,
out
,
conv_param
,
sizeof
(
float
)
);
EXPECT_EQ
(
0
,
lite
::
CompareOutputData
(
out
,
correct
,
20
));
delete
conv_param
;
}
...
...
@@ -95,7 +95,7 @@ TEST_F(TestConv1x1Fp32, Input1x1PrePack2) {
conv_param
->
pad_h_
=
conv_param
->
pad_w_
=
0
;
float
out
[
28
]
=
{
0
};
Conv1x1InputPack
Fp32
(
in
,
out
,
conv_param
);
Conv1x1InputPack
(
in
,
out
,
conv_param
,
sizeof
(
float
)
);
CompareOutputData
(
out
,
correct
,
28
,
0.0001
);
delete
conv_param
;
}
...
...
@@ -114,7 +114,7 @@ TEST_F(TestConv1x1Fp32, Input1x1PrePack3) {
float
correct
[]
=
{
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
17.025112
,
-
5.052577
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
};
Conv1x1InputPack
Fp32
(
in
,
out
,
conv_param
);
Conv1x1InputPack
(
in
,
out
,
conv_param
,
sizeof
(
float
)
);
EXPECT_EQ
(
0
,
lite
::
CompareOutputData
(
out
,
correct
,
18
));
delete
conv_param
;
}
...
...
@@ -136,7 +136,7 @@ TEST_F(TestConv1x1Fp32, Input1x1PrePack4) {
-
1.770
,
41.903
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
};
float
out
[
54
]
=
{
0
};
Conv1x1InputPack
Fp32
(
in
,
out
,
conv_param
);
Conv1x1InputPack
(
in
,
out
,
conv_param
,
sizeof
(
float
)
);
EXPECT_EQ
(
0
,
lite
::
CompareOutputData
(
out
,
correct
,
54
));
delete
conv_param
;
}
...
...
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/conv_1x1_int8_tests.cc
0 → 100644
浏览文件 @
6cfcdaab
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/lite_kernel.h"
#include "src/common/file_utils.h"
#include "nnacl/quantization/quantize.h"
#include "nnacl/common_func.h"
#include "mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h"
namespace
mindspore
{
using
lite
::
tensor
::
Tensor
;
// googletest fixture shared by the int8 1x1 convolution tests below.
class TestConv1x1Int8 : public mindspore::CommonTest {
 public:
  TestConv1x1Int8() {}
};
// Conv1x1InputPack on a 3x3x6 NHWC input with stride 2 and pad 1: the packed
// output keeps only the input pixels that the strided 1x1 conv actually
// reads; taps that fall outside the (padded) image stay zero.
TEST_F(TestConv1x1Int8, Input1x1PrePack1) {
  auto conv_param = new ConvParameter();
  conv_param->input_channel_ = 6;
  conv_param->input_h_ = conv_param->input_w_ = 3;
  conv_param->output_h_ = conv_param->output_w_ = 3;
  conv_param->stride_h_ = conv_param->stride_w_ = 2;
  conv_param->pad_h_ = conv_param->pad_w_ = 1;
  // 3 * 3 * 6 = 54 input values (NHWC).
  int8_t in[] = {4,  13,  -3, 16, 19, 8,  19, -6, -2, -9, 9,  18, 23, 8,  47, -14, 15, 4,
                 -0, 37,  -0, 6,  0,  -1, 37, 13, 11, 1,  -1, 41, 9,  14, 3,  0,   8,  9,
                 14, -14, -8, -8, -8, 7,  19, 17, 13, 3,  9,  18, -1, -0, 18, 0,   4,  -2};
  // Only the centre pixel (37, 13, 11, 1, -1, 41) lands on the strided grid.
  int8_t correct[] = {0, 0, 0, 0, 0, 0, 0,  0,  0,  0, 0,  0,  0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 37, 13, 11, 1, -1, 41, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0,  0,  0,  0, 0,  0,  0, 0, 0, 0, 0, 0};
  int8_t out[54] = {0};
  Conv1x1InputPack(in, out, conv_param, sizeof(int8_t));
  // CommonTest::CompareOutputData asserts internally; tolerance 0 = exact.
  CompareOutputData(out, correct, 54, 0);
  delete conv_param;
}
// Conv1x1InputPack on a 9x13 single-channel input mapped to a 4x5 output with
// stride 4 and pad 2; verifies the strided/padded gather of input pixels.
TEST_F(TestConv1x1Int8, Input1x1PrePack2) {
  auto conv_param = new ConvParameter();
  // 9 * 13 = 117 input values, written one image row per source row.
  int8_t in[] = {-0, -0, -7, -0, -6, 4,  9,  9,  12, -0, 6,  2,  13,
                 15, 16, -7, 9,  1,  10, 13, 17, 17, 4,  13, -6, 5,
                 7,  -7, 15, 0,  1,  -5, -7, 18, 15, 19, -7, 13, 7,
                 -0, 16, -5, 16, -7, 6,  10, -5, 10, 9,  12, -9, -8,
                 -4, 18, -5, 0,  7,  12, 13, 16, -9, -4, 18, -0, 8,
                 6,  2,  10, 16, 1,  -1, 2,  9,  8,  9,  13, 7,  -0,
                 15, -7, 0,  -0, 17, 19, 9,  17, -6, -2, 7,  -0, 10,
                 -6, -6, 18, -0, 9,  9,  6,  3,  -1, -8, 10, 17, -9,
                 17, 6,  -3, 7,  -2, -0, -9, 1,  -3, 15, 13, 4,  18};
  // Expected 4x5 packed output: zeros where the strided tap falls in padding.
  int8_t correct[] = {0, 0, 0, 0, 0, 0, 15, -7, -7, 0, 0, 0, 9, 7, 0, 0, 0, 0, 0, 0};
  conv_param->input_h_ = 9;
  conv_param->input_w_ = 13;
  conv_param->input_channel_ = 1;
  conv_param->output_h_ = 4;
  conv_param->output_w_ = 5;
  conv_param->stride_h_ = conv_param->stride_w_ = 4;
  conv_param->pad_h_ = conv_param->pad_w_ = 2;
  int8_t out[20] = {0};
  Conv1x1InputPack(in, out, conv_param, sizeof(int8_t));
  // CommonTest::CompareOutputData asserts internally; tolerance 0 = exact.
  CompareOutputData(out, correct, 20, 0);
  delete conv_param;
}
// Builds the tensors for a per-channel quantized int8 1x1 conv test:
// input 1x2x3x4 (NHWC), weight 3x1x1x4 with one QuantArg per output channel,
// output 1x2x3x3. *correct receives a malloc'ed copy of the expected int8
// output (caller frees it). Returns the output element count.
//
// Fix vs. original: QuantArg objects were heap-allocated with `new` and never
// freed; AddQuantParam copies its argument, so stack objects suffice.
int Conv1x1Int8TestInit1_perchannel(std::vector<lite::tensor::Tensor *> *inputs_,
                                    std::vector<lite::tensor::Tensor *> *outputs_, ConvParameter *conv_param,
                                    int8_t **correct) {
  Tensor *in_t = new Tensor(kNumberTypeInt8, {1, 2, 3, 4}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  mindspore::lite::tensor::QuantArg in_quant_arg;
  in_quant_arg.zeroPoint = -42, in_quant_arg.scale = 0.117647;
  in_t->AddQuantParam(in_quant_arg);
  in_t->MallocData();
  int8_t in[] = {62,  -14, 88, 2,   -35, 43,  83,  -111, 75,  26, 14,  -121,
                 -78, 56,  37, -31, 15,  -75, -10, -115, -71, 74, -65, -15};
  memcpy(in_t->Data(), in, in_t->ElementsNum() * sizeof(int8_t));
  inputs_->push_back(in_t);

  Tensor *weight_t = new Tensor(kNumberTypeInt8, {3, 1, 1, 4}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  weight_t->MallocData();
  // One quant param per output channel (per-channel quantization).
  mindspore::lite::tensor::QuantArg weight_quant_arg1;
  weight_quant_arg1.zeroPoint = 66, weight_quant_arg1.scale = 0.96439215686275;
  mindspore::lite::tensor::QuantArg weight_quant_arg2;
  weight_quant_arg2.zeroPoint = 33, weight_quant_arg2.scale = 0.76439215686275;
  mindspore::lite::tensor::QuantArg weight_quant_arg3;
  weight_quant_arg3.zeroPoint = -20, weight_quant_arg3.scale = 0.99117647;
  weight_t->AddQuantParam(weight_quant_arg1);
  weight_t->AddQuantParam(weight_quant_arg2);
  weight_t->AddQuantParam(weight_quant_arg3);
  int8_t weight[] = {65, 67, 65, 65, 32, 33, 34, 33, -19, -20, -19, -20};
  memcpy(weight_t->Data(), weight, weight_t->ElementsNum() * sizeof(int8_t));
  inputs_->push_back(weight_t);

  Tensor *out_t = new Tensor(kNumberTypeInt8, {1, 2, 3, 3}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  out_t->MallocData();
  mindspore::lite::tensor::QuantArg output_quant_arg;
  output_quant_arg.zeroPoint = 7, output_quant_arg.scale = 0.294321233;
  out_t->AddQuantParam(output_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  int8_t nchw_co[] = {-83, 34, 100, 10, 113, 55, 3, 16, 63, 6, 93, 20, 5, 6, 42, 35, 28, -24};
  memcpy(*correct, nchw_co, out_t->ElementsNum() * sizeof(int8_t));

  // 1x1 kernel, unit stride/dilation, no padding, no activation.
  conv_param->kernel_h_ = conv_param->kernel_w_ = 1;
  conv_param->stride_h_ = conv_param->stride_w_ = 1;
  conv_param->dilation_h_ = conv_param->dilation_w_ = 1;
  conv_param->pad_h_ = conv_param->pad_w_ = 0;
  conv_param->is_relu_ = conv_param->is_relu6_ = false;
  return out_t->ElementsNum();
}
// End-to-end run of Convolution1x1Int8CPUKernel with per-channel weight
// quantization, compared against precomputed int8 output (loose tolerance 70).
// Fixes vs. original: `correct` was uninitialized and `ctx` was leaked.
TEST_F(TestConv1x1Int8, Conv1x1TestPerChannel) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto conv_param = new ConvParameter();
  int8_t *correct = nullptr;  // set by the init helper below
  auto ctx = new lite::Context;
  ctx->thread_num_ = 1;
  int total_size = Conv1x1Int8TestInit1_perchannel(&inputs_, &outputs_, conv_param, &correct);
  kernel::Convolution1x1Int8CPUKernel *conv1x1 = new kernel::Convolution1x1Int8CPUKernel(
    reinterpret_cast<OpParameter *>(conv_param), inputs_, outputs_, ctx, nullptr);

  conv1x1->Init();
  conv1x1->Run();
  CompareOutputData(reinterpret_cast<int8_t *>(outputs_[0]->Data()), correct, total_size, 70);

  delete conv1x1;
  delete ctx;  // fix: context was leaked in the original
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
  free(correct);
}
// Builds the tensors for a per-layer quantized int8 1x1 conv test from float
// reference data: input 1x2x3x4 (NHWC), weight 3x1x1x4, output 1x2x3x3. The
// float values are quantized into the tensors with Quantize(); *correct
// receives a malloc'ed copy of the expected quantized output (caller frees).
// Returns the output element count.
//
// Fix vs. original: QuantArg objects were heap-allocated with `new` and never
// freed; AddQuantParam copies its argument, so stack objects suffice.
int Conv1x1Int8TestInit1(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                         ConvParameter *conv_param, int8_t **correct) {
  Tensor *in_t = new Tensor(kNumberTypeInt8, {1, 2, 3, 4}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  mindspore::lite::tensor::QuantArg in_quant_arg;
  in_quant_arg.zeroPoint = -42, in_quant_arg.scale = 0.117647;
  in_t->AddQuantParam(in_quant_arg);
  in_t->MallocData();
  float in[] = {12.216284, 3.3466918, 15.327419, 5.234958,  0.804376,   9.952188,  14.727955,  -8.080715,
                13.71383,  8.055829,  6.5845337, -9.25232,  -4.24519,   11.550042, 9.262012,   1.2780352,
                6.7263746, -3.9301445, 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505};
  Quantize(in, in_t->ElementsNum(), in_quant_arg.scale, in_quant_arg.zeroPoint,
           reinterpret_cast<int8_t *>(in_t->Data()));
  inputs_->push_back(in_t);

  Tensor *weight_t = new Tensor(kNumberTypeInt8, {3, 1, 1, 4}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  mindspore::lite::tensor::QuantArg weight_quant_arg;
  weight_quant_arg.zeroPoint = 66, weight_quant_arg.scale = 0.036439215686275;
  weight_t->AddQuantParam(weight_quant_arg);
  weight_t->MallocData();
  float weight[] = {-0.7308652, 0.5257509,   -0.87825793, -1.123181, -1.2206168, 0.562695,
                    1.5382664,  -0.5020635,  0.8591602,   -0.26410004, 1.1262615, 0.073132955};
  Quantize(weight, weight_t->ElementsNum(), weight_quant_arg.scale, weight_quant_arg.zeroPoint,
           reinterpret_cast<int8_t *>(weight_t->Data()));
  inputs_->push_back(weight_t);

  Tensor *out_t = new Tensor(kNumberTypeInt8, {1, 2, 3, 3}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  out_t->MallocData();
  mindspore::lite::tensor::QuantArg output_quant_arg;
  output_quant_arg.zeroPoint = 7, output_quant_arg.scale = 0.234321233;
  out_t->AddQuantParam(output_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  float nchw_co[] = {-26.51016327, 7.92113757,   27.25741343, 0.785643655,  31.3307619,  14.05927672,
                     -1.178490666, 2.5676252,    16.39408946, -0.394793726, 25.2866881,  3.827249175,
                     -0.626854507, -0.3122176,   10.42769169, 8.362184085,  6.04617807,  -9.252362384};
  Quantize(nchw_co, out_t->ElementsNum(), output_quant_arg.scale, output_quant_arg.zeroPoint, *correct);

  // 1x1 kernel, unit stride/dilation, no padding, no activation.
  conv_param->kernel_h_ = conv_param->kernel_w_ = 1;
  conv_param->stride_h_ = conv_param->stride_w_ = 1;
  conv_param->dilation_h_ = conv_param->dilation_w_ = 1;
  conv_param->pad_h_ = conv_param->pad_w_ = 0;
  conv_param->is_relu_ = conv_param->is_relu6_ = false;
  return out_t->ElementsNum();
}
// End-to-end run of Convolution1x1Int8CPUKernel with per-layer quantization,
// compared against quantized float reference output (tolerance 2).
// Fixes vs. original: `correct` was uninitialized and `ctx` was leaked.
TEST_F(TestConv1x1Int8, Conv1x1Int8Test1) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto conv_param = new ConvParameter();
  int8_t *correct = nullptr;  // set by the init helper below
  auto ctx = new lite::Context;
  ctx->thread_num_ = 1;
  int total_size = Conv1x1Int8TestInit1(&inputs_, &outputs_, conv_param, &correct);
  kernel::Convolution1x1Int8CPUKernel *conv1x1 = new kernel::Convolution1x1Int8CPUKernel(
    reinterpret_cast<OpParameter *>(conv_param), inputs_, outputs_, ctx, nullptr);

  conv1x1->Init();
  conv1x1->Run();
  CompareOutputData(reinterpret_cast<int8_t *>(outputs_[0]->Data()), correct, total_size, 2);

  delete conv1x1;
  delete ctx;  // fix: context was leaked in the original
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
  free(correct);
}
// Builds the int8 1x1 conv test tensors from the local binary files
// ./input, ./weight, ./bias and ./output: input 1x2x3x4 (NHWC), weight
// 3x1x1x4, int32 bias, output 1x2x3x3. *correct receives a malloc'ed copy of
// the expected quantized output (caller frees). Returns the output element
// count.
//
// Fixes vs. original:
//  - bias_t was never allocated: the code called weight_t->MallocData() a
//    second time instead of bias_t->MallocData(), so the memcpy into
//    bias_t->Data() wrote through an unallocated tensor.
//  - QuantArg objects were heap-allocated and leaked; AddQuantParam copies
//    its argument, so stack objects suffice.
int Conv1x1Int8TestInit2(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                         ConvParameter *conv_param, int8_t **correct) {
  size_t buffer_size;
  Tensor *in_t = new Tensor(kNumberTypeInt8, {1, 2, 3, 4}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  mindspore::lite::tensor::QuantArg in_quant_arg;
  in_quant_arg.zeroPoint = -42, in_quant_arg.scale = 0.117647;
  in_t->AddQuantParam(in_quant_arg);
  in_t->MallocData();
  std::string input_path = "./input";
  auto input = mindspore::lite::ReadFile(input_path.c_str(), &buffer_size);
  memcpy(in_t->Data(), input, buffer_size);
  inputs_->push_back(in_t);
  delete[] input;

  Tensor *weight_t = new Tensor(kNumberTypeInt8, {3, 1, 1, 4}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  mindspore::lite::tensor::QuantArg weight_quant_arg;
  weight_quant_arg.zeroPoint = 66, weight_quant_arg.scale = 0.036439215686275;
  weight_t->AddQuantParam(weight_quant_arg);
  weight_t->MallocData();
  std::string weight_path = "./weight";
  auto weight = mindspore::lite::ReadFile(weight_path.c_str(), &buffer_size);
  memcpy(weight_t->Data(), weight, buffer_size);
  inputs_->push_back(weight_t);
  delete[] weight;

  // NOTE(review): bias is declared with 4 elements while the weight implies 3
  // output channels -- confirm against the generated ./bias file.
  Tensor *bias_t = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  bias_t->MallocData();  // fix: was weight_t->MallocData(), leaving bias_t unallocated
  std::string bias_path = "./bias";
  auto bias = mindspore::lite::ReadFile(bias_path.c_str(), &buffer_size);
  memcpy(bias_t->Data(), bias, buffer_size);
  inputs_->push_back(bias_t);
  delete[] bias;

  Tensor *out_t = new Tensor(kNumberTypeInt8, {1, 2, 3, 3}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  out_t->MallocData();
  mindspore::lite::tensor::QuantArg output_quant_arg;
  output_quant_arg.zeroPoint = 7, output_quant_arg.scale = 0.234321233;
  out_t->AddQuantParam(output_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  std::string output_path = "./output";
  auto output = mindspore::lite::ReadFile(output_path.c_str(), &buffer_size);
  memcpy(*correct, output, buffer_size);
  delete[] output;

  // 1x1 kernel, unit stride/dilation, no padding, no activation.
  conv_param->kernel_h_ = conv_param->kernel_w_ = 1;
  conv_param->stride_h_ = conv_param->stride_w_ = 1;
  conv_param->dilation_h_ = conv_param->dilation_w_ = 1;
  conv_param->pad_h_ = conv_param->pad_w_ = 0;
  conv_param->is_relu_ = conv_param->is_relu6_ = false;
  return out_t->ElementsNum();
}
// End-to-end run of Convolution1x1Int8CPUKernel with bias, using file-based
// reference data (tolerance 2).
// Fixes vs. original: `correct` was uninitialized and `ctx` was leaked.
TEST_F(TestConv1x1Int8, Conv1x1Int8Test2) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto conv_param = new ConvParameter();
  int8_t *correct = nullptr;  // set by the init helper below
  auto ctx = new lite::Context;
  ctx->thread_num_ = 1;
  int total_size = Conv1x1Int8TestInit2(&inputs_, &outputs_, conv_param, &correct);
  kernel::Convolution1x1Int8CPUKernel *conv1x1 = new kernel::Convolution1x1Int8CPUKernel(
    reinterpret_cast<OpParameter *>(conv_param), inputs_, outputs_, ctx, nullptr);

  conv1x1->Init();
  conv1x1->Run();
  CompareOutputData(reinterpret_cast<int8_t *>(outputs_[0]->Data()), correct, total_size, 2);

  delete conv1x1;
  delete ctx;  // fix: context was leaked in the original
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
  free(correct);
}
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录