magicwindyyd / mindspore (forked from MindSpore / mindspore, in sync with the upstream project)
Commit da18781f
Authored Aug 21, 2020 by fuzhiye
Parent: 93393849

    modify input sum array

Showing 5 changed files with 26 additions and 14 deletions (+26 -14):
  mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc          +9 -3
  mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc          +2 -2
  mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_depthwise_int8.c  +3 -3
  mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c            +8 -4
  mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c                      +4 -2
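Taken together, the int8 diffs below resize and re-index the input_sum buffer used by per-channel quantized convolution: every per-tile row of sums now spans oc4 * C4NUM entries (the output channel count rounded up to a whole number of 4-channel blocks) instead of the raw output_channel_, so the buffer matches the packed layout the ARM kernels assume. A minimal sketch of that padding arithmetic, using ceil-division and block-size macros written to mirror nnacl's UP_DIV/C4NUM, with a hypothetical 10-channel layer:

  /* Padding arithmetic behind this commit (illustrative values only). */
  #include <stdio.h>

  #define C4NUM 4
  #define UP_DIV(x, y) (((x) + (y) - 1) / (y))

  int main(void) {
    int output_channel = 10;                 /* hypothetical channel count */
    int oc4 = UP_DIV(output_channel, C4NUM); /* 3 four-channel blocks */
    printf("row stride: old = %d entries, new = %d entries\n",
           output_channel, oc4 * C4NUM);     /* old = 10, new = 12 */
    return 0;
  }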
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc

@@ -235,10 +235,13 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
   conv_param->input_w_ = inputs.front()->Width();
   conv_param->output_h_ = outputs.front()->Height();
   conv_param->output_w_ = outputs.front()->Width();
+  bool prefer_flag = false;
+  if (conv_param->output_h_ * conv_param->output_w_ > 64) {
+    prefer_flag = true;
+  }
   kernel::LiteKernel *kernel = nullptr;
-  if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) {
-    kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
-  } else if (kernel_h == 1 && kernel_w == 1) {
+  if (kernel_h == 1 && kernel_w == 1) {
     kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   } else {
     bool use_winograd = false;
@@ -249,6 +252,9 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
     if (use_winograd) {
       kernel = new (std::nothrow)
         kernel::ConvolutionWinogradFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive, out_unit);
+    } else if (prefer_flag && kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 &&
+               dilation_h == 1 && dilation_w == 1) {
+      kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
     }
     if (kernel_h != 1 && kernel_w != 1 && !use_winograd) {
       kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
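The fp16 change is about kernel selection rather than input_sum: the 3x3-specialized kernel is no longer chosen unconditionally for every 3x3 stride-1, dilation-1 convolution; it is now preferred only once Winograd has been ruled out and the output plane is larger than 64 elements (prefer_flag). A condensed, standalone sketch of the apparent selection order (kernel classes stand in as name strings; the real creator's Winograd unit selection and null checks are omitted):

  /* Condensed sketch of the revised FP16 kernel choice (illustrative). */
  #include <stdbool.h>
  #include <stdio.h>

  static const char *SelectFp16Kernel(int kernel_h, int kernel_w, int stride_h, int stride_w,
                                      int dilation_h, int dilation_w, int out_h, int out_w,
                                      bool use_winograd) {
    bool prefer_flag = out_h * out_w > 64; /* new: prefer 3x3 only on large outputs */
    if (kernel_h == 1 && kernel_w == 1) return "Convolution1x1FP16CPUKernel";
    if (use_winograd) return "ConvolutionWinogradFP16CPUKernel";
    if (prefer_flag && kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 &&
        dilation_h == 1 && dilation_w == 1) {
      return "Convolution3x3FP16CPUKernel";
    }
    return "ConvolutionFP16CPUKernel";
  }

  int main(void) {
    /* a 3x3 stride-1 conv over a small 4x4 output now takes the generic path */
    printf("%s\n", SelectFp16Kernel(3, 3, 1, 1, 1, 1, 4, 4, false));
    /* the same conv over a 16x16 output still gets the 3x3-specialized kernel */
    printf("%s\n", SelectFp16Kernel(3, 3, 1, 1, 1, 1, 16, 16, false));
    return 0;
  }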
mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc

@@ -115,7 +115,7 @@ int ConvolutionInt8CPUKernel::InitWeightBias() {
   size_t input_sum_size;
   if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
-    input_sum_size = conv_param_->output_channel_ * tile_num_ * thread_count_ * sizeof(int32_t);
+    input_sum_size = oc4 * C4NUM * tile_num_ * thread_count_ * sizeof(int32_t);
   } else {
     input_sum_size = tile_num_ * thread_count_ * sizeof(int32_t);
   }
@@ -202,7 +202,7 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
   size_t input_sum_size;
   if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
-    input_sum_size = conv_param_->output_channel_ * tile_num_ * thread_count_ * sizeof(int32_t);
+    input_sum_size = oc4 * C4NUM * tile_num_ * thread_count_ * sizeof(int32_t);
   } else {
     input_sum_size = tile_num_ * thread_count_ * sizeof(int32_t);
   }
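Both InitWeightBias and InitWeightBiasOpt now allocate the per-channel input_sum buffer as oc4 * C4NUM entries per tile and thread, matching the padded row stride that conv_int8.c and pack.c adopt below; with the old output_channel_-based size, the padded indexing would run past the end of the buffer whenever the channel count is not a multiple of four. A small self-check of that invariant under hypothetical sizes (tile_n = 8, output_channel = 10):

  /* Sketch: the largest element the gemm touches under the new indexing
   * (n * oc4 * C4NUM + oc) must fit in the newly sized per-thread slice. */
  #include <assert.h>
  #include <stdio.h>

  #define C4NUM 4
  #define UP_DIV(x, y) (((x) + (y) - 1) / (y))

  int main(void) {
    int tile_n = 8, output_channel = 10;    /* hypothetical sizes */
    int oc4 = UP_DIV(output_channel, C4NUM);
    int entries = tile_n * oc4 * C4NUM;     /* per-thread slice, as allocated now */
    int max_index = 0;
    for (int n = 0; n < tile_n; n++) {
      for (int oc = 0; oc < output_channel; oc++) {
        int idx = n * oc4 * C4NUM + oc;     /* indexing from conv_int8.c */
        if (idx > max_index) max_index = idx;
      }
    }
    assert(max_index < entries);            /* 7 * 12 + 9 = 93 < 96 */
    printf("max index %d fits in %d entries\n", max_index, entries);
    return 0;
  }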
mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_depthwise_int8.c

@@ -169,9 +169,9 @@ void ConvDwInt8(int8_t *output_data, const int16_t *input_data, const int16_t *w
     const int32_t *bias = bias_data + oc * C4NUM;
     if (per_channel) {
-      out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_ + oc;
-      left_shift = conv_param->conv_quant_arg_.left_shift_ + oc;
-      right_shift = conv_param->conv_quant_arg_.right_shift_ + oc;
+      out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_ + oc * C4NUM;
+      left_shift = conv_param->conv_quant_arg_.left_shift_ + oc * C4NUM;
+      right_shift = conv_param->conv_quant_arg_.right_shift_ + oc * C4NUM;
     }
     DepthwiseBorderInt8(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param,
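Here the fix is a stride, not a size: the surrounding loop walks the channel dimension in blocks of C4NUM (the bias pointer already advances by oc * C4NUM), so the per-channel multiplier and shift pointers must advance by oc * C4NUM as well; the old "+ oc" stride pointed every block after the first at the wrong channel's quantization parameters. A toy illustration of the corrected stride, with made-up multiplier values:

  /* Toy walk over 16 channels in C4NUM-sized blocks (values are made up). */
  #include <stdint.h>
  #include <stdio.h>

  #define C4NUM 4

  int main(void) {
    int32_t quant_multiplier[16];
    for (int c = 0; c < 16; c++) quant_multiplier[c] = 1000 + c; /* per-channel */

    for (int oc = 0; oc < 16 / C4NUM; oc++) {
      const int32_t *out_multiplier = quant_multiplier + oc * C4NUM; /* fixed stride */
      /* block oc now covers channels [oc*C4NUM, oc*C4NUM + 3]; with the old
       * "+ oc" stride it would have started at channel oc instead */
      printf("block %d starts at multiplier %d\n", oc, (int)out_multiplier[0]);
    }
    return 0;
  }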
mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c

@@ -28,6 +28,7 @@ void IndirectGemmInt8(int8_t *dst, int32_t *tmp_dst, const int8_t *src, const in
   int32_t out_zp = conv_param->conv_quant_arg_.output_quant_args_[0].zp_;
   int32_t act_min = conv_param->conv_quant_arg_.out_act_min_[0];
   int32_t act_max = conv_param->conv_quant_arg_.out_act_max_[0];
+  int oc4 = UP_DIV(output_channel, C4NUM);
 #ifdef __aarch64__
   IndirectGemmInt8_4x4(dst, src, weight, bias, UP_DIV(kernel_plane, C4NUM), ic4, output_channel,
                        output_channel * sizeof(int8_t), input_sum, act_min, act_max, out_zp, out_multiplier,
@@ -96,7 +97,7 @@ void IndirectGemmInt8(int8_t *dst, int32_t *tmp_dst, const int8_t *src, const in
         dst[dst_tile_offset] = (int8_t)result;
       } else if ((conv_param->conv_quant_arg_.asymmetric_ & FILTER_ASYMMETRIC) &&
                  (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL)) {
-        tmp_dst[dst_tile_offset] -= input_sum[n * output_channel + oc];
+        tmp_dst[dst_tile_offset] -= input_sum[n * oc4 * C4NUM + oc];
         int result = tmp_dst[dst_tile_offset] + bias[oc];
         result = RoundingDivideByPOT(
           SaturatingRoundingDoublingHighMul(result * (1 << (unsigned int)shift_before[oc]), out_multiplier[oc]),
@@ -120,6 +121,7 @@ void IndirectGemmInt8Opt(int8_t *dst, int32_t *tmp_dst, const int8_t *src, const
   int32_t out_zp = conv_param->conv_quant_arg_.output_quant_args_[0].zp_;
   int32_t act_min = conv_param->conv_quant_arg_.out_act_min_[0];
   int32_t act_max = conv_param->conv_quant_arg_.out_act_max_[0];
+  int oc4 = UP_DIV(output_channel, C4NUM);
   if (gemm_func != NULL) {
 #ifdef __aarch64__
     gemm_func(dst, src, weight, bias, kernel_plane, ic4, output_channel, output_channel * sizeof(int8_t), input_sum,
@@ -181,7 +183,7 @@ void IndirectGemmInt8Opt(int8_t *dst, int32_t *tmp_dst, const int8_t *src, const
         dst[dst_tile_offset] = (int8_t)result;
       } else if ((conv_param->conv_quant_arg_.asymmetric_ & FILTER_ASYMMETRIC) &&
                  (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL)) {
-        tmp_dst[dst_tile_offset] -= input_sum[n * output_channel + oc];
+        tmp_dst[dst_tile_offset] -= input_sum[n * oc4 * C4NUM + oc];
         int result = tmp_dst[dst_tile_offset] + bias[oc];
         result = RoundingDivideByPOT(
           SaturatingRoundingDoublingHighMul(result * (1 << (unsigned int)shift_before[oc]), out_multiplier[oc]),
@@ -252,6 +254,7 @@ void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight, c
   int out_h = conv_param->output_h_;
   int out_w = conv_param->output_w_;
   int out_channel = conv_param->output_channel_;
+  int oc4 = UP_DIV(out_channel, C4NUM);
   int32_t input_zp = conv_param->conv_quant_arg_.input_quant_args_[0].zp_;
   int tile_n = conv_param->tile_num_;
@@ -264,7 +267,7 @@ void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight, c
   int packed_input_size = output_tile_count * tile_n * unit_size;
   int input_sum_offset;
   if (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL) {
-    input_sum_offset = tile_n * out_channel;
+    input_sum_offset = tile_n * oc4 * C4NUM;
   } else {
     input_sum_offset = tile_n;
   }
@@ -314,6 +317,7 @@ void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight
   int out_h = conv_param->output_h_;
   int out_w = conv_param->output_w_;
   int out_channel = conv_param->output_channel_;
+  int oc4 = UP_DIV(out_channel, C4NUM);
   int32_t input_zp = conv_param->conv_quant_arg_.input_quant_args_[0].zp_;
   int tile_n = conv_param->tile_num_;
   int thread_count = conv_param->thread_num_;
@@ -325,7 +329,7 @@ void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight
   int packed_input_size = output_tile_count * tile_n * unit_size;
   int input_sum_offset;
   if (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL) {
-    input_sum_offset = tile_n * out_channel;
+    input_sum_offset = tile_n * oc4 * C4NUM;
   } else {
     input_sum_offset = tile_n;
   }
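For context on what is being subtracted: with asymmetric per-channel quantization, the raw int32 accumulator of an int8 dot product must be corrected by the filter zero point times the sum of the inputs in the receptive field; that product is exactly what pack.c stores into input_sum and what the tmp_dst[dst_tile_offset] -= input_sum[...] lines consume. A two-element toy version of the correction (the real kernels also fold in the input zero point, bias, and rounding, all omitted here):

  /* Toy per-channel zero-point correction for an int8 dot product. */
  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    int8_t x[2] = {5, 7};   /* quantized inputs */
    int8_t w[2] = {3, -2};  /* quantized weights */
    int32_t filter_zp = 10; /* hypothetical per-channel filter zero point */

    int32_t acc = 0, input_accumulator = 0;
    for (int k = 0; k < 2; k++) {
      acc += x[k] * w[k];        /* raw gemm accumulator */
      input_accumulator += x[k]; /* the row sum pack.c accumulates */
    }
    int32_t input_sum = input_accumulator * filter_zp; /* as stored by pack.c */
    int32_t corrected = acc - input_sum;               /* as conv_int8.c applies it */
    /* equals sum_k x[k] * (w[k] - filter_zp) = 5*(3-10) + 7*(-2-10) = -119 */
    printf("corrected accumulator: %d\n", (int)corrected);
    return 0;
  }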
mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c

@@ -255,6 +255,7 @@ void Im2ColPackUnitInt8(const int8_t *input_data, int8_t *packed_input, int real
   int in_h = conv_param->input_h_;
   int in_w = conv_param->input_w_;
   int ic4 = UP_DIV(in_channel, C4NUM);
+  int oc4 = UP_DIV(conv_param->output_channel_, C4NUM);
   int out_w = conv_param->output_w_;
   for (int i = 0; i < real_cal_num; i++) {
@@ -297,7 +298,7 @@ void Im2ColPackUnitInt8(const int8_t *input_data, int8_t *packed_input, int real
       continue;
     } else if ((conv_param->conv_quant_arg_.asymmetric_ & FILTER_ASYMMETRIC) &&
                (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL)) {
-      int cal_num_offset = i * conv_param->output_channel_;
+      int cal_num_offset = i * oc4 * C4NUM;
       for (int l = 0; l < conv_param->output_channel_; ++l) {
         input_sum[cal_num_offset + l] = input_accumulator * filter_arg[l].zp_;
       }
@@ -325,6 +326,7 @@ void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int r
   int in_h = conv_param->input_h_;
   int in_w = conv_param->input_w_;
   int ic4 = UP_DIV(in_channel, C4NUM);
+  int oc4 = UP_DIV(conv_param->output_channel_, C4NUM);
   int out_w = conv_param->output_w_;
   int block_size = kernel_h * kernel_w;
@@ -368,7 +370,7 @@ void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int r
       continue;
     } else if ((conv_param->conv_quant_arg_.asymmetric_ & FILTER_ASYMMETRIC) &&
                (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL)) {
-      int cal_num_offset = i * conv_param->output_channel_;
+      int cal_num_offset = i * oc4 * C4NUM;
       for (int l = 0; l < conv_param->output_channel_; ++l) {
         input_sum[cal_num_offset + l] = input_accumulator * filter_arg[l].zp_;
       }
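On the producer side, Im2ColPackUnitInt8 and Im2ColPackUnitInt8Opt now write each tile's sums at the padded row stride of oc4 * C4NUM, leaving the last oc4 * C4NUM - output_channel_ entries of every row as padding so the layout matches the consumers above. A minimal sketch of the resulting layout with hypothetical sizes (2 tiles, 6 output channels):

  /* Sketch of the padded input_sum layout written by the pack routines. */
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  #define C4NUM 4
  #define UP_DIV(x, y) (((x) + (y) - 1) / (y))

  int main(void) {
    int real_cal_num = 2, output_channel = 6;
    int oc4 = UP_DIV(output_channel, C4NUM);       /* 2 */
    int32_t input_sum[2 * 2 * C4NUM];              /* real_cal_num * oc4 * C4NUM */
    memset(input_sum, 0, sizeof(input_sum));

    int32_t input_accumulator = 3;                 /* hypothetical row sum */
    int32_t filter_zp[6] = {1, 2, 3, 4, 5, 6};     /* per-channel zero points */
    for (int i = 0; i < real_cal_num; i++) {
      int cal_num_offset = i * oc4 * C4NUM;        /* new padded stride */
      for (int l = 0; l < output_channel; ++l) {
        input_sum[cal_num_offset + l] = input_accumulator * filter_zp[l];
      }
    }
    /* entries 6,7 and 14,15 stay zero: the C4NUM padding at each row's tail */
    for (int k = 0; k < 16; k++) printf("%d ", (int)input_sum[k]);
    printf("\n");
    return 0;
  }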