Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
f912b14b
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f912b14b
编写于
8月 18, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 18, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4675 fix bug
Merge pull request !4675 from fuzhiye/tmp
上级
883957ac
127b089a
变更
9
展开全部
隐藏空白更改
内联
并排
Showing
9 changed file
with
142 addition
and
131 deletion
+142
-131
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
...pore/lite/src/runtime/kernel/arm/base/convolution_base.cc
+12
-10
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
...pore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
+5
-0
mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
+2
-2
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.c
...c/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.c
+6
-4
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_utils_fp16.c
...e/src/runtime/kernel/arm/nnacl/fp16/winograd_utils_fp16.c
+3
-3
mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c
mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c
+0
-1
mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c
mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c
+9
-8
mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_transform.c
...re/lite/src/runtime/kernel/arm/nnacl/winograd_transform.c
+6
-4
mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_utils.c
mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_utils.c
+99
-99
未找到文件。
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
浏览文件 @
f912b14b
...
...
@@ -296,16 +296,6 @@ int ConvolutionBaseCPUKernel::SetQuantParam() {
MS_LOG
(
ERROR
)
<<
"Set Output Tensor Quant Param Failed."
;
return
ret
;
}
ret
=
SetQuantMultiplier
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Set Quant Multiplier Failed."
;
return
ret
;
}
// now only consider per tensor for output
CalculateActivationRangeQuantized
(
conv_param_
->
is_relu_
,
conv_param_
->
is_relu6_
,
conv_param_
->
conv_quant_arg_
.
output_quant_args_
[
0
].
zp_
,
conv_param_
->
conv_quant_arg_
.
output_quant_args_
[
0
].
scale_
,
&
conv_param_
->
conv_quant_arg_
.
out_act_min_
[
0
],
&
conv_param_
->
conv_quant_arg_
.
out_act_max_
[
0
]);
ret
=
SetIfPerChannel
();
if
(
ret
!=
RET_OK
)
{
...
...
@@ -317,6 +307,18 @@ int ConvolutionBaseCPUKernel::SetQuantParam() {
MS_LOG
(
ERROR
)
<<
"Set if per asymmetric failed."
;
return
ret
;
}
ret
=
SetQuantMultiplier
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Set Quant Multiplier Failed."
;
return
ret
;
}
// now only consider per tensor for output
CalculateActivationRangeQuantized
(
conv_param_
->
is_relu_
,
conv_param_
->
is_relu6_
,
conv_param_
->
conv_quant_arg_
.
output_quant_args_
[
0
].
zp_
,
conv_param_
->
conv_quant_arg_
.
output_quant_args_
[
0
].
scale_
,
&
conv_param_
->
conv_quant_arg_
.
out_act_min_
[
0
],
&
conv_param_
->
conv_quant_arg_
.
out_act_max_
[
0
]);
return
RET_OK
;
}
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
浏览文件 @
f912b14b
...
...
@@ -17,6 +17,7 @@
#include "src/runtime/kernel/arm/fp16/convolution_fp16.h"
#include <vector>
#include "src/runtime/kernel/arm/fp16/convolution_sw_fp16.h"
#include "src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h"
#include "src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h"
#include "src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h"
#include "src/runtime/kernel/arm/nnacl/fp16/conv_fp16.h"
...
...
@@ -243,6 +244,10 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
InputTransformUnitFunc
input_trans_func
=
nullptr
;
OutputTransformUnitFunc
output_trans_func
=
nullptr
;
CheckIfUseWinograd
(
&
use_winograd
,
&
out_unit
,
conv_param
,
input_trans_func
,
output_trans_func
);
if
(
use_winograd
)
{
kernel
=
new
(
std
::
nothrow
)
kernel
::
ConvolutionWinogradFP16CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
,
primitive
,
out_unit
);
}
if
(
kernel_h
!=
1
&&
kernel_w
!=
1
&&
!
use_winograd
)
{
kernel
=
new
(
std
::
nothrow
)
kernel
::
ConvolutionFP16CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
,
primitive
);
}
...
...
mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
浏览文件 @
f912b14b
...
...
@@ -51,7 +51,7 @@ int ConvolutionCPUKernel::InitWeightBias() {
// #endif
int
pack_weight_size
=
oc_block_num
*
oc_block
*
ic4
*
C4NUM
*
kernel_plane
;
//
init weight
//
=====================init weight==========================//
auto
origin_weight
=
reinterpret_cast
<
float
*>
(
in_tensors_
.
at
(
kWeightIndex
)
->
Data
());
packed_weight_
=
reinterpret_cast
<
float
*>
(
malloc
(
pack_weight_size
*
sizeof
(
float
)));
if
(
packed_weight_
==
nullptr
)
{
...
...
@@ -61,7 +61,7 @@ int ConvolutionCPUKernel::InitWeightBias() {
memset
(
packed_weight_
,
0
,
pack_weight_size
*
sizeof
(
float
));
PackWeightFp32
(
origin_weight
,
conv_param_
,
packed_weight_
,
oc_block
,
oc_block_num
);
//
init bias
//
=======================init bias==========================//
bias_data_
=
reinterpret_cast
<
float
*>
(
malloc
(
oc_block_num
*
oc_block
*
sizeof
(
float
)));
if
(
bias_data_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"malloc bias failed."
;
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.c
浏览文件 @
f912b14b
...
...
@@ -633,7 +633,9 @@ void WinogradOutputTransformFp16(const float16_t *gemm_out, float16_t *tmp_out_d
OutputTransformUnitFp16Func
output_trans_func
)
{
int
output_unit
=
conv_param
->
output_unit_
;
int
output_w
=
conv_param
->
output_w_
;
int
output_unit_block
=
UP_DIV
(
output_w
,
output_unit
);
int
output_h
=
conv_param
->
output_h_
;
int
output_w_unit_block
=
UP_DIV
(
output_w
,
output_unit
);
int
output_h_unit_block
=
UP_DIV
(
output_h
,
output_unit
);
int
output_channel
=
conv_param
->
output_channel_
;
int
oc8
=
UP_DIV
(
output_channel
,
C8NUM
);
int
input_unit
=
conv_param
->
input_unit_
;
...
...
@@ -644,16 +646,16 @@ void WinogradOutputTransformFp16(const float16_t *gemm_out, float16_t *tmp_out_d
int
dst_x_s
=
out_tile_index
%
output_unit_num
;
int
dst_y_s
=
out_tile_index
/
output_unit_num
;
int
src_tile_offset
=
i
*
oc8
*
C8NUM
*
input_unit
*
input_unit
;
int
dst_tile_offset
=
C8NUM
*
output_unit
*
(
dst_x_s
+
dst_y_s
*
output_unit_block
*
output_unit
);
int
dst_tile_offset
=
C8NUM
*
output_unit
*
(
dst_x_s
+
dst_y_s
*
output_
w_
unit_block
*
output_unit
);
for
(
int
j
=
0
;
j
<
oc8
;
j
++
)
{
int
src_oc8_offset
=
src_tile_offset
+
j
*
input_unit
*
input_unit
*
C8NUM
;
int
dst_oc8_offset
=
dst_tile_offset
+
j
*
C8NUM
*
output_
unit_block
*
output
_unit_block
*
output_unit
*
output_unit
;
dst_tile_offset
+
j
*
C8NUM
*
output_
h_unit_block
*
output_w
_unit_block
*
output_unit
*
output_unit
;
const
float16_t
*
src_ptr
=
gemm_out
+
src_oc8_offset
;
const
float16_t
*
bias_ptr
=
bias_data
+
j
*
C8NUM
;
float16_t
*
dst_ptr
=
tmp_out_data
+
dst_oc8_offset
;
output_trans_func
(
src_ptr
,
dst_ptr
,
bias_ptr
,
C8NUM
,
output_unit_block
*
output_unit
);
output_trans_func
(
src_ptr
,
dst_ptr
,
bias_ptr
,
C8NUM
,
output_
w_
unit_block
*
output_unit
);
}
out_tile_index
++
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_utils_fp16.c
浏览文件 @
f912b14b
...
...
@@ -1066,7 +1066,7 @@ void OutputTransform4x3UnitFp16(const float16_t *src_data, float16_t *dst_data,
const
float16_t
t10
=
0
.
5
f
*
(
src_data_10
-
src_data_20
);
const
float16_t
t11
=
0
.
5
f
*
(
src_data_11
-
src_data_21
);
const
float16_t
t12
=
0
.
5
f
*
(
src_data_12
-
src_data_22
);
const
const
float16_t
t13
=
0
.
5
f
*
(
src_data_13
-
src_data_23
);
const
float16_t
t13
=
0
.
5
f
*
(
src_data_13
-
src_data_23
);
const
float16_t
t20
=
0
.
25
f
*
(
src_data_10
+
src_data_20
)
+
src_data_30
;
const
float16_t
t21
=
0
.
25
f
*
(
src_data_11
+
src_data_21
)
+
src_data_31
;
...
...
@@ -2232,7 +2232,7 @@ void OutputTransform8x4UnitFp16(const float16_t *src_data, float16_t *dst_data,
const
float16_t
t24
=
0
.
25
f
*
d35
+
d45
+
2
.
25
f
*
d55
;
const
float16_t
t25
=
0
.
25
f
*
d36
+
d46
+
2
.
25
f
*
d56
;
const
float16_t
t26
=
0
.
25
f
*
d37
+
d47
+
2
.
25
f
*
d57
;
const
const
float16_t
t27
=
0
.
25
f
*
d38
+
d48
+
2
.
25
f
*
d58
;
const
float16_t
t27
=
0
.
25
f
*
d38
+
d48
+
2
.
25
f
*
d58
;
const
float16_t
t30
=
0
.
125
f
*
d01
+
d11
+
3
.
375
f
*
d21
+
src_data_70
;
const
float16_t
t31
=
0
.
125
f
*
d02
+
d12
+
3
.
375
f
*
d22
+
src_data_71
;
...
...
@@ -3392,7 +3392,7 @@ void OutputTransform8x6UnitFp16(const float16_t *src_data, float16_t *dst_data,
const
float16_t
t52
=
0
.
03125
f
*
d03
+
d13
+
7
.
59375
f
*
d23
+
src_data_72
;
const
float16_t
t53
=
0
.
03125
f
*
d04
+
d14
+
7
.
59375
f
*
d24
+
src_data_73
;
const
float16_t
t54
=
0
.
03125
f
*
d05
+
d15
+
7
.
59375
f
*
d25
+
src_data_74
;
const
const
float16_t
t55
=
0
.
03125
f
*
d06
+
d16
+
7
.
59375
f
*
d26
+
src_data_75
;
const
float16_t
t55
=
0
.
03125
f
*
d06
+
d16
+
7
.
59375
f
*
d26
+
src_data_75
;
const
float16_t
t56
=
0
.
03125
f
*
d07
+
d17
+
7
.
59375
f
*
d27
+
src_data_76
;
const
float16_t
t57
=
0
.
03125
f
*
d08
+
d18
+
7
.
59375
f
*
d28
+
src_data_77
;
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c
浏览文件 @
f912b14b
...
...
@@ -325,7 +325,6 @@ void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight
for
(
int
thread_id
=
task_id
;
thread_id
<
output_tile_count
;
thread_id
+=
thread_count
)
{
int
start_index
=
thread_id
*
tile_n
;
int
real_cal_num
=
(
output_count
-
start_index
)
<
tile_n
?
(
output_count
-
start_index
)
:
tile_n
;
// todo
int32_t
*
tmp_input_sum
=
input_sum
+
task_id
*
tile_n
;
int8_t
*
gemm_input
=
packed_input
+
thread_id
*
unit_size
*
tile_n
+
gemm_in_batch_offset
;
// clear tmp buffer before compute
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c
浏览文件 @
f912b14b
...
...
@@ -295,12 +295,12 @@ void Im2ColPackUnitInt8(const int8_t *input_data, int8_t *packed_input, int real
}
// kernel_w loop
}
// kernel_h loop
if
(
!
(
conv_param
->
conv_quant_arg_
.
asymmetric_
&
FILTER_ASYMMETRIC
))
{
return
;
continue
;
}
else
if
((
conv_param
->
conv_quant_arg_
.
asymmetric_
&
FILTER_ASYMMETRIC
)
&&
(
conv_param
->
conv_quant_arg_
.
per_channel_
&
FILTER_PER_CHANNEL
))
{
int
cal_num_offset
=
i
*
conv_param
->
output_channel_
;
for
(
int
l
=
0
;
l
<
conv_param
->
output_channel_
;
++
l
)
{
input_sum
[
cal_num_offset
+
l
]
=
input_accumulator
*
filter_arg
[
i
].
zp_
;
input_sum
[
cal_num_offset
+
l
]
=
input_accumulator
*
filter_arg
[
l
].
zp_
;
}
}
else
if
((
conv_param
->
conv_quant_arg_
.
asymmetric_
&
FILTER_ASYMMETRIC
)
&&
!
(
conv_param
->
conv_quant_arg_
.
per_channel_
&
FILTER_PER_CHANNEL
))
{
...
...
@@ -367,12 +367,12 @@ void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int r
}
}
if
(
!
(
conv_param
->
conv_quant_arg_
.
asymmetric_
&
FILTER_ASYMMETRIC
))
{
return
;
continue
;
}
else
if
((
conv_param
->
conv_quant_arg_
.
asymmetric_
&
FILTER_ASYMMETRIC
)
&&
(
conv_param
->
conv_quant_arg_
.
per_channel_
&
FILTER_PER_CHANNEL
))
{
int
cal_num_offset
=
i
*
conv_param
->
output_channel_
;
for
(
int
l
=
0
;
l
<
conv_param
->
output_channel_
;
++
l
)
{
input_sum
[
cal_num_offset
+
l
]
=
input_accumulator
*
filter_arg
[
i
].
zp_
;
input_sum
[
cal_num_offset
+
l
]
=
input_accumulator
*
filter_arg
[
l
].
zp_
;
}
}
else
if
((
conv_param
->
conv_quant_arg_
.
asymmetric_
&
FILTER_ASYMMETRIC
)
&&
!
(
conv_param
->
conv_quant_arg_
.
per_channel_
&
FILTER_PER_CHANNEL
))
{
...
...
@@ -870,8 +870,8 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int
int
c8
=
channel
/
C8NUM
*
C8NUM
;
int
batch
=
plane
*
channel
;
for
(
int
n
=
0
;
n
<
batches
;
n
++
)
{
const
float
*
src_batch
=
(
const
float
*
)
src
+
n
*
batch
;
float
*
dst_batch
=
(
float
*
)
dst
+
n
*
batch
;
const
float
*
src_batch
=
(
const
float
*
)
src
+
n
*
batch
;
float
*
dst_batch
=
(
float
*
)
dst
+
n
*
batch
;
int
hw
=
0
;
for
(;
hw
<
hw8
;
hw
+=
C8NUM
)
{
int
c
=
0
;
...
...
@@ -947,9 +947,10 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int
"st1 {v30.4s, v31.4s}, [x11], %[dstStride]
\n
"
:
:
[
dst_ptr
]
"r"
(
dst_ptr
),
[
src_ptr
]
"r"
(
src_ptr
),
[
srcStride
]
"r"
(
srcStride
),
[
dstStride
]
"r"
(
dstStride
)
:
[
dst_ptr
]
"r"
(
dst_ptr
),
[
src_ptr
]
"r"
(
src_ptr
),
[
srcStride
]
"r"
(
srcStride
),
[
dstStride
]
"r"
(
dstStride
)
:
"x10"
,
"x11"
,
"v0"
,
"v1"
,
"v2"
,
"v3"
,
"v4"
,
"v5"
,
"v6"
,
"v7"
,
"v8"
,
"v9"
,
"v10"
,
"v11"
,
"v12"
,
"v13"
,
"v14"
,
"v15"
,
"v16"
,
"v17"
,
"v18"
,
"v19"
,
"v20"
,
"v21"
,
"v22"
,
"v23"
,
"v24"
,
"v25"
,
"v26"
,
"v27"
,
"v28"
,
"v29"
,
"v15"
,
"v16"
,
"v17"
,
"v18"
,
"v19"
,
"v20"
,
"v21"
,
"v22"
,
"v23"
,
"v24"
,
"v25"
,
"v26"
,
"v27"
,
"v28"
,
"v29"
,
"v30"
,
"v31"
);
#else
for
(
int
tr
=
0
;
tr
<
C8NUM
;
tr
++
)
{
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_transform.c
浏览文件 @
f912b14b
...
...
@@ -81,7 +81,9 @@ void WinogradOutputTransform(const float *gemm_out, float *tmp_out_data, const f
OutputTransformUnitFunc
output_trans_func
)
{
int
output_unit
=
conv_param
->
output_unit_
;
int
output_w
=
conv_param
->
output_w_
;
int
output_unit_block
=
UP_DIV
(
output_w
,
output_unit
);
int
output_h
=
conv_param
->
output_h_
;
int
output_w_unit_block
=
UP_DIV
(
output_w
,
output_unit
);
int
output_h_unit_block
=
UP_DIV
(
output_h
,
output_unit
);
int
output_channel
=
conv_param
->
output_channel_
;
int
oc4
=
UP_DIV
(
output_channel
,
C4NUM
);
int
input_unit
=
conv_param
->
input_unit_
;
...
...
@@ -92,16 +94,16 @@ void WinogradOutputTransform(const float *gemm_out, float *tmp_out_data, const f
int
dst_x_s
=
out_tile_index
%
output_unit_num
;
int
dst_y_s
=
out_tile_index
/
output_unit_num
;
int
src_tile_offset
=
i
*
oc4
*
C4NUM
*
input_unit
*
input_unit
;
int
dst_tile_offset
=
C4NUM
*
output_unit
*
(
dst_x_s
+
dst_y_s
*
output_unit_block
*
output_unit
);
int
dst_tile_offset
=
C4NUM
*
output_unit
*
(
dst_x_s
+
dst_y_s
*
output_
w_
unit_block
*
output_unit
);
for
(
int
j
=
0
;
j
<
oc4
;
j
++
)
{
int
src_oc4_offset
=
src_tile_offset
+
j
*
input_unit
*
input_unit
*
C4NUM
;
int
dst_oc4_offset
=
dst_tile_offset
+
j
*
C4NUM
*
output_
unit_block
*
output
_unit_block
*
output_unit
*
output_unit
;
dst_tile_offset
+
j
*
C4NUM
*
output_
h_unit_block
*
output_w
_unit_block
*
output_unit
*
output_unit
;
const
float
*
src_ptr
=
gemm_out
+
src_oc4_offset
;
const
float
*
bias_ptr
=
bias_data
+
j
*
C4NUM
;
float
*
dst_ptr
=
tmp_out_data
+
dst_oc4_offset
;
output_trans_func
(
src_ptr
,
dst_ptr
,
bias_ptr
,
C4NUM
,
output_unit_block
*
output_unit
);
output_trans_func
(
src_ptr
,
dst_ptr
,
bias_ptr
,
C4NUM
,
output_
w_
unit_block
*
output_unit
);
}
out_tile_index
++
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_utils.c
浏览文件 @
f912b14b
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录