Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
f0988c7b
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f0988c7b
编写于
8月 15, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 15, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4439 change deconv post func to neon
Merge pull request !4439 from ling/deconv
上级
b5afbd8a
ed3ab685
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
805 addition
and
98 deletion
+805
-98
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc
+15
-43
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h
+0
-1
mindspore/lite/src/runtime/kernel/arm/nnacl/assembly/arm64/PostFuncBiasReluC8.S
...time/kernel/arm/nnacl/assembly/arm64/PostFuncBiasReluC8.S
+532
-0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c
...pore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c
+9
-0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h
...pore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h
+2
-0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.c
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.c
+36
-16
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.h
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.h
+0
-3
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
...test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
+19
-28
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc
...t/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc
+192
-7
未找到文件。
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc
浏览文件 @
f0988c7b
...
...
@@ -64,16 +64,14 @@ int DeConvolutionCPUKernel::ReSize() {
}
int
DeConvolutionCPUKernel
::
InitWeightBias
()
{
bias_data_
=
malloc
(
UP_ROUND
(
conv_param_
->
output_channel_
,
C4NUM
)
*
sizeof
(
float
));
if
(
bias_data_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"deconv malloc bias_data_ error!"
;
return
RET_ERROR
;
}
memset
(
bias_data_
,
0
,
UP_ROUND
(
conv_param_
->
output_channel_
,
C4NUM
)
*
sizeof
(
float
));
if
(
in_tensors_
.
size
()
==
3
)
{
bias_data_
=
malloc
(
UP_ROUND
(
conv_param_
->
output_channel_
,
C4NUM
)
*
sizeof
(
float
));
if
(
bias_data_
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"deconv malloc bias_data_ error!"
;
return
RET_ERROR
;
}
memset
(
bias_data_
,
0
,
UP_ROUND
(
conv_param_
->
output_channel_
,
C4NUM
)
*
sizeof
(
float
));
memcpy
(
bias_data_
,
in_tensors_
[
2
]
->
Data
(),
conv_param_
->
output_channel_
*
sizeof
(
float
));
}
else
{
bias_data_
=
nullptr
;
}
size_t
weight_pack_size
=
conv_param_
->
input_channel_
*
conv_param_
->
kernel_w_
*
conv_param_
->
kernel_h_
*
...
...
@@ -134,41 +132,21 @@ int DeConvFp32Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
return
RET_OK
;
}
int
DeConvFp32PostRun
(
int
task_id
,
LiteParallelGroupEnv
*
penv
,
void
*
cdata
)
{
auto
deconv
=
reinterpret_cast
<
DeConvolutionCPUKernel
*>
(
cdata
);
auto
error_code
=
deconv
->
DoPostFunc
(
task_id
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"DeConvFp32PostRun error task_id["
<<
task_id
<<
"] error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
return
RET_OK
;
}
int
DeConvolutionCPUKernel
::
DoDeconv
(
int
task_id
)
{
int
oc
=
MSMIN
(
thread_stride_
,
UP_DIV
(
conv_param_
->
output_channel_
,
C8NUM
)
-
task_id
*
thread_stride_
);
if
(
oc
<=
0
)
{
int
oc_res
=
MSMIN
(
thread_stride_
*
C8NUM
,
conv_param_
->
output_channel_
-
task_id
*
thread_stride_
*
C8NUM
);
if
(
oc
<=
0
||
oc_res
<=
0
)
{
return
RET_OK
;
}
MatMul
(
pack_input_
,
weight_ptr_
+
task_id
*
thread_stride_
*
C8NUM
*
kernel_plane_
*
matmul_param_
->
deep_
,
tmp_buffer_
+
task_id
*
thread_stride_
*
C8NUM
*
kernel_plane_
*
matmul_param_
->
row_8_
,
nullptr
,
ActType_No
,
matmul_param_
->
deep_
,
matmul_param_
->
row_8_
,
oc
*
C8NUM
*
kernel_plane_
,
matmul_param_
->
col_
,
false
);
auto
tmp_buffer
=
tmp_buffer_
+
task_id
*
thread_stride_
*
C8NUM
*
kernel_plane_
*
matmul_param_
->
row_8_
;
MatMul
(
pack_input_
,
weight_ptr_
+
task_id
*
thread_stride_
*
C8NUM
*
kernel_plane_
*
matmul_param_
->
deep_
,
tmp_buffer
,
nullptr
,
ActType_No
,
matmul_param_
->
deep_
,
matmul_param_
->
row_8_
,
oc
*
C8NUM
*
kernel_plane_
,
matmul_param_
->
col_
,
false
);
return
RET_OK
;
}
int
DeConvolutionCPUKernel
::
DoPostFunc
(
int
task_id
)
{
int
oc
=
MSMIN
(
thread_stride_
*
C8NUM
,
conv_param_
->
output_channel_
-
task_id
*
thread_stride_
*
C8NUM
);
if
(
oc
<=
0
)
{
return
RET_OK
;
}
float
*
bias
=
(
bias_data_
==
nullptr
)
?
nullptr
:
reinterpret_cast
<
float
*>
(
bias_data_
)
+
thread_stride_
*
task_id
*
C8NUM
;
DeConvPostFp32C8x8
(
tmp_buffer_
+
task_id
*
thread_stride_
*
C8NUM
*
kernel_plane_
*
matmul_param_
->
row_8_
,
pack_output_
+
task_id
*
thread_stride_
*
C8NUM
*
output_plane_
,
bias
,
output_ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
oc
,
conv_param_
);
DeConvPostFp32C8x8
(
tmp_buffer
,
pack_output_
+
task_id
*
thread_stride_
*
C8NUM
*
output_plane_
,
reinterpret_cast
<
float
*>
(
bias_data_
)
+
thread_stride_
*
task_id
*
C8NUM
,
output_ptr_
+
task_id
*
thread_stride_
*
C8NUM
,
oc_res
,
conv_param_
);
return
RET_OK
;
}
...
...
@@ -213,12 +191,6 @@ int DeConvolutionCPUKernel::Run() {
MS_LOG
(
ERROR
)
<<
"deconv fp32 run error! error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
error_code
=
LiteBackendParallelLaunch
(
DeConvFp32PostRun
,
this
,
thread_count_
);
if
(
error_code
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"deconv fp32 postrun error! error_code["
<<
error_code
<<
"]"
;
return
RET_ERROR
;
}
}
return
RET_OK
;
}
...
...
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h
浏览文件 @
f0988c7b
...
...
@@ -43,7 +43,6 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
public:
int
DoDeconv
(
int
task_id
);
int
DoPostFunc
(
int
task_id
);
private:
int
InitParam
();
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/assembly/arm64/PostFuncBiasReluC8.S
0 → 100644
浏览文件 @
f0988c7b
#ifdef __aarch64__
.
text
.
align
5
//.
p2align
5
,,
15
.
global
PostFuncBiasReluC8
#ifndef __APPLE__
.
type
PostFuncBiasReluC8
,
%
function
#endif
//
void
PostFuncBiasReluC8
(
float
*
dst
,
const
float
*
src
,
const
float
*
bias
,
size_t
oc8div
,
size_t
oc8mod
//
size_t
plane_size
,
size_t
stride
,
int
relu_type
)
;
//
x0
dst
x1
src
x2
bias
//
x3
oc8div
x4
oc8mod
x5
plane_size
//
x6
stride
x7
relu_type
//
v0
~
v15
value
//
v16
v17
bias
data
//
x24
x25
write
loop
tmp
buf
//
v26
relu6
#
6
; v27 relu #0
//
w10
oc8
loop
control
//
w13
hw
loop
control
PostFuncBiasReluC8
:
movi
v26
.4
s
,
#
6
scvtf
v26
.4
s
,
v26
.4
s
dup
v27
.4
s
,
wzr
mov
w10
,
#
0
Loop_C8
:
cmp
w10
,
w3
beq
Loop_C1
mov
x25
,
#
4
mul
x24
,
x10
,
x25
add
x25
,
x0
,
x24
add
w10
,
w10
,
#
8
mov
w13
,
w5
ld1
{
v16
.4
s
,
v17
.4
s
},
[
x2
],
#
32
Loop8x8
:
cmp
w13
,
#
8
blt
Loop_4x8
sub
w13
,
w13
,
#
8
ld1
{
v0
.4
s
,
v1
.4
s
,
v2
.4
s
,
v3
.4
s
},
[
x1
],
#
64
ld1
{
v4
.4
s
,
v5
.4
s
,
v6
.4
s
,
v7
.4
s
},
[
x1
],
#
64
ld1
{
v8
.4
s
,
v9
.4
s
,
v10
.4
s
,
v11
.4
s
},
[
x1
],
#
64
ld1
{
v12
.4
s
,
v13
.4
s
,
v14
.4
s
,
v15
.4
s
},
[
x1
],
#
64
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fadd
v2
.4
s
,
v2
.4
s
,
v16
.4
s
fadd
v3
.4
s
,
v3
.4
s
,
v17
.4
s
fadd
v4
.4
s
,
v4
.4
s
,
v16
.4
s
fadd
v5
.4
s
,
v5
.4
s
,
v17
.4
s
fadd
v6
.4
s
,
v6
.4
s
,
v16
.4
s
fadd
v7
.4
s
,
v7
.4
s
,
v17
.4
s
fadd
v8
.4
s
,
v8
.4
s
,
v16
.4
s
fadd
v9
.4
s
,
v9
.4
s
,
v17
.4
s
fadd
v10
.4
s
,
v10
.4
s
,
v16
.4
s
fadd
v11
.4
s
,
v11
.4
s
,
v17
.4
s
fadd
v12
.4
s
,
v12
.4
s
,
v16
.4
s
fadd
v13
.4
s
,
v13
.4
s
,
v17
.4
s
fadd
v14
.4
s
,
v14
.4
s
,
v16
.4
s
fadd
v15
.4
s
,
v15
.4
s
,
v17
.4
s
cmp
w7
,
#
2
beq
Relu6_8x8
cmp
w7
,
#
1
beq
Relu_8x8
b
Write_8x8
Relu6_8x8
:
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmin
v1
.4
s
,
v1
.4
s
,
v26
.4
s
fmin
v2
.4
s
,
v2
.4
s
,
v26
.4
s
fmin
v3
.4
s
,
v3
.4
s
,
v26
.4
s
fmin
v4
.4
s
,
v4
.4
s
,
v26
.4
s
fmin
v5
.4
s
,
v5
.4
s
,
v26
.4
s
fmin
v6
.4
s
,
v6
.4
s
,
v26
.4
s
fmin
v7
.4
s
,
v7
.4
s
,
v26
.4
s
fmin
v8
.4
s
,
v8
.4
s
,
v26
.4
s
fmin
v9
.4
s
,
v9
.4
s
,
v26
.4
s
fmin
v10
.4
s
,
v10
.4
s
,
v26
.4
s
fmin
v11
.4
s
,
v11
.4
s
,
v26
.4
s
fmin
v12
.4
s
,
v12
.4
s
,
v26
.4
s
fmin
v13
.4
s
,
v13
.4
s
,
v26
.4
s
fmin
v14
.4
s
,
v14
.4
s
,
v26
.4
s
fmin
v15
.4
s
,
v15
.4
s
,
v26
.4
s
Relu_8x8
:
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
fmax
v2
.4
s
,
v2
.4
s
,
v27
.4
s
fmax
v3
.4
s
,
v3
.4
s
,
v27
.4
s
fmax
v4
.4
s
,
v4
.4
s
,
v27
.4
s
fmax
v5
.4
s
,
v5
.4
s
,
v27
.4
s
fmax
v6
.4
s
,
v6
.4
s
,
v27
.4
s
fmax
v7
.4
s
,
v7
.4
s
,
v27
.4
s
fmax
v8
.4
s
,
v8
.4
s
,
v27
.4
s
fmax
v9
.4
s
,
v9
.4
s
,
v27
.4
s
fmax
v10
.4
s
,
v10
.4
s
,
v27
.4
s
fmax
v11
.4
s
,
v11
.4
s
,
v27
.4
s
fmax
v12
.4
s
,
v12
.4
s
,
v27
.4
s
fmax
v13
.4
s
,
v13
.4
s
,
v27
.4
s
fmax
v14
.4
s
,
v14
.4
s
,
v27
.4
s
fmax
v15
.4
s
,
v15
.4
s
,
v27
.4
s
Write_8x8
:
st1
{
v0
.4
s
,
v1
.4
s
},
[
x25
],
x6
st1
{
v2
.4
s
,
v3
.4
s
},
[
x25
],
x6
st1
{
v4
.4
s
,
v5
.4
s
},
[
x25
],
x6
st1
{
v6
.4
s
,
v7
.4
s
},
[
x25
],
x6
st1
{
v8
.4
s
,
v9
.4
s
},
[
x25
],
x6
st1
{
v10
.4
s
,
v11
.4
s
},
[
x25
],
x6
st1
{
v12
.4
s
,
v13
.4
s
},
[
x25
],
x6
st1
{
v14
.4
s
,
v15
.4
s
},
[
x25
],
x6
b
Loop8x8
Loop_4x8
:
cmp
w13
,
#
4
blt
Loop_1x8
sub
w13
,
w13
,
#
4
ld1
{
v0
.4
s
,
v1
.4
s
,
v2
.4
s
,
v3
.4
s
},
[
x1
],
#
64
ld1
{
v4
.4
s
,
v5
.4
s
,
v6
.4
s
,
v7
.4
s
},
[
x1
],
#
64
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fadd
v2
.4
s
,
v2
.4
s
,
v16
.4
s
fadd
v3
.4
s
,
v3
.4
s
,
v17
.4
s
fadd
v4
.4
s
,
v4
.4
s
,
v16
.4
s
fadd
v5
.4
s
,
v5
.4
s
,
v17
.4
s
fadd
v6
.4
s
,
v6
.4
s
,
v16
.4
s
fadd
v7
.4
s
,
v7
.4
s
,
v17
.4
s
cmp
w7
,
#
2
beq
Relu6_4x8
cmp
w7
,
#
1
beq
Relu_4x8
b
Write_4x8
Relu6_4x8
:
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmin
v1
.4
s
,
v1
.4
s
,
v26
.4
s
fmin
v2
.4
s
,
v2
.4
s
,
v26
.4
s
fmin
v3
.4
s
,
v3
.4
s
,
v26
.4
s
fmin
v4
.4
s
,
v4
.4
s
,
v26
.4
s
fmin
v5
.4
s
,
v5
.4
s
,
v26
.4
s
fmin
v6
.4
s
,
v6
.4
s
,
v26
.4
s
fmin
v7
.4
s
,
v7
.4
s
,
v26
.4
s
Relu_4x8
:
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
fmax
v2
.4
s
,
v2
.4
s
,
v27
.4
s
fmax
v3
.4
s
,
v3
.4
s
,
v27
.4
s
fmax
v4
.4
s
,
v4
.4
s
,
v27
.4
s
fmax
v5
.4
s
,
v5
.4
s
,
v27
.4
s
fmax
v6
.4
s
,
v6
.4
s
,
v27
.4
s
fmax
v7
.4
s
,
v7
.4
s
,
v27
.4
s
Write_4x8
:
st1
{
v0
.4
s
,
v1
.4
s
},
[
x25
],
x6
st1
{
v2
.4
s
,
v3
.4
s
},
[
x25
],
x6
st1
{
v4
.4
s
,
v5
.4
s
},
[
x25
],
x6
st1
{
v6
.4
s
,
v7
.4
s
},
[
x25
],
x6
Loop_1x8
:
cmp
w7
,
#
2
beq
Relu6_1x8
cmp
w7
,
#
1
beq
Relu_1x8
b
Write_1x8
Relu6_1x8
:
cmp
w13
,
#
0
beq
Loop_C8
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmin
v1
.4
s
,
v1
.4
s
,
v26
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
st1
{
v0
.4
s
,
v1
.4
s
},
[
x25
],
x6
b
Relu6_1x8
Relu_1x8
:
cmp
w13
,
#
0
beq
Loop_C8
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
st1
{
v0
.4
s
,
v1
.4
s
},
[
x25
],
x6
b
Relu_1x8
Write_1x8
:
cmp
w13
,
#
0
beq
Loop_C8
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
st1
{
v0
.4
s
,
v1
.4
s
},
[
x25
],
x6
b
Write_1x8
// Remainder path: entered from Loop_C8 once w10 == oc8div (x3). Handles the
// last oc8mod (x4) channels, which do not fill a whole 8-channel group.
Loop_C1:
    cmp x4, #0
    beq End                              // no leftover channels
    // Loop_C8 addresses its output through x25 = x0 + 4 * w10 and never
    // advances x0 itself. Step dst past the w10 channels already written so the
    // remainder is stored after them instead of on top of channel 0.
    // (Every write to w10 zeroes the upper half of x10, so x10 is safe to use.)
    add x0, x0, x10, lsl #2
    mov w13, w5                          // hw loop counter = plane_size
    ld1 {v16.4s, v17.4s}, [x2], #32      // bias values for this channel group
    // Dispatch on the number of leftover channels.
    cmp x4, #1
    beq Loop_C1_1
    cmp x4, #2
    beq Loop_C1_2
    cmp x4, #3
    beq Loop_C1_3
    cmp x4, #4
    beq Loop_C1_4
    cmp x4, #5
    beq Loop_C1_5
    cmp x4, #6
    beq Loop_C1_6
    cmp x4, #7
    beq Loop_C1_7
    // No match: execution falls through to the code that follows this dispatch.
Loop_C1_1
:
cmp
w7
,
#
2
beq
Loop_C1_1_Relu6
cmp
w7
,
#
1
beq
Loop_C1_1_Relu
b
Loop_C1_1_Write
Loop_C1_1_Relu6
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
str
s0
,
[
x0
]
add
x0
,
x0
,
x6
b
Loop_C1_1_Relu6
Loop_C1_1_Relu
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
str
s0
,
[
x0
]
add
x0
,
x0
,
x6
b
Loop_C1_1_Relu
Loop_C1_1_Write
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
str
s0
,
[
x0
]
add
x0
,
x0
,
x6
b
Loop_C1_1_Write
Loop_C1_2
:
cmp
w7
,
#
2
beq
Loop_C1_2_Relu6
cmp
w7
,
#
1
beq
Loop_C1_2_Relu
b
Loop_C1_2_Write
Loop_C1_2_Relu6
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
dup
s1
,
v0
.
s
[
1
]
stp
s0
,
s1
,
[
x0
]
add
x0
,
x0
,
x6
b
Loop_C1_2_Relu6
Loop_C1_2_Relu
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
dup
s1
,
v0
.
s
[
1
]
stp
s0
,
s1
,
[
x0
]
add
x0
,
x0
,
x6
b
Loop_C1_2_Relu
Loop_C1_2_Write
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
dup
s1
,
v0
.
s
[
1
]
stp
s0
,
s1
,
[
x0
]
add
x0
,
x0
,
x6
b
Loop_C1_2_Write
Loop_C1_3
:
add
x25
,
x0
,
#
8
cmp
w7
,
#
2
beq
Loop_C1_3_Relu6
cmp
w7
,
#
1
beq
Loop_C1_3_Relu
b
Loop_C1_3_Write
Loop_C1_3_Relu6
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
dup
s1
,
v0
.
s
[
1
]
stp
s0
,
s1
,
[
x0
]
add
x0
,
x0
,
x6
st1
{
v0
.
s
}[
2
],
[
x25
],
x6
b
Loop_C1_3_Relu6
Loop_C1_3_Relu
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
dup
s1
,
v0
.
s
[
1
]
stp
s0
,
s1
,
[
x0
]
add
x0
,
x0
,
x6
st1
{
v0
.
s
}[
2
],
[
x25
],
x6
b
Loop_C1_3_Relu
Loop_C1_3_Write
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
dup
s1
,
v0
.
s
[
1
]
stp
s0
,
s1
,
[
x0
]
add
x0
,
x0
,
x6
st1
{
v0
.
s
}[
2
],
[
x25
],
x6
b
Loop_C1_3_Write
Loop_C1_4
:
cmp
w7
,
#
2
beq
Loop_C1_4_Relu6
cmp
w7
,
#
1
beq
Loop_C1_4_Relu
b
Loop_C1_4_Write
Loop_C1_4_Relu6
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
b
Loop_C1_4_Relu6
// Four leftover channels, ReLU activation: one plane position per iteration.
// w13 counts the remaining plane positions; x1 walks the source in 32-byte
// steps; x0 is the destination, advanced by x6 on every store.
Loop_C1_4_Relu:
    cmp w13, #0
    beq End
    sub w13, w13, #1
    ld1 {v0.4s, v1.4s}, [x1], #32        // load 8 source floats; only v0 is used below
    fadd v0.4s, v0.4s, v16.4s            // add bias
    fmax v0.4s, v0.4s, v27.4s            // ReLU: max(x, 0)
    st1 {v0.4s}, [x0], x6                // store 4 channels, advance dst by x6
    // Loop back to the ReLU loop itself. The original branched to
    // Loop_C1_4_Relu6, which applied the 6.0 upper clamp to every plane
    // position after the first.
    b Loop_C1_4_Relu
Loop_C1_4_Write
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
b
Loop_C1_4_Write
Loop_C1_5
:
add
x25
,
x0
,
#
16
cmp
w7
,
#
2
beq
Loop_C1_5_Relu6
cmp
w7
,
#
1
beq
Loop_C1_5_Relu
b
Loop_C1_5_Write
Loop_C1_5_Relu6
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmin
v1
.4
s
,
v1
.4
s
,
v26
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
str
s1
,
[
x25
]
add
x25
,
x25
,
x6
b
Loop_C1_5_Relu6
Loop_C1_5_Relu
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
str
s1
,
[
x25
]
add
x25
,
x25
,
x6
b
Loop_C1_5_Relu
Loop_C1_5_Write
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
str
s1
,
[
x25
]
add
x25
,
x25
,
x6
b
Loop_C1_5_Write
Loop_C1_6
:
add
x25
,
x0
,
#
16
cmp
w7
,
#
2
beq
Loop_C1_6_Relu6
cmp
w7
,
#
1
beq
Loop_C1_6_Relu
b
Loop_C1_6_Write
Loop_C1_6_Relu6
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmin
v1
.4
s
,
v1
.4
s
,
v26
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
dup
s0
,
v1
.
s
[
1
]
stp
s1
,
s0
,
[
x25
]
add
x25
,
x25
,
x6
b
Loop_C1_6_Relu6
Loop_C1_6_Relu
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
dup
s0
,
v1
.
s
[
1
]
stp
s1
,
s0
,
[
x25
]
add
x25
,
x25
,
x6
b
Loop_C1_6_Relu
Loop_C1_6_Write
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
dup
s0
,
v1
.
s
[
1
]
stp
s1
,
s0
,
[
x25
]
add
x25
,
x25
,
x6
b
Loop_C1_6_Write
Loop_C1_7
:
add
x25
,
x0
,
#
16
add
x24
,
x0
,
#
24
cmp
w7
,
#
2
beq
Loop_C1_7_Relu6
cmp
w7
,
#
1
beq
Loop_C1_7_Relu
b
Loop_C1_7_Write
Loop_C1_7_Relu6
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fmin
v0
.4
s
,
v0
.4
s
,
v26
.4
s
fmin
v1
.4
s
,
v1
.4
s
,
v26
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
dup
s0
,
v1
.
s
[
1
]
stp
s1
,
s0
,
[
x25
]
add
x25
,
x25
,
x6
st1
{
v1
.
s
}[
2
],
[
x24
],
x6
b
Loop_C1_7_Relu6
Loop_C1_7_Relu
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
fmax
v0
.4
s
,
v0
.4
s
,
v27
.4
s
fmax
v1
.4
s
,
v1
.4
s
,
v27
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
dup
s0
,
v1
.
s
[
1
]
stp
s1
,
s0
,
[
x25
]
add
x25
,
x25
,
x6
st1
{
v1
.
s
}[
2
],
[
x24
],
x6
b
Loop_C1_7_Relu
Loop_C1_7_Write
:
cmp
w13
,
#
0
beq
End
sub
w13
,
w13
,
#
1
ld1
{
v0
.4
s
,
v1
.4
s
},
[
x1
],
#
32
fadd
v0
.4
s
,
v0
.4
s
,
v16
.4
s
fadd
v1
.4
s
,
v1
.4
s
,
v17
.4
s
st1
{
v0
.4
s
},
[
x0
],
x6
dup
s0
,
v1
.
s
[
1
]
stp
s1
,
s0
,
[
x25
]
add
x25
,
x25
,
x6
st1
{
v1
.
s
}[
2
],
[
x24
],
x6
b
Loop_C1_7_Write
End
:
ret
#endif
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c
浏览文件 @
f0988c7b
...
...
@@ -113,6 +113,15 @@ void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bi
/*
 * Post-processes a C8-packed convolution result (c8_out_ptr) into out_ptr,
 * applying bias_ptr and an optional activation.
 *
 * With ENABLE_ARM64 defined, the work is handed to the PostFuncBiasReluC8
 * assembly routine; otherwise the generic PostConvFuncComm is used with a
 * channel-group size of C8NUM.
 *
 * is_relu6 takes precedence over is_relu when both are set.
 */
void PostConvFuncFp32C8(const float *c8_out_ptr, float *out_ptr, const float *bias_ptr, size_t output_channel,
                        size_t plane_size, size_t stride, bool is_relu, bool is_relu6) {
#ifdef ENABLE_ARM64
  /* Split the channel count into its whole-C8NUM part and the leftover. */
  size_t oc8mod = output_channel % C8NUM;
  size_t oc8div = output_channel - oc8mod;
  /* The assembly routine receives the stride in bytes rather than in floats. */
  size_t stride_size = stride * sizeof(float);
  /* Activation selector passed to the routine: 0 = none, 1 = relu, 2 = relu6. */
  size_t relu_type = is_relu6 ? 2 : (is_relu ? 1 : 0);
  PostFuncBiasReluC8(out_ptr, c8_out_ptr, bias_ptr, oc8div, oc8mod, plane_size, stride_size, relu_type);
#else
  PostConvFuncComm(c8_out_ptr, out_ptr, bias_ptr, output_channel, plane_size, stride, is_relu, is_relu6, C8NUM);
#endif
}
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h
浏览文件 @
f0988c7b
...
...
@@ -61,6 +61,8 @@ void C4Relu6(float *dst, const float *input, size_t oc, size_t plane_size, size_
void
ConvDwFp32Border
(
float
*
dst
,
const
float
*
src
,
const
float
*
weight
,
const
float
*
bias
,
size_t
height
,
size_t
width
,
size_t
in_kh_step
,
size_t
in_kw_step
,
size_t
kernel_w
,
size_t
relu
,
size_t
relu6
);
void
PostFuncBiasReluC8
(
float
*
dst
,
const
float
*
src
,
const
float
*
bias
,
size_t
oc8div
,
size_t
oc8mod
,
size_t
plane_size
,
size_t
stride
,
size_t
relu_type
);
#endif
#ifdef __cplusplus
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.c
浏览文件 @
f0988c7b
...
...
@@ -33,24 +33,27 @@ void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, in
return
;
}
int
DeConvFp32
(
const
float
*
input
,
const
float
*
weight
,
float
*
output
,
float
*
tmp_buffer
,
StrassenMatMulParameter
matmul_param
)
{
return
StrassenMatmul
(
input
,
weight
,
output
,
&
matmul_param
,
FP32_STRASSEN_MAX_RECURSION
,
0
,
tmp_buffer
);
}
int
DeConvPostFp32C8x8
(
const
float
*
src
,
float
*
tmp
,
const
float
*
bias
,
float
*
dst
,
int
output_channel
,
ConvParameter
*
conv_param
)
{
/* row8x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */
size_t
input_plane
=
conv_param
->
input_w_
*
conv_param
->
input_h_
;
size_t
kernel_plane
=
conv_param
->
kernel_w_
*
conv_param
->
kernel_h_
;
size_t
output_plane
=
conv_param
->
output_w_
*
conv_param
->
output_h_
;
int
oc8
=
UP_
DIV
(
output_channel
,
C8NUM
);
int
oc8
=
UP_
ROUND
(
output_channel
,
C8NUM
);
int
in_plane8
=
UP_ROUND
(
input_plane
,
C8NUM
);
int
src_iw_stride
=
C8NUM
;
int
src_ih_stride
=
conv_param
->
input_w_
*
C8NUM
;
int
src_kw_stride
=
in_plane8
*
C8NUM
;
int
src_kh_stride
=
in_plane8
*
conv_param
->
kernel_w_
*
C8NUM
;
int
dst_oh_stride
=
conv_param
->
output_w_
*
C8NUM
;
int
dst_ow_stride
=
C8NUM
;
int
dst_kh_stride
=
conv_param
->
dilation_h_
*
conv_param
->
output_w_
*
C8NUM
;
int
dst_kw_stride
=
conv_param
->
dilation_w_
*
C8NUM
;
for
(
int
c
=
0
;
c
<
oc8
;
c
++
)
{
float
*
dst_ptr
=
tmp
+
c
*
output_plane
*
C8NUM
;
const
float
*
src_ptr
=
src
+
c
*
in_plane8
*
kernel_plane
*
C8NUM
;
memset
(
dst_ptr
,
0
,
output_plane
*
C8NUM
*
sizeof
(
int32_
t
));
for
(
int
c
=
0
;
c
<
oc8
;
c
+=
8
)
{
float
*
dst_ptr
=
tmp
+
c
*
output_plane
;
const
float
*
src_ptr
=
src
+
c
*
in_plane8
*
kernel_plane
;
memset
(
dst_ptr
,
0
,
output_plane
*
C8NUM
*
sizeof
(
floa
t
));
for
(
int
ih
=
0
;
ih
<
conv_param
->
input_h_
;
ih
++
)
{
for
(
int
iw
=
0
;
iw
<
conv_param
->
input_w_
;
iw
++
)
{
...
...
@@ -63,14 +66,31 @@ int DeConvPostFp32C8x8(const float *src, float *tmp, const float *bias, float *d
int
kw_end
=
MSMIN
(
conv_param
->
kernel_w_
,
UP_DIV
(
conv_param
->
output_w_
-
ow
,
conv_param
->
dilation_w_
));
for
(
int
kh
=
kh_start
;
kh
<
kh_end
;
kh
++
)
{
for
(
int
kw
=
kw_start
;
kw
<
kw_end
;
kw
++
)
{
int
src_index
=
ih
*
conv_param
->
input_w_
*
C8NUM
+
iw
*
C8NUM
+
kh
*
in_plane8
*
conv_param
->
kernel_w_
*
C8NUM
+
kw
*
in_plane8
*
C8NUM
;
int
dst_index
=
oh
*
conv_param
->
output_w_
*
C8NUM
+
ow
*
C8NUM
+
kh
*
conv_param
->
dilation_h_
*
conv_param
->
output_w_
*
C8NUM
+
kw
*
conv_param
->
dilation_w_
*
C8NUM
;
int
src_index
=
ih
*
src_ih_stride
+
iw
*
src_iw_stride
+
kh
*
src_kh_stride
+
kw
*
src_kw_stride
;
int
dst_index
=
oh
*
dst_oh_stride
+
ow
*
dst_ow_stride
+
kh
*
dst_kh_stride
+
kw
*
dst_kw_stride
;
float
*
tmp_dst
=
dst_ptr
+
dst_index
;
float
*
tmp_src
=
src_ptr
+
src_index
;
#ifdef ENABLE_ARM64
asm
volatile
(
"mov x0, %[tmp_src]
\n
"
"mov x1, %[tmp_dst]
\n
"
"ld1 {v0.4s, v1.4s}, [x0]
\n
"
"ld1 {v2.4s, v3.4s}, [x1]
\n
"
"fadd v0.4s, v0.4s, v2.4s
\n
"
"fadd v1.4s, v1.4s, v3.4s
\n
"
"st1 {v0.4s, v1.4s}, [x1]
\n
"
:
:
[
tmp_src
]
"r"
(
tmp_src
),
[
tmp_dst
]
"r"
(
tmp_dst
)
:
"x0"
,
"x1"
,
"v0"
,
"v1"
,
"v2"
,
"v3"
);
#else
for
(
int
i
=
0
;
i
<
C8NUM
;
i
++
)
{
dst_ptr
[
dst_index
+
i
]
+=
src_ptr
[
src_index
+
i
];
tmp_dst
[
i
]
+=
tmp_src
[
i
];
}
#endif
}
/*kw*/
}
/*kh*/
}
/*iw*/
...
...
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.h
浏览文件 @
f0988c7b
...
...
@@ -26,9 +26,6 @@ extern "C" {
#endif
void
PackDeConvWeightFp32
(
const
float
*
weight
,
float
*
dst
,
int
input_channel
,
int
output_channel
,
int
plane
);
int
DeConvFp32
(
const
float
*
input
,
const
float
*
weight
,
float
*
output
,
float
*
tmp_buffer
,
StrassenMatMulParameter
matmul_param
);
int
DeConvPostFp32C4
(
const
float
*
src
,
float
*
tmp_c4
,
float
*
dst
,
const
float
*
bias
,
int
output_channel
,
int
input_plane
,
int
kernel_plane
,
int
output_plane
,
ConvParameter
*
conv_param
);
int
DeConvPostFp32C8x8
(
const
float
*
src
,
float
*
tmp_out
,
const
float
*
bias
,
float
*
dst
,
int
output_channel
,
...
...
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc
浏览文件 @
f0988c7b
...
...
@@ -370,35 +370,26 @@ TEST_F(TestConv1x1Fp32, Conv1x1Test2) {
conv1x1
->
Run
();
CompareOutputData
(
reinterpret_cast
<
float
*>
(
outputs_
[
0
]
->
Data
()),
correct
,
total_size
,
0.0001
);
auto
ptr
=
reinterpret_cast
<
float
*>
(
outputs_
[
0
]
->
Data
());
bool
first
=
true
;
for
(
int
i
=
0
;
i
<
total_size
;
i
++
)
{
if
(
fabs
(
ptr
[
i
]
-
correct
[
i
])
>
0.001
&&
first
)
{
printf
(
"%d %f %f
\n
"
,
i
,
ptr
[
i
],
correct
[
i
]);
first
=
false
;
}
/* running warm up */
for
(
int
i
=
0
;
i
<
0
;
i
++
)
{
conv1x1
->
Run
();
}
// /* running warm up */
// for (int i = 0; i < 0; i++) {
// conv1x1->Run();
// }
//
// /* running time cost */
// int loop_count = 1;
// auto time_start = mindspore::lite::GetTimeUs();
// for (int i = 0; i < loop_count; i++) {
// conv1x1->Run();
// }
// auto time_end = mindspore::lite::GetTimeUs();
// auto cost = time_end - time_start;
// uint64_t time_avg = cost / loop_count;
// printf("1x1 average time : %f ms\n", time_avg / 1000.0f);
//
// delete conv_param;
// delete conv1x1;
// for (auto t : inputs_) delete t;
// for (auto t : outputs_) delete t;
// free(correct);
/* running time cost */
int
loop_count
=
1
;
auto
time_start
=
mindspore
::
lite
::
GetTimeUs
();
for
(
int
i
=
0
;
i
<
loop_count
;
i
++
)
{
conv1x1
->
Run
();
}
auto
time_end
=
mindspore
::
lite
::
GetTimeUs
();
auto
cost
=
time_end
-
time_start
;
uint64_t
time_avg
=
cost
/
loop_count
;
printf
(
"1x1 average time : %f ms
\n
"
,
time_avg
/
1000.0
f
);
delete
conv_param
;
delete
conv1x1
;
for
(
auto
t
:
inputs_
)
delete
t
;
for
(
auto
t
:
outputs_
)
delete
t
;
free
(
correct
);
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc
浏览文件 @
f0988c7b
...
...
@@ -95,6 +95,99 @@ TEST_F(TestDeConvolutionFp32, DeConvWeightC4x4Pack2) {
}
TEST_F
(
TestDeConvolutionFp32
,
PostConvFuncC8Test1
)
{
float
in
[]
=
{
-
9.389655
,
-
5.83877
,
7.5724425
,
-
1.4675674
,
-
2.6300175
,
0
,
0
,
0
,
-
5.456284
,
0.7406984
,
16.965645
,
10.888806
,
-
7.2690716
,
0
,
0
,
0
,
-
0.8614793
,
-
4.404605
,
10.917422
,
0.11158327
,
11.1863365
,
0
,
0
,
0
,
-
5.2733865
,
-
0.96367484
,
-
4.731118
,
-
7.576815
,
-
3.4595785
,
0
,
0
,
0
,
-
6.1621623
,
-
0.6315082
,
-
9.140878
,
9.266748
,
-
8.344107
,
0
,
0
,
0
,
13.644127
,
8.206812
,
7.091153
,
-
0.50162584
,
-
3.792715
,
0
,
0
,
0
,
2.0889723
,
6.6916203
,
-
5.3981733
,
11.997365
,
-
7.0394287
,
0
,
0
,
0
,
-
9.254076
,
-
5.5964484
,
-
5.981469
,
-
0.51114964
,
-
2.7693212
,
0
,
0
,
0
};
float
bias
[]
=
{
0.7429814
,
0.4863214
,
0.9888875
,
0.19727881
,
0.009881007
,
0
,
0
,
0
};
float
out
[
8
]
=
{
0
};
float
no
[]
=
{
-
8.646674
,
-
4.7133026
,
-
0.11849791
,
-
4.530405
,
-
5.419181
,
14.387108
,
2.8319538
,
-
8.511095
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
1
,
8
,
1
,
false
,
false
);
CompareOutputData
(
out
,
no
,
8
,
0.0001
);
float
relu
[]
=
{
0
,
0
,
0
,
0
,
0
,
14.387108
,
2.8319538
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
1
,
8
,
1
,
true
,
false
);
CompareOutputData
(
out
,
relu
,
8
,
0.0001
);
float
corr_relu6
[]
=
{
0
,
0
,
0
,
0
,
0
,
6
,
2.8319538
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
1
,
8
,
1
,
false
,
true
);
CompareOutputData
(
out
,
corr_relu6
,
8
,
0.0001
);
}
TEST_F
(
TestDeConvolutionFp32
,
PostConvFuncC8Test2
)
{
float
in
[]
=
{
-
9.389655
,
-
5.83877
,
7.5724425
,
-
1.4675674
,
-
2.6300175
,
0
,
0
,
0
,
-
5.456284
,
0.7406984
,
16.965645
,
10.888806
,
-
7.2690716
,
0
,
0
,
0
,
-
0.8614793
,
-
4.404605
,
10.917422
,
0.11158327
,
11.1863365
,
0
,
0
,
0
,
-
5.2733865
,
-
0.96367484
,
-
4.731118
,
-
7.576815
,
-
3.4595785
,
0
,
0
,
0
,
-
6.1621623
,
-
0.6315082
,
-
9.140878
,
9.266748
,
-
8.344107
,
0
,
0
,
0
,
13.644127
,
8.206812
,
7.091153
,
-
0.50162584
,
-
3.792715
,
0
,
0
,
0
,
2.0889723
,
6.6916203
,
-
5.3981733
,
11.997365
,
-
7.0394287
,
0
,
0
,
0
,
-
9.254076
,
-
5.5964484
,
-
5.981469
,
-
0.51114964
,
-
2.7693212
,
0
,
0
,
0
};
float
bias
[]
=
{
0.7429814
,
0.4863214
,
0.9888875
,
0.19727881
,
0.009881007
,
0
,
0
,
0
};
float
out
[
16
]
=
{
0
};
float
no
[]
=
{
-
8.646674
,
0
,
-
4.7133026
,
0
,
-
0.11849791
,
0
,
-
4.530405
,
0
,
-
5.419181
,
0
,
14.387108
,
0
,
2.8319538
,
0
,
-
8.511095
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
1
,
8
,
2
,
false
,
false
);
CompareOutputData
(
out
,
no
,
16
,
0.0001
);
float
relu
[]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
14.387108
,
0
,
2.8319538
,
0
,
0
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
1
,
8
,
2
,
true
,
false
);
CompareOutputData
(
out
,
relu
,
16
,
0.0001
);
float
corr_relu6
[]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
6
,
0
,
2.8319538
,
0
,
0
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
1
,
8
,
2
,
false
,
true
);
CompareOutputData
(
out
,
corr_relu6
,
16
,
0.0001
);
}
TEST_F
(
TestDeConvolutionFp32
,
PostConvFuncC8Test3
)
{
float
in
[]
=
{
-
9.389655
,
-
5.83877
,
7.5724425
,
-
1.4675674
,
-
2.6300175
,
0
,
0
,
0
,
-
5.456284
,
0.7406984
,
16.965645
,
10.888806
,
-
7.2690716
,
0
,
0
,
0
,
-
0.8614793
,
-
4.404605
,
10.917422
,
0.11158327
,
11.1863365
,
0
,
0
,
0
,
-
5.2733865
,
-
0.96367484
,
-
4.731118
,
-
7.576815
,
-
3.4595785
,
0
,
0
,
0
,
-
6.1621623
,
-
0.6315082
,
-
9.140878
,
9.266748
,
-
8.344107
,
0
,
0
,
0
,
13.644127
,
8.206812
,
7.091153
,
-
0.50162584
,
-
3.792715
,
0
,
0
,
0
,
2.0889723
,
6.6916203
,
-
5.3981733
,
11.997365
,
-
7.0394287
,
0
,
0
,
0
,
-
9.254076
,
-
5.5964484
,
-
5.981469
,
-
0.51114964
,
-
2.7693212
,
0
,
0
,
0
};
float
bias
[]
=
{
0.7429814
,
0.4863214
,
0.9888875
,
0.19727881
,
0.009881007
,
0
,
0
,
0
};
float
out
[
24
]
=
{
0
};
float
no
[]
=
{
-
8.646674
,
-
5.3524485
,
8.56133
,
-
4.7133026
,
1.2270198
,
17.954533
,
-
0.11849791
,
-
3.9182835
,
11.90631
,
-
4.530405
,
-
0.47735345
,
-
3.7422307
,
-
5.419181
,
-
0.14518678
,
-
8.15199
,
14.387108
,
8.693133
,
8.080041
,
2.8319538
,
7.177942
,
-
4.409286
,
-
8.511095
,
-
5.110127
,
-
4.992582
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
3
,
8
,
3
,
false
,
false
);
CompareOutputData
(
out
,
no
,
24
,
0.0001
);
}
TEST_F
(
TestDeConvolutionFp32
,
PostConvFuncC8Test4
)
{
float
in
[]
=
{
-
9.389655
,
-
5.83877
,
7.5724425
,
-
1.4675674
,
-
2.6300175
,
0
,
0
,
0
,
-
5.456284
,
0.7406984
,
16.965645
,
10.888806
,
-
7.2690716
,
0
,
0
,
0
,
-
0.8614793
,
-
4.404605
,
10.917422
,
0.11158327
,
11.1863365
,
0
,
0
,
0
,
-
5.2733865
,
-
0.96367484
,
-
4.731118
,
-
7.576815
,
-
3.4595785
,
0
,
0
,
0
,
-
6.1621623
,
-
0.6315082
,
-
9.140878
,
9.266748
,
-
8.344107
,
0
,
0
,
0
,
13.644127
,
8.206812
,
7.091153
,
-
0.50162584
,
-
3.792715
,
0
,
0
,
0
,
2.0889723
,
6.6916203
,
-
5.3981733
,
11.997365
,
-
7.0394287
,
0
,
0
,
0
,
-
9.254076
,
-
5.5964484
,
-
5.981469
,
-
0.51114964
,
-
2.7693212
,
0
,
0
,
0
};
float
bias
[]
=
{
0.7429814
,
0.4863214
,
0.9888875
,
0.19727881
,
0.009881007
,
0
,
0
,
0
};
float
out
[
32
]
=
{
0
};
float
co32
[]
=
{
0
,
0
,
0
,
0
,
0
,
1.2270198
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
14.387108
,
8.693133
,
0
,
0
,
2.8319538
,
7.177942
,
0
,
0
,
0
,
0
,
0
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
2
,
8
,
4
,
true
,
false
);
CompareOutputData
(
out
,
co32
,
32
,
0.0001
);
float
co32_relu6
[]
=
{
0
,
0
,
6
,
0
,
0
,
1.2270198
,
6
,
6
,
0
,
0
,
6
,
0.3088621
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
6
,
6
,
6
,
6
,
0
,
2.8319538
,
6
,
0
,
6
,
0
,
0
,
0
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
4
,
8
,
4
,
false
,
true
);
CompareOutputData
(
out
,
co32_relu6
,
32
,
0.0001
);
}
TEST_F
(
TestDeConvolutionFp32
,
PostConvFuncC8Test5
)
{
float
in
[]
=
{
-
9.389655
,
-
5.83877
,
7.5724425
,
-
1.4675674
,
-
2.6300175
,
0
,
0
,
0
,
-
5.456284
,
0.7406984
,
16.965645
,
10.888806
,
-
7.2690716
,
0
,
0
,
0
,
-
0.8614793
,
-
4.404605
,
10.917422
,
0.11158327
,
11.1863365
,
0
,
0
,
0
,
...
...
@@ -125,14 +218,106 @@ TEST_F(TestDeConvolutionFp32, PostConvFuncC8Test1) {
0
,
0
,
0
,
6
,
0
,
6
,
6
,
6
,
0
,
0
,
2.8319538
,
6
,
0
,
6
,
0
,
0
,
0
,
0
,
0
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
bias
,
5
,
8
,
5
,
false
,
true
);
CompareOutputData
(
out
,
corr_relu6
,
40
,
0.0001
);
}
// Runs PostConvFuncFp32C8 with an all-zero bias and both flags false, first
// with 3 channels and then with 6 channels. Both calls use plane size 4 and
// stride 6 and share one 24-float destination.
TEST_F(TestDeConvolutionFp32, PostConvFuncC8Test6) {
  constexpr int kPlaneSize = 4;
  constexpr int kStride = 6;
  constexpr int kDstCount = kPlaneSize * kStride;  // 24 floats

  // Four rows of eight floats.
  float src[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964};
  float bias_vals[] = {0, 0, 0, 0, 0, 0, 0, 0};
  float dst[kDstCount] = {};

  // Expected destination after the 3-channel call: three values per
  // 6-float row, the remaining three slots still zero.
  float expect_three_ch[] = {
    -9.389655,  -5.83877,   7.5724425,  0, 0, 0,
    -0.8614793, -4.404605,  10.917422,  0, 0, 0,
    -6.1621623, -0.6315082, -9.140878,  0, 0, 0,
    2.0889723,  6.6916203,  -5.3981733, 0, 0, 0};
  PostConvFuncFp32C8(src, dst, bias_vals, 3, kPlaneSize, kStride, false, false);
  CompareOutputData(dst, expect_three_ch, kDstCount, 0.0001);

  // Expected destination after the 6-channel call: every slot is written.
  float expect_six_ch[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484};
  PostConvFuncFp32C8(src, dst, bias_vals, 6, kPlaneSize, kStride, false, false);
  CompareOutputData(dst, expect_six_ch, kDstCount, 0.0001);
}
// Runs PostConvFuncFp32C8 once with 7 channels, plane size 4 and stride 7,
// an all-zero bias and both flags false, then checks all 28 destination
// floats.
TEST_F(TestDeConvolutionFp32, PostConvFuncC8Test7) {
  constexpr int kChannels = 7;
  constexpr int kPlaneSize = 4;
  constexpr int kDstCount = kChannels * kPlaneSize;  // 28 floats

  // Four rows of eight floats.
  float src[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964};
  float bias_vals[] = {0, 0, 0, 0, 0, 0, 0, 0};
  float dst[kDstCount] = {};

  // Expected result: the first seven values of every source row, back to back.
  float expected[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469};

  PostConvFuncFp32C8(src, dst, bias_vals, kChannels, kPlaneSize, kChannels, false, false);
  CompareOutputData(dst, expected, kDstCount, 0.0001);
}
// Runs PostConvFuncFp32C8 once with 16 channels, plane size 2 and stride 16,
// an all-zero bias and both flags false.
//
// The call produces 2 * 16 = 32 floats and the expected array below holds
// 32 values, so the destination buffer and the comparison length must both
// be 32. The previous revision declared a 28-float buffer and compared only
// 28 elements, which let the call write past the end of the stack array and
// left the last four outputs unchecked.
TEST_F(TestDeConvolutionFp32, PostConvFuncC8Test8_2) {
  constexpr int kChannels = 16;
  constexpr int kPlaneSize = 2;
  constexpr int kOutCount = kChannels * kPlaneSize;  // 32 floats

  // Four rows of eight floats.
  float in[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964};
  float bias[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  float out[kOutCount] = {0};

  // Expected result: source rows 0, 2, 1, 3 in that order (rows 1 and 2 of
  // the source swap places in the destination).
  float no[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964};
  static_assert(sizeof(no) / sizeof(no[0]) == kOutCount, "expected data must cover every output element");

  PostConvFuncFp32C8(in, out, bias, kChannels, kPlaneSize, kChannels, false, false);
  CompareOutputData(out, no, kOutCount, 0.0001);
}
// Runs PostConvFuncFp32C8 once with 16 channels, plane size 4 and stride 16,
// an all-zero bias and both flags false, then checks all 64 destination
// floats.
TEST_F(TestDeConvolutionFp32, PostConvFuncC8Test8_4) {
  constexpr int kChannels = 16;
  constexpr int kPlaneSize = 4;
  constexpr int kDstCount = kChannels * kPlaneSize;  // 64 floats

  // Eight rows of eight floats; rows 4..7 repeat rows 0..3.
  float src[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964,
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964};
  float bias_vals[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  float dst[kDstCount] = {};

  // Expected result: each of the four distinct source rows appears twice in
  // a row, filling one 16-float destination row.
  float expected[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964};

  PostConvFuncFp32C8(src, dst, bias_vals, kChannels, kPlaneSize, kChannels, false, false);
  CompareOutputData(dst, expected, kDstCount, 0.0001);
}
float
nob_relu
[]
=
{
0
,
0
,
7.5724425
,
0
,
0
,
0
,
0.7406984
,
16.965645
,
10.888806
,
0
,
0
,
0
,
10.917422
,
0.11158327
,
11.1863365
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
9.266748
,
0
,
13.644127
,
8.206812
,
7.091153
,
0
,
0
,
2.0889723
,
6.6916203
,
0
,
11.997365
,
0
,
0
,
0
,
0
,
0
,
0
};
PostConvFuncFp32C8
(
in
,
out
,
nullptr
,
5
,
8
,
5
,
true
,
false
);
CompareOutputData
(
out
,
nob_relu
,
40
,
0.0001
);
// Runs PostConvFuncFp32C8 once with 8 channels, plane size 8 and stride 8,
// an all-zero bias and both flags false. The expected destination is
// element-for-element equal to the source buffer.
TEST_F(TestDeConvolutionFp32, PostConvFuncC8Test8_8) {
  constexpr int kChannels = 8;
  constexpr int kPlaneSize = 8;
  constexpr int kDstCount = kChannels * kPlaneSize;  // 64 floats

  // Eight rows of eight floats; rows 4..7 repeat rows 0..3.
  float src[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964,
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964};
  float bias_vals[] = {0, 0, 0, 0, 0, 0, 0, 0};
  float dst[kDstCount] = {};

  // Expected result: identical to the source contents.
  float expected[] = {
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964,
    -9.389655,  -5.83877,   7.5724425,  -1.4675674, -5.456284,  0.7406984,   16.965645, 10.888806,
    -0.8614793, -4.404605,  10.917422,  0.11158327, -5.2733865, -0.96367484, -4.731118, -7.576815,
    -6.1621623, -0.6315082, -9.140878,  9.266748,   13.644127,  8.206812,    7.091153,  -0.50162584,
    2.0889723,  6.6916203,  -5.3981733, 11.997365,  -9.254076,  -5.5964484,  -5.981469, -0.51114964};

  PostConvFuncFp32C8(src, dst, bias_vals, kChannels, kPlaneSize, kChannels, false, false);
  CompareOutputData(dst, expected, kDstCount, 0.0001);
}
int
DeConvTestInit1
(
std
::
vector
<
lite
::
tensor
::
Tensor
*>
*
inputs_
,
std
::
vector
<
lite
::
tensor
::
Tensor
*>
*
outputs_
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录