Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
36ebe588
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
36ebe588
编写于
8月 31, 2020
作者:
L
ling
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MS][LITE][Develop]Pooling optimize
上级
0c5f7377
变更
8
展开全部
隐藏空白更改
内联
并排
Showing
8 changed files
with
123 additions
and
823 deletions
+123
-823
mindspore/lite/nnacl/assembly/arm64/matrix_add.S
mindspore/lite/nnacl/assembly/arm64/matrix_add.S
+0
-103
mindspore/lite/nnacl/assembly/arm64/matrix_sub.S
mindspore/lite/nnacl/assembly/arm64/matrix_sub.S
+0
-105
mindspore/lite/nnacl/fp16/pooling_fp16.c
mindspore/lite/nnacl/fp16/pooling_fp16.c
+51
-70
mindspore/lite/nnacl/fp32/common_func.c
mindspore/lite/nnacl/fp32/common_func.c
+0
-48
mindspore/lite/nnacl/fp32/common_func.h
mindspore/lite/nnacl/fp32/common_func.h
+0
-6
mindspore/lite/nnacl/fp32/pooling.c
mindspore/lite/nnacl/fp32/pooling.c
+57
-460
mindspore/lite/nnacl/fp32/pooling.h
mindspore/lite/nnacl/fp32/pooling.h
+4
-11
mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc
mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc
+11
-20
未找到文件。
mindspore/lite/nnacl/assembly/arm64/matrix_add.S
已删除
100644 → 0
浏览文件 @
0c5f7377
#ifdef __aarch64__
    .text
    .align 5
    //.p2align 5,,15
    .global MatrixAdd
#ifndef __APPLE__
    .type MatrixAdd, %function
#endif

// void MatrixAdd(const float *matDataA, const float *matDataB, float *matDataC,
//                size_t aStride, size_t bStride, size_t cStride, size_t width, size_t height)
//
// Element-wise matDataC = matDataA + matDataB over `height` rows.
//   - width   : number of 4-float (128-bit) groups per row; every count of 1
//               corresponds to one .4s vector load/add/store.
//   - *Stride : distance between the starts of consecutive rows, given in
//               float elements (converted to bytes below).
//   - height  : number of rows; 0 is a no-op.
//
// Auto: x0: matDataA, x1: matDataB, x2: matDataC,
//       x3: aStride, x4: bStride, x5: cStride, x6: width, x7: height
//
// Scratch: x8-x11, v0-v7, v16-v19, condition flags.
// v8-v15 are deliberately NOT used: their low 64 bits must be preserved by
// the callee under AAPCS64 and this routine does not spill anything.

MatrixAdd:
    cbz x7, MatrixAddEnd            // height == 0: nothing to do (avoids counter wrap-around)

    lsl x3, x3, #2                  // strides: floats -> bytes (* sizeof(float))
    lsl x4, x4, #2
    lsl x5, x5, #2

loopH:
    mov x8, x0                      // remember row starts so the stride can be applied afterwards
    mov x9, x1
    mov x10, x2

    mov x11, x6                     // x11 = groups of 4 floats still to be loaded in this row

loop16LineIn:
    cmp x11, #4
    blt L8
    sub x11, x11, #4
    // Prologue of the software-pipelined loop: load a full 16-float chunk and
    // compute its first half.
    ld1 {v0.4s, v1.4s}, [x0], #32
    ld1 {v2.4s, v3.4s}, [x1], #32

    fadd v4.4s, v0.4s, v2.4s
    fadd v5.4s, v1.4s, v3.4s

    ld1 {v6.4s, v7.4s}, [x0], #32
    ld1 {v16.4s, v17.4s}, [x1], #32

    cmp x11, #4
    blt loop16LineOut

loop16:
    // Finish the chunk already in registers while loading the next one.
    st1 {v4.4s, v5.4s}, [x2], #32
    fadd v18.4s, v6.4s, v16.4s
    fadd v19.4s, v7.4s, v17.4s
    ld1 {v0.4s, v1.4s}, [x0], #32
    ld1 {v2.4s, v3.4s}, [x1], #32

    st1 {v18.4s, v19.4s}, [x2], #32
    fadd v4.4s, v0.4s, v2.4s
    fadd v5.4s, v1.4s, v3.4s
    ld1 {v6.4s, v7.4s}, [x0], #32
    ld1 {v16.4s, v17.4s}, [x1], #32

    sub x11, x11, #4
    cmp x11, #4
    bge loop16

loop16LineOut:
    // Epilogue: drain the last fully loaded chunk.
    st1 {v4.4s, v5.4s}, [x2], #32
    fadd v18.4s, v6.4s, v16.4s
    fadd v19.4s, v7.4s, v17.4s
    st1 {v18.4s, v19.4s}, [x2], #32

L8:
    // 0..3 groups remain. Without this check a remainder of 0 fell through to
    // L4 and processed one extra group past the end of the row.
    cbz x11, loop16EndLine
    cmp x11, #2
    blt L4
    ld1 {v0.4s, v1.4s}, [x0], #32
    ld1 {v2.4s, v3.4s}, [x1], #32
    fadd v4.4s, v0.4s, v2.4s
    fadd v5.4s, v1.4s, v3.4s
    sub x11, x11, #2
    st1 {v4.4s, v5.4s}, [x2], #32
    cbz x11, loop16EndLine

L4:
    // Exactly one group of 4 floats is left.
    ld1 {v0.4s}, [x0], #16
    ld1 {v1.4s}, [x1], #16
    fadd v0.4s, v0.4s, v1.4s
    st1 {v0.4s}, [x2], #16

loop16EndLine:
    add x0, x8, x3                  // advance each pointer by its own row stride
    add x1, x9, x4
    add x2, x10, x5
    subs x7, x7, #1
    bne loopH

MatrixAddEnd:
    ret
#endif
mindspore/lite/nnacl/assembly/arm64/matrix_sub.S
已删除
100644 → 0
浏览文件 @
0c5f7377
#ifdef __aarch64__
    .text
    .align 5
    //.p2align 5,,15
    .global MatrixSub
#ifndef __APPLE__
    .type MatrixSub, %function
#endif

// void MatrixSub(const float *matDataA, const float *matDataB, float *matDataC,
//                size_t aStride, size_t bStride, size_t cStride, size_t width, size_t height)
//
// Element-wise matDataC = matDataA - matDataB over `height` rows.
//   - width   : number of 4-float (128-bit) groups per row; every count of 1
//               corresponds to one .4s vector load/sub/store.
//   - *Stride : distance between the starts of consecutive rows, given in
//               float elements (converted to bytes below).
//   - height  : number of rows; 0 is a no-op.
//
// Auto: x0: matDataA, x1: matDataB, x2: matDataC,
//       x3: aStride, x4: bStride, x5: cStride, x6: width, x7: height
//
// Scratch: x8-x11, v0-v7, v16-v19, condition flags.
// v8-v15 are deliberately NOT used: their low 64 bits must be preserved by
// the callee under AAPCS64 and this routine does not spill anything.

MatrixSub:
    cbz x7, MatrixSubEnd            // height == 0: nothing to do (avoids counter wrap-around)

    lsl x3, x3, #2                  // strides: floats -> bytes (* sizeof(float))
    lsl x4, x4, #2
    lsl x5, x5, #2

loopH:
    mov x8, x0                      // remember row starts so the stride can be applied afterwards
    mov x9, x1
    mov x10, x2

    mov x11, x6                     // x11 = groups of 4 floats still to be loaded in this row

loop16LineIn:
    cmp x11, #4
    blt L8
    sub x11, x11, #4
    // Prologue of the software-pipelined loop: load a full 16-float chunk and
    // compute its first half.
    ld1 {v0.4s, v1.4s}, [x0], #32
    ld1 {v2.4s, v3.4s}, [x1], #32

    fsub v4.4s, v0.4s, v2.4s
    fsub v5.4s, v1.4s, v3.4s

    ld1 {v6.4s, v7.4s}, [x0], #32
    ld1 {v16.4s, v17.4s}, [x1], #32

    cmp x11, #4
    blt loop16LineOut

loop16:
    // Finish the chunk already in registers while loading the next one.
    st1 {v4.4s, v5.4s}, [x2], #32
    fsub v18.4s, v6.4s, v16.4s
    fsub v19.4s, v7.4s, v17.4s
    ld1 {v0.4s, v1.4s}, [x0], #32
    ld1 {v2.4s, v3.4s}, [x1], #32

    st1 {v18.4s, v19.4s}, [x2], #32
    fsub v4.4s, v0.4s, v2.4s
    fsub v5.4s, v1.4s, v3.4s
    ld1 {v6.4s, v7.4s}, [x0], #32
    ld1 {v16.4s, v17.4s}, [x1], #32

    sub x11, x11, #4
    cmp x11, #4
    bge loop16

loop16LineOut:
    // Epilogue: drain the last fully loaded chunk.
    st1 {v4.4s, v5.4s}, [x2], #32
    fsub v18.4s, v6.4s, v16.4s
    fsub v19.4s, v7.4s, v17.4s
    st1 {v18.4s, v19.4s}, [x2], #32

L8:
    // 0..3 groups remain. Without this check a remainder of 0 fell through to
    // L4 and processed one extra group past the end of the row.
    cbz x11, loop16EndLine
    cmp x11, #2
    blt L4
    ld1 {v0.4s, v1.4s}, [x0], #32
    ld1 {v2.4s, v3.4s}, [x1], #32
    fsub v4.4s, v0.4s, v2.4s
    fsub v5.4s, v1.4s, v3.4s
    sub x11, x11, #2
    st1 {v4.4s, v5.4s}, [x2], #32
    cbz x11, loop16EndLine

L4:
    // Exactly one group of 4 floats is left.
    ld1 {v0.4s}, [x0], #16
    ld1 {v1.4s}, [x1], #16
    fsub v0.4s, v0.4s, v1.4s
    st1 {v0.4s}, [x2], #16

loop16EndLine:
    add x0, x8, x3                  // advance each pointer by its own row stride
    add x1, x9, x4
    add x2, x10, x5
    subs x7, x7, #1
    bne loopH

MatrixSubEnd:
    ret
#endif
mindspore/lite/nnacl/fp16/pooling_fp16.c
浏览文件 @
36ebe588
...
...
@@ -51,6 +51,12 @@ void AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
int
in_w_index
=
out_w_index
*
stride_w
-
pad_w
;
int
in_h_index
=
out_h_index
*
stride_h
-
pad_h
;
int
out_plane_offset
=
out_batch_offset
+
index
*
channel
;
int
real_win_h_start
=
MSMAX
(
0
,
-
in_h_index
);
int
real_win_h_end
=
MSMIN
(
win_h
,
in_h
-
in_h_index
);
int
resl_win_w_start
=
MSMAX
(
0
,
-
in_w_index
);
int
real_win_w_end
=
MSMIN
(
win_w
,
in_w
-
in_w_index
);
for
(
int
j
=
0
;
j
<
c8
;
j
++
)
{
int
in_channel_offset
=
in_batch_offset
+
j
*
C8NUM
;
int
out_channel_offset
=
out_plane_offset
+
j
*
C8NUM
;
...
...
@@ -60,22 +66,17 @@ void AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
float16_t
tmp_avg
[
8
]{
0
};
#endif
int
real_count
=
0
;
for
(
int
h
=
0
;
h
<
win_h
;
h
++
)
{
for
(
int
w
=
0
;
w
<
win_w
;
w
++
)
{
if
((
in_h_index
+
h
)
<
0
||
(
in_h_index
+
h
)
>=
in_h
||
(
in_w_index
+
w
)
<
0
||
(
in_w_index
+
w
)
>=
in_w
)
{
continue
;
}
else
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
for
(
int
h
=
real_win_h_start
;
h
<
real_win_h_end
;
h
++
)
{
for
(
int
w
=
resl_win_w_start
;
w
<
real_win_w_end
;
w
++
)
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
#ifdef ENABLE_NEON
tmp_avg
=
vaddq_f16
(
tmp_avg
,
vld1q_f16
(
input_ptr
+
in_offset
));
tmp_avg
=
vaddq_f16
(
tmp_avg
,
vld1q_f16
(
input_ptr
+
in_offset
));
#else
for
(
int
t
=
0
;
t
<
8
;
t
++
)
{
tmp_avg
[
t
]
+=
*
(
input_ptr
+
in_offset
+
t
);
}
#endif
++
real_count
;
for
(
int
t
=
0
;
t
<
8
;
t
++
)
{
tmp_avg
[
t
]
+=
*
(
input_ptr
+
in_offset
+
t
);
}
#endif
++
real_count
;
}
// win_w loop
}
// win_h loop
#ifdef ENABLE_NEON
...
...
@@ -97,22 +98,17 @@ void AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
float16_t
tmp_avg
[
4
]{
0
};
#endif
int
real_count
=
0
;
for
(
int
h
=
0
;
h
<
win_h
;
h
++
)
{
for
(
int
w
=
0
;
w
<
win_w
;
w
++
)
{
if
((
in_h_index
+
h
)
<
0
||
(
in_h_index
+
h
)
>=
in_h
||
(
in_w_index
+
w
)
<
0
||
(
in_w_index
+
w
)
>=
in_w
)
{
continue
;
}
else
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
for
(
int
h
=
real_win_h_start
;
h
<
real_win_h_end
;
h
++
)
{
for
(
int
w
=
resl_win_w_start
;
w
<
real_win_w_end
;
w
++
)
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
#ifdef ENABLE_NEON
tmp_avg
=
vadd_f16
(
tmp_avg
,
vld1_f16
(
input_ptr
+
in_offset
));
tmp_avg
=
vadd_f16
(
tmp_avg
,
vld1_f16
(
input_ptr
+
in_offset
));
#else
for
(
int
j
=
0
;
j
<
C4NUM
;
++
j
)
{
tmp_avg
[
j
]
+=
*
(
input_ptr
+
in_offset
);
}
#endif
++
real_count
;
for
(
int
j
=
0
;
j
<
C4NUM
;
++
j
)
{
tmp_avg
[
j
]
+=
*
(
input_ptr
+
in_offset
);
}
#endif
++
real_count
;
}
// win_w loop
}
// win_h loop
#ifdef ENABLE_NEON
...
...
@@ -130,16 +126,11 @@ void AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
int
out_channel_offset
=
out_plane_offset
+
k
;
float16_t
tmp_avg
=
0
;
int
real_count
=
0
;
for
(
int
h
=
0
;
h
<
win_h
;
h
++
)
{
for
(
int
w
=
0
;
w
<
win_w
;
w
++
)
{
if
((
in_h_index
+
h
)
<
0
||
(
in_h_index
+
h
)
>=
in_h
||
(
in_w_index
+
w
)
<
0
||
(
in_w_index
+
w
)
>=
in_w
)
{
continue
;
}
else
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
tmp_avg
+=
*
(
input_ptr
+
in_offset
);
++
real_count
;
}
for
(
int
h
=
real_win_h_start
;
h
<
real_win_h_end
;
h
++
)
{
for
(
int
w
=
resl_win_w_start
;
w
<
real_win_w_end
;
w
++
)
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
tmp_avg
+=
*
(
input_ptr
+
in_offset
);
++
real_count
;
}
// win_w loop
}
// win_h loop
*
(
output_ptr
+
out_channel_offset
)
=
tmp_avg
/
(
float16_t
)
real_count
;
...
...
@@ -148,7 +139,6 @@ void AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
}
// out_plane loop
}
// out_batch loop
}
void
MaxPoolingFp16
(
const
float16_t
*
input_ptr
,
float16_t
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
)
{
int
stride_w
=
pooling_param
->
stride_w_
;
int
stride_h
=
pooling_param
->
stride_h_
;
...
...
@@ -183,6 +173,12 @@ void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
int
in_w_index
=
out_w_index
*
stride_w
-
pad_w
;
int
in_h_index
=
out_h_index
*
stride_h
-
pad_h
;
int
out_plane_offset
=
out_batch_offset
+
index
*
channel
;
int
real_win_h_start
=
MSMAX
(
0
,
-
in_h_index
);
int
real_win_h_end
=
MSMIN
(
win_h
,
in_h
-
in_h_index
);
int
resl_win_w_start
=
MSMAX
(
0
,
-
in_w_index
);
int
real_win_w_end
=
MSMIN
(
win_w
,
in_w
-
in_w_index
);
for
(
int
j
=
0
;
j
<
c8
;
j
++
)
{
int
in_channel_offset
=
in_batch_offset
+
j
*
C8NUM
;
int
out_channel_offset
=
out_plane_offset
+
j
*
C8NUM
;
...
...
@@ -191,21 +187,16 @@ void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
#else
float16_t
tmp_max
[
8
]{
-
FLT_MAX
};
#endif
for
(
int
h
=
0
;
h
<
win_h
;
h
++
)
{
for
(
int
w
=
0
;
w
<
win_w
;
w
++
)
{
if
((
in_h_index
+
h
)
<
0
||
(
in_h_index
+
h
)
>=
in_h
||
(
in_w_index
+
w
)
<
0
||
(
in_w_index
+
w
)
>=
in_w
)
{
continue
;
}
else
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
for
(
int
h
=
real_win_h_start
;
h
<
real_win_h_end
;
h
++
)
{
for
(
int
w
=
resl_win_w_start
;
w
<
real_win_w_end
;
w
++
)
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
#ifdef ENABLE_NEON
tmp_max
=
vmaxq_f16
(
tmp_max
,
vld1q_f16
(
input_ptr
+
in_offset
));
tmp_max
=
vmaxq_f16
(
tmp_max
,
vld1q_f16
(
input_ptr
+
in_offset
));
#else
for
(
int
k
=
0
;
k
<
C8NUM
;
k
++
)
{
tmp_max
[
k
]
=
fmax
(
tmp_max
[
k
],
*
(
input_ptr
+
in_offset
+
k
));
}
#endif
for
(
int
k
=
0
;
k
<
C8NUM
;
k
++
)
{
tmp_max
[
k
]
=
fmax
(
tmp_max
[
k
],
*
(
input_ptr
+
in_offset
+
k
));
}
#endif
}
// win_w loop
}
// win_h loop
#ifdef ENABLE_NEON
...
...
@@ -226,21 +217,16 @@ void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
#else
float16_t
tmp_max
[
4
]{
-
FLT_MAX
};
#endif
for
(
int
h
=
0
;
h
<
win_h
;
h
++
)
{
for
(
int
w
=
0
;
w
<
win_w
;
w
++
)
{
if
((
in_h_index
+
h
)
<
0
||
(
in_h_index
+
h
)
>=
in_h
||
(
in_w_index
+
w
)
<
0
||
(
in_w_index
+
w
)
>=
in_w
)
{
continue
;
}
else
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
for
(
int
h
=
real_win_h_start
;
h
<
real_win_h_end
;
h
++
)
{
for
(
int
w
=
resl_win_w_start
;
w
<
real_win_w_end
;
w
++
)
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
#ifdef ENABLE_NEON
tmp_max
=
vmax_f16
(
tmp_max
,
vld1_f16
(
input_ptr
+
in_offset
));
tmp_max
=
vmax_f16
(
tmp_max
,
vld1_f16
(
input_ptr
+
in_offset
));
#else
for
(
int
k
=
0
;
k
<
C4NUM
;
k
++
)
{
tmp_max
[
k
]
=
fmax
(
tmp_max
[
k
],
*
(
input_ptr
+
in_offset
+
k
));
}
#endif
for
(
int
k
=
0
;
k
<
C4NUM
;
k
++
)
{
tmp_max
[
k
]
=
fmax
(
tmp_max
[
k
],
*
(
input_ptr
+
in_offset
+
k
));
}
#endif
}
// win_w loop
}
// win_h loop
#ifdef ENABLE_NEON
...
...
@@ -257,15 +243,10 @@ void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingPa
int
in_channel_offset
=
in_batch_offset
+
k
;
int
out_channel_offset
=
out_plane_offset
+
k
;
float16_t
tmp_max
=
-
FLT_MAX
;
for
(
int
h
=
0
;
h
<
win_h
;
h
++
)
{
for
(
int
w
=
0
;
w
<
win_w
;
w
++
)
{
if
((
in_h_index
+
h
)
<
0
||
(
in_h_index
+
h
)
>=
in_h
||
(
in_w_index
+
w
)
<
0
||
(
in_w_index
+
w
)
>=
in_w
)
{
continue
;
}
else
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
tmp_max
=
fmax
(
tmp_max
,
*
(
input_ptr
+
in_offset
));
}
for
(
int
h
=
real_win_h_start
;
h
<
real_win_h_end
;
h
++
)
{
for
(
int
w
=
resl_win_w_start
;
w
<
real_win_w_end
;
w
++
)
{
int
in_offset
=
in_channel_offset
+
((
in_h_index
+
h
)
*
in_w
+
in_w_index
+
w
)
*
channel
;
tmp_max
=
fmax
(
tmp_max
,
*
(
input_ptr
+
in_offset
));
}
// win_w loop
}
// win_h loop
*
(
output_ptr
+
out_channel_offset
)
=
tmp_max
;
...
...
mindspore/lite/nnacl/fp32/common_func.c
浏览文件 @
36ebe588
...
...
@@ -15,54 +15,6 @@
*/
#include "nnacl/fp32/common_func.h"
#ifndef ENABLE_ARM64
/*
 * Portable fallback: dst = a_ptr + b_ptr, element by element.
 *
 * The data is addressed in groups of C4NUM floats:
 *   - row      : number of C4NUM-float groups per line
 *   - col      : number of lines
 *   - *_stride : distance, in floats, between the starts of consecutive lines
 *                of the respective buffer
 *
 * All indices are computed in size_t so that they match the parameter types
 * (no signed/unsigned comparison, no truncation of large offsets to int).
 */
void MatrixAdd(const float *a_ptr, const float *b_ptr, float *dst, size_t a_stride, size_t b_stride, size_t c_stride,
               size_t row, size_t col) {
  for (size_t r = 0; r < row; r++) {
    const size_t group_offset = r * C4NUM; /* offset of group r inside a line */
    for (size_t c = 0; c < col; c++) {
      const size_t a_index = c * a_stride + group_offset;
      const size_t b_index = c * b_stride + group_offset;
      const size_t c_index = c * c_stride + group_offset;
      for (size_t i = 0; i < C4NUM; i++) {
        dst[c_index + i] = a_ptr[a_index + i] + b_ptr[b_index + i];
      }
    }
  }
}
/*
 * Portable fallback: dst = a_ptr - b_ptr, element by element.
 *
 * The data is addressed in groups of C4NUM floats:
 *   - row      : number of C4NUM-float groups per line
 *   - col      : number of lines
 *   - *_stride : distance, in floats, between the starts of consecutive lines
 *                of the respective buffer
 *
 * All indices are computed in size_t so that they match the parameter types
 * (no signed/unsigned comparison, no truncation of large offsets to int).
 */
void MatrixSub(const float *a_ptr, const float *b_ptr, float *dst, size_t a_stride, size_t b_stride, size_t c_stride,
               size_t row, size_t col) {
  for (size_t r = 0; r < row; r++) {
    const size_t group_offset = r * C4NUM; /* offset of group r inside a line */
    for (size_t c = 0; c < col; c++) {
      const size_t a_index = c * a_stride + group_offset;
      const size_t b_index = c * b_stride + group_offset;
      const size_t c_index = c * c_stride + group_offset;
      for (size_t i = 0; i < C4NUM; i++) {
        dst[c_index + i] = a_ptr[a_index + i] - b_ptr[b_index + i];
      }
    }
  }
}
#endif
/*
 * Combines the four result blocks c11/c12/c21/c22 with the temporary x_ptr by
 * a fixed chain of in-place MatrixAdd calls. The order of the calls matters:
 * every step reads values produced by an earlier one.
 *
 * The U-and-P labels are kept from the original comments (they look like
 * Strassen-style intermediate names -- confirm against the caller).
 *
 * c_stride is used for every c** block, x_stride only for x_ptr; row and col
 * are forwarded unchanged to MatrixAdd.
 */
void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr, size_t row, size_t col,
                    size_t c_stride, size_t x_stride) {
  /* U2 = P1 + P6 : c12 <- x_ptr + c12 */
  MatrixAdd(x_ptr, c12, c12, x_stride, c_stride, c_stride, row, col);

  /* U3 = U2 + P7 : c21 <- c12 + c21 */
  MatrixAdd(c12, c21, c21, c_stride, c_stride, c_stride, row, col);

  /* U4 = U2 + P5 : c12 <- c12 + c22 */
  MatrixAdd(c12, c22, c12, c_stride, c_stride, c_stride, row, col);

  /* U7 = U3 + P5 : c22 <- c21 + c22 */
  MatrixAdd(c21, c22, c22, c_stride, c_stride, c_stride, row, col);

  /* U5 = U4 + P3 : c12 <- c12 + c11 */
  MatrixAdd(c12, c11, c12, c_stride, c_stride, c_stride, row, col);
}
void
PostConvFuncComm
(
const
float
*
src_ptr_
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
,
int
size
)
{
for
(
int
oc
=
0
;
oc
<
output_channel
;
oc
++
)
{
...
...
mindspore/lite/nnacl/fp32/common_func.h
浏览文件 @
36ebe588
...
...
@@ -31,12 +31,6 @@ void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bi
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
);
void
PostConvFuncFp32C8
(
const
float
*
c8_out_ptr
,
float
*
out_ptr
,
const
float
*
bias_ptr
,
size_t
output_channel
,
size_t
plane_size
,
size_t
stride
,
bool
is_relu
,
bool
is_relu6
);
void
MatrixAdd
(
const
float
*
a_ptr
,
const
float
*
b_ptr
,
float
*
dst
,
size_t
a_stride
,
size_t
b_stride
,
size_t
c_stride
,
size_t
row
,
size_t
col
);
void
MatrixSub
(
const
float
*
a_ptr
,
const
float
*
b_ptr
,
float
*
dst
,
size_t
a_stride
,
size_t
b_stride
,
size_t
c_stride
,
size_t
row
,
size_t
col
);
void
MatrixMultiAdd
(
float
*
c11
,
float
*
c12
,
float
*
c21
,
float
*
c22
,
float
*
x_ptr
,
size_t
row
,
size_t
col
,
size_t
c_stride
,
size_t
x_stride
);
float
ShortToFloat32
(
uint16_t
srcValue
);
uint16_t
Float32ToShort
(
float
srcValue
);
...
...
mindspore/lite/nnacl/fp32/pooling.c
浏览文件 @
36ebe588
此差异已折叠。
点击以展开。
mindspore/lite/nnacl/fp32/pooling.h
浏览文件 @
36ebe588
...
...
@@ -27,17 +27,10 @@
#ifdef __cplusplus
extern
"C"
{
#endif
void
AvgPooling
(
const
float
*
input_ptr
,
float
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
);
void
MaxPooling
(
const
float
*
input_ptr
,
float
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
);
void
AvgPoolingRelu
(
const
float
*
input_ptr
,
float
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
);
void
MaxPoolingRelu
(
const
float
*
input_ptr
,
float
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
);
void
AvgPoolingRelu6
(
const
float
*
input_ptr
,
float
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
);
void
MaxPoolingRelu6
(
const
float
*
input_ptr
,
float
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
);
void
AvgPooling
(
const
float
*
input_ptr
,
float
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
,
float
minf
,
float
maxf
);
void
MaxPooling
(
const
float
*
input_ptr
,
float
*
output_ptr
,
PoolingParameter
*
pooling_param
,
int
task_id
,
float
minf
,
float
maxf
);
#ifdef __cplusplus
}
#endif
...
...
mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc
浏览文件 @
36ebe588
...
...
@@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/arm/fp32/pooling.h"
#include <float.h>
#include "nnacl/fp32/pooling.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
...
...
@@ -52,28 +53,18 @@ int PoolingCPUKernel::ReSize() {
int
PoolingCPUKernel
::
RunImpl
(
int
task_id
)
{
auto
input_ptr
=
reinterpret_cast
<
float
*>
(
in_tensors_
.
at
(
kInputIndex
)
->
Data
());
auto
output_ptr
=
reinterpret_cast
<
float
*>
(
out_tensors_
.
at
(
kOutputIndex
)
->
Data
());
float
minf
=
-
FLT_MAX
;
float
maxf
=
FLT_MAX
;
if
(
pooling_param_
->
act_type_
==
ActType_Relu
)
{
minf
=
0.
f
;
}
else
if
(
pooling_param_
->
act_type_
==
ActType_Relu6
)
{
minf
=
0.
f
;
maxf
=
6.
f
;
}
if
(
pooling_param_
->
pool_mode_
==
PoolMode_MaxPool
)
{
switch
(
pooling_param_
->
act_type_
)
{
case
ActType_Relu
:
MaxPoolingRelu
(
input_ptr
,
output_ptr
,
pooling_param_
,
task_id
);
break
;
case
ActType_Relu6
:
MaxPoolingRelu6
(
input_ptr
,
output_ptr
,
pooling_param_
,
task_id
);
break
;
default:
MaxPooling
(
input_ptr
,
output_ptr
,
pooling_param_
,
task_id
);
}
MaxPooling
(
input_ptr
,
output_ptr
,
pooling_param_
,
task_id
,
minf
,
maxf
);
}
else
{
switch
(
pooling_param_
->
act_type_
)
{
case
ActType_Relu
:
AvgPoolingRelu
(
input_ptr
,
output_ptr
,
pooling_param_
,
task_id
);
break
;
case
ActType_Relu6
:
AvgPoolingRelu6
(
input_ptr
,
output_ptr
,
pooling_param_
,
task_id
);
break
;
default:
AvgPooling
(
input_ptr
,
output_ptr
,
pooling_param_
,
task_id
);
}
AvgPooling
(
input_ptr
,
output_ptr
,
pooling_param_
,
task_id
,
minf
,
maxf
);
}
return
RET_OK
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录