Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
3afa3893
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
3afa3893
编写于
7月 27, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(arm_common): optimize arm common pooling 9x9 and 13x13
GitOrigin-RevId: 33d5a624784a5dde61b6c9cfe461297a0f2950fe
上级
d16c5caf
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
53 addition
and
8 deletion
+53
-8
dnn/src/arm_common/intrinsic_helper.h
dnn/src/arm_common/intrinsic_helper.h
+2
-1
dnn/src/arm_common/pooling/algo_fp32_pooling_nchw44.cpp
dnn/src/arm_common/pooling/algo_fp32_pooling_nchw44.cpp
+22
-5
dnn/src/arm_common/pooling/kern_fp32_pooling_nchw44.h
dnn/src/arm_common/pooling/kern_fp32_pooling_nchw44.h
+3
-1
dnn/test/arm_common/pooling_multi_thread.cpp
dnn/test/arm_common/pooling_multi_thread.cpp
+25
-0
scripts/cmake-build/cross_build_android_arm_inference.sh
scripts/cmake-build/cross_build_android_arm_inference.sh
+1
-1
未找到文件。
dnn/src/arm_common/intrinsic_helper.h
浏览文件 @
3afa3893
...
...
@@ -124,4 +124,5 @@ __ai void load_helper_x(T& weight, T2 ptr, int oc_offset, XT... args) {
}
// namespace
}
// namespace megdnn
#undef __ai
// vim: syntax=cpp.doxygen
dnn/src/arm_common/pooling/algo_fp32_pooling_nchw44.cpp
浏览文件 @
3afa3893
...
...
@@ -30,10 +30,12 @@ bool PoolingImpl::AlgoFp32ModexStridexNCHW44::usable(
bool
avaible
=
param
.
src_type
.
enumv
()
==
DTypeEnum
::
Float32
&&
param
.
format
==
Param
::
Format
::
NCHW44
&&
(
param
.
mode
==
Mode
::
MAX
||
param
.
mode
==
Mode
::
AVERAGE
)
&&
fh
==
fw
&&
sh
==
sw
&&
(
fh
==
2
||
fh
==
3
||
fh
==
4
||
fh
==
5
)
&&
(
sh
==
1
||
sh
==
2
);
return
avaible
;
fh
==
fw
&&
sh
==
sw
;
bool
size_ok
=
((
fh
==
2
||
fh
==
3
||
fh
==
4
||
fh
==
5
)
&&
(
sh
==
1
||
sh
==
2
));
size_ok
|=
((
fh
==
9
||
fh
==
13
)
&&
(
sh
==
1
));
return
avaible
&&
size_ok
;
}
void
PoolingImpl
::
AlgoFp32ModexStridexNCHW44
::
exec
(
...
...
@@ -94,6 +96,15 @@ void PoolingImpl::AlgoFp32ModexStridexNCHW44::exec(
megdnn_assert(0, "invalid stride %d", sh); \
}
// Stride dispatcher that accepts stride 1 only: when `sh` is 1 it expands
// DISPATCH_MODE(filter, 1); any other value of `sh` reaches the default
// branch and trips megdnn_assert with the "invalid stride" message.
// `sh` is not a macro argument; it is taken from the scope where the macro
// is expanded.
#define DISPATCH_STRIDE_1(filter) \
switch (sh) { \
case 1: \
DISPATCH_MODE(filter, 1); \
break; \
default: \
megdnn_assert(0, "invalid stride %d", sh); \
}
#define DISPATCH_FILTER() \
switch (fh) { \
case 2: \
...
...
@@ -108,6 +119,12 @@ void PoolingImpl::AlgoFp32ModexStridexNCHW44::exec(
case 5: \
DISPATCH_STRIDE(5); \
break; \
case 9: \
DISPATCH_STRIDE_1(9); \
break; \
case 13: \
DISPATCH_STRIDE_1(13); \
break; \
default: \
megdnn_assert(0, "invalid filter %d", fh); \
}
...
...
dnn/src/arm_common/pooling/kern_fp32_pooling_nchw44.h
浏览文件 @
3afa3893
...
...
@@ -64,6 +64,8 @@ INSTANCE_CAL(2)
INSTANCE_CAL
(
3
)
INSTANCE_CAL
(
4
)
INSTANCE_CAL
(
5
)
INSTANCE_CAL
(
9
)
INSTANCE_CAL
(
13
)
#undef INSTANCE_CAL
#undef CALCULATE_AVG_CB
...
...
dnn/test/arm_common/pooling_multi_thread.cpp
浏览文件 @
3afa3893
...
...
@@ -116,6 +116,31 @@ TEST_F(ARM_COMMON_MULTI_THREADS, POOLING_NCHW44_FP32) {
}
}
// Runs the Pooling checker on NCHW44 inputs for 9x9 and 13x13 windows with
// stride 1, covering MAX and AVERAGE modes, paddings 4 and 6, and input
// spatial sizes drawn from {20, 15} x {15, 20}. Input values come from a
// uniform integer RNG in [-10, 10].
TEST_F(ARM_COMMON_MULTI_THREADS, POOLING_W9_w13_NCHW44) {
    using PoolParam = param::Pooling;

    UniformIntRNG rng{-10, 10};
    Checker<Pooling> checker(handle());
    checker.set_rng(0, &rng);

    for (size_t height : {20, 15}) {
        for (size_t width : {15, 20}) {
            for (size_t window : {9, 13}) {
                for (size_t padding : {4, 6}) {
                    for (auto pool_mode :
                         {PoolParam::Mode::MAX, PoolParam::Mode::AVERAGE}) {
                        // Only exercise configurations whose window is
                        // strictly larger than the padding.
                        if (window <= padding) {
                            continue;
                        }

                        PoolParam cfg;
                        cfg.mode = pool_mode;
                        cfg.format = PoolParam::Format::NCHW44;
                        cfg.pad_h = padding;
                        cfg.pad_w = padding;
                        cfg.stride_w = 1;
                        cfg.stride_h = cfg.stride_w;
                        cfg.window_w = window;
                        cfg.window_h = cfg.window_w;

                        checker.set_param(cfg).exec(
                                TensorShapeArray{{2, 8, height, width, 4}, {}});
                    }
                }
            }
        }
    }
}
TEST_F
(
ARM_COMMON_MULTI_THREADS
,
POOLING_W3x3_NCHW44
)
{
UniformIntRNG
rng
{
INT8_MIN
>>
1
,
INT8_MAX
>>
1
};
...
...
scripts/cmake-build/cross_build_android_arm_inference.sh
浏览文件 @
3afa3893
...
...
@@ -2,7 +2,7 @@
set
-e
ARCHS
=(
"arm64-v8a"
"armeabi-v7a"
)
BUILD_TYPE
=
Rel
ease
BUILD_TYPE
=
Rel
WithDebInfo
MGE_ARMV8_2_FEATURE_FP16
=
OFF
MGE_DISABLE_FLOAT16
=
OFF
ARCH
=
arm64-v8a
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录