Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
2b4b4d66
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
2b4b4d66
编写于
6月 12, 2020
作者:
M
Megvii Engine Team
提交者:
Xu Xinran
6月 19, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(dnn/fallback): add aarch64 mk4 dot 3x3 s1 fuse packb
GitOrigin-RevId: 3e69878d8d349d3cd21d828a3029aa7e1c61a294
上级
a1677d7a
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
88 addition
and
1 deletion
+88
-1
dnn/src/fallback/conv_bias/im2col/factory.h
dnn/src/fallback/conv_bias/im2col/factory.h
+1
-1
dnn/src/fallback/conv_bias/im2col/strategy_fuse_nchw44_dot.cpp
...rc/fallback/conv_bias/im2col/strategy_fuse_nchw44_dot.cpp
+15
-0
dnn/test/arm_common/conv_bias_multi_thread.cpp
dnn/test/arm_common/conv_bias_multi_thread.cpp
+19
-0
dnn/test/arm_common/conv_bias_multi_thread_benchmark.cpp
dnn/test/arm_common/conv_bias_multi_thread_benchmark.cpp
+53
-0
未找到文件。
dnn/src/fallback/conv_bias/im2col/factory.h
浏览文件 @
2b4b4d66
...
@@ -450,7 +450,7 @@ Strategy* StrategyDelegationStorage::get(
...
@@ -450,7 +450,7 @@ Strategy* StrategyDelegationStorage::get(
sparam
.
kernel
=
param
.
filter_meta
.
spatial
[
0
];
sparam
.
kernel
=
param
.
filter_meta
.
spatial
[
0
];
sparam
.
stride
=
param
.
filter_meta
.
stride
[
0
];
sparam
.
stride
=
param
.
filter_meta
.
stride
[
0
];
sparam
.
is_square
=
sparam
.
is_square
=
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
0
];
param
.
filter_meta
.
spatial
[
0
]
==
param
.
filter_meta
.
spatial
[
1
];
sparam
.
is_xcorr
=
param
.
filter_meta
.
should_flip
;
sparam
.
is_xcorr
=
param
.
filter_meta
.
should_flip
;
MEGDNN_LOCK_GUARD
(
m_mtx
);
MEGDNN_LOCK_GUARD
(
m_mtx
);
if
(
map_strategys
.
find
(
sparam
)
==
map_strategys
.
end
())
{
if
(
map_strategys
.
find
(
sparam
)
==
map_strategys
.
end
())
{
...
...
dnn/src/fallback/conv_bias/im2col/strategy_fuse_nchw44_dot.cpp
0 → 100644
浏览文件 @
2b4b4d66
/**
* \file dnn/src/fallback/conv_bias/im2col/strategy_fuse_nchw44_dot.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/fallback/conv_bias/im2col/strategy_base.h"
// vim: syntax=cpp.doxygen
dnn/test/arm_common/conv_bias_multi_thread.cpp
浏览文件 @
2b4b4d66
...
@@ -1764,7 +1764,26 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
...
@@ -1764,7 +1764,26 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
#undef cb
#undef cb
}
}
#endif
#endif
#endif
#endif
#if MEGDNN_AARCH64
#if __ARM_FEATURE_DOTPROD
/**
 * Checks quantized conv-bias results when the algorithm named
 * "IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96" is requested.
 *
 * Inputs are drawn from a UniformIntRNG constructed with {-50, 50}; the
 * tensors use QuantizedS8(2.5f) / QuantizedS8(2.5f) / QuantizedS32(6.25f) /
 * QuantizedS8(60.25f) data types and the comparison tolerance is 0.001.
 *
 * NOTE(review): the leading `{3}, 1` arguments of get_nchw44_conv_bias_args
 * are presumably the kernel-size list and the stride, and the boolean flags
 * select which cases are generated -- confirm against the helper's signature.
 */
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_NCHW44DOT_FUSE) {
    UniformIntRNG int_rng{-50, 50};
    float tolerance = 0.001;

    checker_conv_bias(
            get_nchw44_conv_bias_args({3}, 1, false, false, false, false,
                                      true, false, false, false),
            handle(), &int_rng, tolerance, dtype::QuantizedS8(2.5f),
            dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
            dtype::QuantizedS8(60.25f),
            "IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96");
}
#endif
#endif
#endif
#endif
...
...
dnn/test/arm_common/conv_bias_multi_thread_benchmark.cpp
浏览文件 @
2b4b4d66
...
@@ -655,6 +655,59 @@ TEST_F(ARM_COMMON_BENCHMARK_MULTI_THREADS, BENCHMARK_CONVBIAS_INT8_NCHW44) {
...
@@ -655,6 +655,59 @@ TEST_F(ARM_COMMON_BENCHMARK_MULTI_THREADS, BENCHMARK_CONVBIAS_INT8_NCHW44) {
bench_case
(
1
,
512
,
256
,
28
,
28
,
3
,
4
,
1
,
2
);
bench_case
(
1
,
512
,
256
,
28
,
28
,
3
,
4
,
1
,
2
);
}
}
/**
 * Benchmarks quantized int8 conv-bias in the NCHW44_DOT format.
 *
 * Every case uses RELU as the non-linearity, a square FS x FS filter, equal
 * padding P and stride S in both dimensions, and is handed to benchmark_impl
 * with the algorithm pattern ".+" and RUNS iterations. Four channel/spatial
 * configurations are run first with group == 1 and then with group == 4.
 */
TEST_F(ARM_COMMON_BENCHMARK_MULTI_THREADS,
       BENCHMARK_CONVBIAS_INT8_NCHW44_DOT) {
    constexpr size_t RUNS = 40;
    std::vector<DType> data_type = {
            dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
            dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f)};

    // Builds the parameter set and tensor shapes for one convolution and
    // forwards them to benchmark_impl.
    auto bench_case = [&](size_t N, size_t IC, size_t OC, size_t H, size_t W,
                          size_t FS, size_t group, size_t P, size_t S,
                          bool is_nchw = false) {
        const bool grouped = group > 1;

        param::ConvBias conv_param;
        conv_param.nonlineMode = param::ConvBias::NonlineMode::RELU;
        conv_param.pad_h = P;
        conv_param.pad_w = P;
        conv_param.stride_h = S;
        conv_param.stride_w = S;
        conv_param.sparse = grouped ? param::ConvBias::Sparse::GROUP
                                    : param::ConvBias::Sparse::DENSE;
        conv_param.format = param::ConvBias::Format::NCHW44_DOT;

        // Output spatial extent of the convolution.
        const size_t OH = (H + 2 * P - FS) / S + 1;
        const size_t OW = (W + 2 * P - FS) / S + 1;

        TensorShape src = {N, IC / 4, H, W, 4};
        TensorShape filter = {OC / 4, IC / 4, FS, FS, 4, 4};
        if (grouped) {
            filter = {group, OC / group / 4, IC / group / 4, FS, FS, 4, 4};
        }
        // The is_nchw layout replaces both shapes chosen above, including
        // the grouped filter shape.
        if (is_nchw) {
            src = {N, IC, H, W};
            filter = {OC / 4, FS, FS, IC, 4};
        }
        TensorShape bias = {1, OC / 4, 1, 1, 4};
        TensorShape dst = {N, OC / 4, OH, OW, 4};

        SmallVector<TensorShape> shapes{src, filter, bias, {}, dst};

        // Operation estimate scaled by 1e-6; integer arithmetic is kept
        // until the final multiplication.
        const auto dst_elems = dst.total_nr_elems();
        const auto per_elem_terms = (IC / group) * FS * FS + 1;
        float computations =
                (per_elem_terms * dst_elems * 2 + dst_elems) * 1e-6;

        std::vector<std::pair<SmallVector<TensorShape>, float>> shape_arg = {
                std::make_pair(shapes, computations)};
        benchmark_impl(conv_param, shape_arg, ".+", RUNS, {4, {4, 5, 6, 7}},
                       {1, {7}}, data_type);
    };

    // {channels (IC == OC), spatial size (H == W)} for each benchmarked case.
    const size_t layer_cfgs[][2] = {{64, 56}, {128, 28}, {256, 14}, {512, 7}};
    const size_t group_counts[] = {1, 4};
    for (size_t group : group_counts) {
        for (const auto& cfg : layer_cfgs) {
            bench_case(1, cfg[0], cfg[0], cfg[1], cfg[1], 3, group, 1, 1);
        }
    }
}
TEST_F
(
ARM_COMMON_BENCHMARK_MULTI_THREADS
,
TEST_F
(
ARM_COMMON_BENCHMARK_MULTI_THREADS
,
BENCHMARK_CONVBIAS_INT8_INT8_INT8_STRIDE2
)
{
BENCHMARK_CONVBIAS_INT8_INT8_INT8_STRIDE2
)
{
constexpr
size_t
RUNS
=
50
;
constexpr
size_t
RUNS
=
50
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录