Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
d7fbfee1
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
d7fbfee1
编写于
9月 03, 2018
作者:
B
Bin Li
提交者:
李寅
9月 10, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add sgemm benchmark
上级
285604f7
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
76 addition
and
10 deletion
+76
-10
mace/kernels/matmul_benchmark.cc
mace/kernels/matmul_benchmark.cc
+54
-4
mace/kernels/sgemm.cc
mace/kernels/sgemm.cc
+8
-5
mace/kernels/sgemm.h
mace/kernels/sgemm.h
+1
-1
mace/kernels/sgemm_test.cc
mace/kernels/sgemm_test.cc
+13
-0
未找到文件。
mace/kernels/matmul_benchmark.cc
浏览文件 @
d7fbfee1
...
...
@@ -22,6 +22,7 @@
#include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/gemm.h"
#include "mace/kernels/gemmlowp_util.h"
#include "mace/kernels/sgemm.h"
namespace
gemmlowp
{
...
...
@@ -107,6 +108,26 @@ void MatmulBenchmark_Mace(int iters, int m, int k, int n) {
}
}
void
MatmulBenchmark_Mace_SGemm
(
int
iters
,
int
m
,
int
k
,
int
n
)
{
mace
::
testing
::
StopTiming
();
std
::
vector
<
float
>
lhs
(
m
*
k
);
std
::
vector
<
float
>
rhs
(
k
*
n
);
std
::
vector
<
float
>
result
(
m
*
n
);
kernels
::
MatrixMap
<
const
float
>
matrix_lhs
(
m
,
k
,
RowMajor
,
lhs
.
data
(),
true
);
kernels
::
MatrixMap
<
const
float
>
matrix_rhs
(
k
,
n
,
RowMajor
,
rhs
.
data
(),
true
);
kernels
::
MatrixMap
<
float
>
matrix_result
(
m
,
n
,
RowMajor
,
result
.
data
());
kernels
::
SGemm
sgemm
;
sgemm
(
matrix_lhs
,
matrix_rhs
,
&
matrix_result
);
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
sgemm
(
matrix_lhs
,
matrix_rhs
,
&
matrix_result
);
}
}
void
MatmulBenchmark_Eigen
(
int
iters
,
int
m
,
int
k
,
int
n
)
{
mace
::
testing
::
StopTiming
();
Eigen
::
MatrixXf
lhs
=
Eigen
::
MatrixXf
::
Random
(
m
,
k
);
...
...
@@ -202,6 +223,7 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) {
#define MACE_BM_MATMUL(M, K, N) \
MACE_BM_MATMUL_FUNC(M, K, N, Mace, float); \
MACE_BM_MATMUL_FUNC(M, K, N, Mace_SGemm, float); \
MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8, uint8_t); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32, uint8_t);
...
...
@@ -215,15 +237,43 @@ MACE_BM_MATMUL(15, 384, 384);
MACE_BM_MATMUL
(
15
,
384
,
1536
);
MACE_BM_MATMUL
(
15
,
1536
,
384
);
MACE_BM_MATMUL
(
1
,
384
,
384
);
MACE_BM_MATMUL
(
1
,
384
,
1536
);
MACE_BM_MATMUL
(
1
,
1536
,
384
);
MACE_BM_MATMUL
(
1
,
384
,
44678
);
MACE_BM_MATMUL
(
1
,
256
,
256
);
MACE_BM_MATMUL
(
1
,
256
,
1536
);
MACE_BM_MATMUL
(
1
,
1536
,
256
);
MACE_BM_MATMUL
(
256
,
256
,
1
);
MACE_BM_MATMUL
(
1536
,
256
,
1
);
MACE_BM_MATMUL
(
256
,
1536
,
1
);
MACE_BM_MATMUL
(
29792
,
256
,
1
);
MACE_BM_MATMUL
(
1
,
256
,
29792
);
MACE_BM_MATMUL
(
2
,
256
,
256
);
MACE_BM_MATMUL
(
2
,
256
,
1536
);
MACE_BM_MATMUL
(
2
,
1536
,
256
);
MACE_BM_MATMUL
(
3
,
256
,
256
);
MACE_BM_MATMUL
(
3
,
256
,
1536
);
MACE_BM_MATMUL
(
3
,
1536
,
256
);
MACE_BM_MATMUL
(
4
,
256
,
256
);
MACE_BM_MATMUL
(
4
,
256
,
1536
);
MACE_BM_MATMUL
(
4
,
1536
,
256
);
MACE_BM_MATMUL
(
8
,
256
,
256
);
MACE_BM_MATMUL
(
8
,
256
,
1536
);
MACE_BM_MATMUL
(
8
,
1536
,
256
);
MACE_BM_MATMUL
(
10
,
256
,
256
);
MACE_BM_MATMUL
(
10
,
256
,
1536
);
MACE_BM_MATMUL
(
10
,
1536
,
256
);
MACE_BM_MATMUL
(
15
,
256
,
256
);
MACE_BM_MATMUL
(
15
,
256
,
1536
);
MACE_BM_MATMUL
(
15
,
1536
,
256
);
// Embedding size 128
MACE_BM_MATMUL
(
1
,
128
,
1536
);
MACE_BM_MATMUL
(
1
,
128
,
44678
);
// MobileNet
MACE_BM_MATMUL
(
128
,
128
,
3136
);
MACE_BM_MATMUL
(
256
,
256
,
784
);
MACE_BM_MATMUL
(
512
,
512
,
196
);
MACE_BM_MATMUL
(
1024
,
1024
,
49
);
}
// namespace test
}
// namespace kernels
}
// namespace mace
mace/kernels/sgemm.cc
浏览文件 @
d7fbfee1
...
...
@@ -505,12 +505,12 @@ void SGemm::operator()(const PackedBlock<float> &lhs,
#pragma omp parallel for
for
(
index_t
bw
=
0
;
bw
<
remain_w
;
++
bw
)
{
index_t
remain_h
=
height
;
index_t
block_h
=
0
;
const
float
*
lhs_ptr
=
lhs_data
;
float
*
res_ptr
=
result_data
+
height
*
bw
;
#if defined(MACE_ENABLE_NEON)
index_t
block_h
=
0
;
#if defined(__aarch64__)
block_h
=
remain_h
>>
3
;
remain_h
-=
(
block_h
<<
3
);
...
...
@@ -555,12 +555,13 @@ void SGemm::operator()(const PackedBlock<float> &lhs,
for
(
index_t
d
=
0
;
d
<
remain_d
;
++
d
)
{
// 8.1.1
float32x4_t
b0
,
b1
;
float32x4_t
a0
=
vdupq_n_f32
(
rhs_ptr
[
0
]);
b0
=
vld1q_f32
(
lhs_ptr
);
b1
=
vld1q_f32
(
lhs_ptr
+
4
);
c0
+=
b0
*
rhs_ptr
[
0
]
;
c1
+=
b1
*
rhs_ptr
[
0
]
;
c0
=
vfmaq_laneq_f32
(
c0
,
b0
,
a0
,
0
)
;
c1
=
vfmaq_laneq_f32
(
c1
,
b1
,
a0
,
0
)
;
lhs_ptr
+=
8
;
rhs_ptr
+=
1
;
...
...
@@ -611,10 +612,11 @@ void SGemm::operator()(const PackedBlock<float> &lhs,
for
(
index_t
d
=
0
;
d
<
remain_d
;
++
d
)
{
// 4.1.1
float32x4_t
b0
,
b1
;
float32x2_t
a0
=
vdup_n_f32
(
rhs_ptr
[
0
]);
b0
=
vld1q_f32
(
lhs_ptr
);
c0
+=
b0
*
rhs_ptr
[
0
]
;
c0
=
vmlaq_lane_f32
(
c0
,
b0
,
a0
,
0
)
;
lhs_ptr
+=
4
;
rhs_ptr
+=
1
;
...
...
@@ -631,11 +633,12 @@ void SGemm::operator()(const PackedBlock<float> &lhs,
const
float
*
rhs_ptr
=
rhs_data
+
depth
*
bw
;
index_t
remain_d
=
depth
;
index_t
block_d
=
0
;
float
sum
=
0.
f
;
#if defined(MACE_ENABLE_NEON)
index_t
block_d
=
0
;
float32x4_t
c0
;
c0
=
vdupq_n_f32
(
0.
f
);
...
...
mace/kernels/sgemm.h
浏览文件 @
d7fbfee1
...
...
@@ -53,7 +53,7 @@ class MatrixMap {
MatrixMap
transpose
()
const
{
Major
transpose_major
=
major_
==
RowMajor
?
ColMajor
:
RowMajor
;
return
MatrixMap
(
col_
,
row_
,
transpose_major
,
data_
);
return
MatrixMap
(
col_
,
row_
,
transpose_major
,
data_
,
is_const_
);
}
index_t
row
()
const
{
...
...
mace/kernels/sgemm_test.cc
浏览文件 @
d7fbfee1
...
...
@@ -82,10 +82,12 @@ TEST(SGemmTest, Pack) {
std
::
vector
<
float
>
data
=
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
,
36
};
// For no-transpose lhs
TestPack
(
data
,
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
},
3
,
4
,
Major
::
RowMajor
,
PackOrder
::
ColMajor
);
#if defined(MACE_ENABLE_NEON)
TestPack
(
data
,
{
1
,
5
,
9
,
13
,
2
,
6
,
10
,
14
,
3
,
7
,
11
,
15
,
4
,
8
,
12
,
16
},
4
,
4
,
Major
::
RowMajor
,
PackOrder
::
ColMajor
);
...
...
@@ -93,14 +95,18 @@ TEST(SGemmTest, Pack) {
{
1
,
5
,
9
,
13
,
2
,
6
,
10
,
14
,
3
,
7
,
11
,
15
,
4
,
8
,
12
,
16
,
17
,
18
,
19
,
20
},
5
,
4
,
Major
::
RowMajor
,
PackOrder
::
ColMajor
);
#if defined(__aarch64__)
TestPack
(
data
,
{
1
,
5
,
9
,
13
,
17
,
21
,
25
,
29
,
2
,
6
,
10
,
14
,
18
,
22
,
26
,
30
,
3
,
7
,
11
,
15
,
19
,
23
,
27
,
31
,
4
,
8
,
12
,
16
,
20
,
24
,
28
,
32
,
33
,
34
,
35
,
36
},
9
,
4
,
Major
::
RowMajor
,
PackOrder
::
ColMajor
);
#endif
#endif
// For transpose-needed lhs
TestPack
(
data
,
{
1
,
4
,
7
,
10
,
2
,
5
,
8
,
11
,
3
,
6
,
9
,
12
},
3
,
4
,
Major
::
ColMajor
,
PackOrder
::
ColMajor
);
#if defined(MACE_ENABLE_NEON)
TestPack
(
data
,
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
4
,
4
,
Major
::
ColMajor
,
PackOrder
::
ColMajor
);
...
...
@@ -108,14 +114,18 @@ TEST(SGemmTest, Pack) {
{
1
,
2
,
3
,
4
,
6
,
7
,
8
,
9
,
11
,
12
,
13
,
14
,
16
,
17
,
18
,
19
,
5
,
10
,
15
,
20
},
5
,
4
,
Major
::
ColMajor
,
PackOrder
::
ColMajor
);
#if defined(__aarch64__)
TestPack
(
data
,
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
,
9
,
18
,
27
,
36
},
9
,
4
,
Major
::
ColMajor
,
PackOrder
::
ColMajor
);
#endif
#endif
// For no-transpose rhs
TestPack
(
data
,
{
1
,
4
,
7
,
10
,
2
,
5
,
8
,
11
,
3
,
6
,
9
,
12
},
4
,
3
,
Major
::
RowMajor
,
PackOrder
::
RowMajor
);
#if defined(MACE_ENABLE_NEON)
TestPack
(
data
,
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
4
,
4
,
Major
::
RowMajor
,
PackOrder
::
RowMajor
);
...
...
@@ -123,10 +133,12 @@ TEST(SGemmTest, Pack) {
{
1
,
2
,
3
,
4
,
6
,
7
,
8
,
9
,
11
,
12
,
13
,
14
,
16
,
17
,
18
,
19
,
5
,
10
,
15
,
20
},
4
,
5
,
Major
::
RowMajor
,
PackOrder
::
RowMajor
);
#endif
// For transpose-needed rhs
TestPack
(
data
,
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
},
4
,
3
,
Major
::
ColMajor
,
PackOrder
::
RowMajor
);
#if defined(MACE_ENABLE_NEON)
TestPack
(
data
,
{
1
,
5
,
9
,
13
,
2
,
6
,
10
,
14
,
3
,
7
,
11
,
15
,
4
,
8
,
12
,
16
},
4
,
4
,
Major
::
ColMajor
,
PackOrder
::
RowMajor
);
...
...
@@ -134,6 +146,7 @@ TEST(SGemmTest, Pack) {
{
1
,
5
,
9
,
13
,
2
,
6
,
10
,
14
,
3
,
7
,
11
,
15
,
4
,
8
,
12
,
16
,
17
,
18
,
19
,
20
},
4
,
5
,
Major
::
ColMajor
,
PackOrder
::
RowMajor
);
#endif
}
TEST
(
SGemmTest
,
UnPack
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录