Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
86ef00bb
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
86ef00bb
编写于
7月 25, 2018
作者:
李
李寅
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'gemmlowp' into 'master'
add gemmlowp benchmark See merge request !680
上级
c2b06975
aa0ecaa2
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
146 additions
and
9 deletions
+146
-9
WORKSPACE
WORKSPACE
+3
-4
mace/kernels/BUILD
mace/kernels/BUILD
+4
-2
mace/kernels/matmul_benchmark.cc
mace/kernels/matmul_benchmark.cc
+139
-3
未找到文件。
WORKSPACE
浏览文件 @
86ef00bb
...
...
@@ -78,11 +78,10 @@ new_http_archive(
http_archive
(
name
=
"gemmlowp"
,
sha256
=
"
b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658
"
,
strip_prefix
=
"gemmlowp-
38ebac7b059e84692f53e5938f97a9943c120d98
"
,
sha256
=
"
5941b50afb7f43f96a2afaa101e024b5a1c6b0b4e4f110688fefa083bbdd652d
"
,
strip_prefix
=
"gemmlowp-
master-3559cf6e2a21a15b5bd8133bb632da6050aa8b8d
"
,
urls
=
[
"http://cnbj1.fds.api.xiaomi.com/mace/third-party/gemmlowp/38ebac7b059e84692f53e5938f97a9943c120d98.zip"
,
"https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip"
,
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/gemmlowp/gemmlowp-master-3559cf6e2a21a15b5bd8133bb632da6050aa8b8d.zip"
,
],
)
...
...
mace/kernels/BUILD
浏览文件 @
86ef00bb
...
...
@@ -70,6 +70,7 @@ cc_library(
deps
=
[
"//mace/core"
,
"//mace/utils"
,
"@gemmlowp"
,
],
)
...
...
@@ -104,7 +105,7 @@ cc_test(
deps
=
[
":kernels"
,
"//mace/ops"
,
"@gtest
//:gtest
"
,
"@gtest"
,
"@gtest//:gtest_main"
,
],
)
...
...
@@ -133,8 +134,9 @@ cc_test(
linkstatic
=
1
,
deps
=
[
":kernels"
,
"//mace/ops"
,
"//mace/core:test_benchmark_main"
,
"//mace/ops"
,
"//third_party/eigen3"
,
"@gemmlowp"
,
],
)
mace/kernels/matmul_benchmark.cc
浏览文件 @
86ef00bb
...
...
@@ -15,10 +15,74 @@
#include <Eigen/Dense>
#include <algorithm>
#include <string>
#include <tuple>
#include <vector>
#include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/gemm.h"
#include "public/gemmlowp.h"
namespace
gemmlowp
{
// Owning matrix built on top of MatrixMap.
//
// The elements live in the `storage` vector; the inherited map fields
// (data_, rows_, cols_, stride_) are kept pointing at / describing that
// vector by Resize(). Copying performs a deep copy of the elements.
//
// Template parameters:
//   tScalar - element type stored in the matrix.
//   tOrder  - storage order forwarded to MatrixMap; it decides whether the
//             stride equals the row count (ColMajor) or the column count.
template <typename tScalar, MapOrder tOrder>
class Matrix : public MatrixMap<tScalar, tOrder> {
 public:
  typedef MatrixMap<tScalar, tOrder> Map;
  typedef MatrixMap<const tScalar, tOrder> ConstMap;
  typedef typename Map::Scalar Scalar;
  static const MapOrder Order = tOrder;
  using Map::cols_;
  using Map::data_;
  using Map::kOrder;
  using Map::rows_;
  using Map::stride_;

 public:
  // Creates an empty 0x0 matrix with no backing data.
  Matrix() : Map(nullptr, 0, 0, 0) {}

  // Creates a rows x cols matrix; elements are value-initialized by the
  // underlying std::vector::resize.
  Matrix(int rows, int cols) : Map(nullptr, 0, 0, 0) { Resize(rows, cols); }

  // Deep-copies `other` (delegates to the copy assignment operator).
  Matrix(const Matrix& other) : Map(nullptr, 0, 0, 0) { *this = other; }

  // Deep-copies the shape and the elements of `other` into this matrix.
  Matrix& operator=(const Matrix& other) {
    // Self-assignment would hand identical source/destination ranges to
    // memcpy, which is undefined behavior; there is nothing to copy anyway.
    if (this == &other) {
      return *this;
    }
    Resize(other.rows_, other.cols_);
    // An empty vector may expose a null data pointer, and passing null to
    // memcpy is undefined even for a zero length.
    if (size() > 0) {
      std::memcpy(data_, other.data_, size() * sizeof(Scalar));
    }
    return *this;
  }

  // Two matrices are equal when their shapes match and their element bytes
  // are identical.
  friend bool operator==(const Matrix& a, const Matrix& b) {
    if (a.rows_ != b.rows_ || a.cols_ != b.cols_) {
      return false;
    }
    if (a.size() == 0) {
      return true;  // Same shape and no elements to compare.
    }
    // memcmp takes a byte count: scale the element count by the element
    // size, otherwise only a prefix is compared when sizeof(Scalar) > 1.
    return std::memcmp(a.data_, b.data_, a.size() * sizeof(Scalar)) == 0;
  }

  // Changes the shape to rows x cols, resizes the backing vector and
  // refreshes data_ (the vector may have reallocated).
  void Resize(int rows, int cols) {
    rows_ = rows;
    cols_ = cols;
    stride_ = kOrder == gemmlowp::MapOrder::ColMajor ? rows : cols;
    storage.resize(size());
    data_ = storage.data();
  }

  // Number of elements (rows * cols).
  int size() const { return rows_ * cols_; }

  // Mutable view of this matrix as its MatrixMap base.
  Map& map() { return *static_cast<Map*>(this); }

  // Read-only map over the same data, shape and stride.
  ConstMap const_map() const { return ConstMap(data_, rows_, cols_, stride_); }

 protected:
  // Backing buffer that owns the elements data_ points at.
  std::vector<Scalar> storage;
};
// Fills every element of `*m` with the constant 128.
//
// MatrixType must provide rows(), cols() and an assignable operator()(row,
// col). Elements are written column by column.
// NOTE(review): 128 is presumably the quantized "zero" that pairs with the
// -128 offsets the gemmlowp benchmarks in this file pass to the GEMM call;
// confirm against the callers.
template <typename MatrixType>
void MakeZero(MatrixType* m) {
  MatrixType& matrix = *m;
  for (int col = 0; col < matrix.cols(); ++col) {
    for (int row = 0; row < matrix.rows(); ++row) {
      matrix(row, col) = 128;
    }
  }
}
}
// namespace gemmlowp
namespace
mace
{
namespace
kernels
{
...
...
@@ -55,6 +119,76 @@ void MatmulBenchmark_Eigen(int iters, int m, int k, int n) {
}
}
void
MatmulBenchmark_gemmlowp_uint8
(
int
iters
,
int
rows
,
int
depth
,
int
cols
)
{
mace
::
testing
::
StopTiming
();
gemmlowp
::
Matrix
<
std
::
uint8_t
,
gemmlowp
::
MapOrder
::
RowMajor
>
lhs
;
gemmlowp
::
Matrix
<
std
::
uint8_t
,
gemmlowp
::
MapOrder
::
ColMajor
>
rhs
;
gemmlowp
::
Matrix
<
std
::
uint8_t
,
gemmlowp
::
MapOrder
::
ColMajor
>
result
;
lhs
.
Resize
(
rows
,
depth
);
rhs
.
Resize
(
depth
,
cols
);
result
.
Resize
(
rows
,
cols
);
gemmlowp
::
MakeZero
(
&
lhs
);
gemmlowp
::
MakeZero
(
&
rhs
);
gemmlowp
::
MakeZero
(
&
result
);
gemmlowp
::
OutputStageQuantizeDownInt32ByFixedPoint
quantize_down_stage
;
quantize_down_stage
.
result_offset_after_shift
=
128
;
quantize_down_stage
.
result_fixedpoint_multiplier
=
1234567890
;
quantize_down_stage
.
result_shift
=
16
;
gemmlowp
::
OutputStageSaturatingCastToUint8
saturating_cast_stage
;
const
auto
output_pipeline
=
std
::
make_tuple
(
quantize_down_stage
,
saturating_cast_stage
);
gemmlowp
::
GemmContext
gemm_context
;
gemm_context
.
set_max_num_threads
(
4
);
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
std
::
uint8_t
,
std
::
uint8_t
,
BitDepthParams
>
(
&
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
-
128
,
output_pipeline
);
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
gemmlowp
::
GemmWithOutputPipeline
<
std
::
uint8_t
,
std
::
uint8_t
,
BitDepthParams
>
(
&
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
-
128
,
output_pipeline
);
}
}
void
MatmulBenchmark_gemmlowp_int32
(
int
iters
,
int
rows
,
int
depth
,
int
cols
)
{
mace
::
testing
::
StopTiming
();
gemmlowp
::
Matrix
<
std
::
uint8_t
,
gemmlowp
::
MapOrder
::
RowMajor
>
lhs
;
gemmlowp
::
Matrix
<
std
::
uint8_t
,
gemmlowp
::
MapOrder
::
ColMajor
>
rhs
;
gemmlowp
::
Matrix
<
std
::
int32_t
,
gemmlowp
::
MapOrder
::
ColMajor
>
result
;
lhs
.
Resize
(
rows
,
depth
);
rhs
.
Resize
(
depth
,
cols
);
result
.
Resize
(
rows
,
cols
);
gemmlowp
::
MakeZero
(
&
lhs
);
gemmlowp
::
MakeZero
(
&
rhs
);
gemmlowp
::
MakeZero
(
&
result
);
const
auto
output_pipeline
=
std
::
make_tuple
();
gemmlowp
::
GemmContext
gemm_context
;
gemm_context
.
set_max_num_threads
(
4
);
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
std
::
uint8_t
,
std
::
int32_t
,
BitDepthParams
>
(
&
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
-
128
,
output_pipeline
);
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
gemmlowp
::
GemmWithOutputPipeline
<
std
::
uint8_t
,
std
::
int32_t
,
BitDepthParams
>
(
&
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
-
128
,
output_pipeline
);
}
}
}
// namespace
#define MACE_BM_MATMUL_FUNC(M, K, N, FUNC) \
...
...
@@ -69,7 +203,9 @@ void MatmulBenchmark_Eigen(int iters, int m, int k, int n) {
#define MACE_BM_MATMUL(M, K, N) \
MACE_BM_MATMUL_FUNC(M, K, N, Mace); \
MACE_BM_MATMUL_FUNC(M, K, N, Eigen);
MACE_BM_MATMUL_FUNC(M, K, N, Eigen); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32);
// Embedding size 384
MACE_BM_MATMUL
(
7
,
384
,
384
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录