Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
冰之2023
Mace
提交
d1d7302c
Mace
项目概览
冰之2023
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
d1d7302c
编写于
2月 24, 2018
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add MACC metrics in benchmark
上级
858b5c7f
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
137 addition
and
116 deletion
+137
-116
mace/core/testing/test_benchmark.cc
mace/core/testing/test_benchmark.cc
+12
-20
mace/core/testing/test_benchmark.h
mace/core/testing/test_benchmark.h
+1
-1
mace/examples/benchmark_example.cc
mace/examples/benchmark_example.cc
+2
-2
mace/ops/activation_benchmark.cc
mace/ops/activation_benchmark.cc
+41
-37
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+10
-10
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+1
-2
mace/ops/batch_to_space_benchmark.cc
mace/ops/batch_to_space_benchmark.cc
+1
-1
mace/ops/bias_add_benchmark.cc
mace/ops/bias_add_benchmark.cc
+1
-1
mace/ops/channel_shuffle_benchmark.cc
mace/ops/channel_shuffle_benchmark.cc
+1
-1
mace/ops/concat_benchmark.cc
mace/ops/concat_benchmark.cc
+2
-2
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+13
-1
mace/ops/depthwise_conv2d_benchmark.cc
mace/ops/depthwise_conv2d_benchmark.cc
+29
-17
mace/ops/eltwise_benchmark.cc
mace/ops/eltwise_benchmark.cc
+1
-1
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+1
-1
mace/ops/matmul_benchmark.cc
mace/ops/matmul_benchmark.cc
+13
-12
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+1
-1
mace/ops/resize_bilinear_benchmark.cc
mace/ops/resize_bilinear_benchmark.cc
+3
-2
mace/ops/softmax_benchmark.cc
mace/ops/softmax_benchmark.cc
+1
-1
mace/ops/space_to_batch_benchmark.cc
mace/ops/space_to_batch_benchmark.cc
+1
-1
mace/ops/winograd_transform_benchmark.cc
mace/ops/winograd_transform_benchmark.cc
+2
-2
未找到文件。
mace/core/testing/test_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -9,9 +9,9 @@
#include <regex>
#include <vector>
#include "mace/core/testing/test_benchmark.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
#include "mace/core/testing/test_benchmark.h"
namespace
mace
{
namespace
testing
{
...
...
@@ -19,7 +19,7 @@ namespace testing {
static
std
::
vector
<
Benchmark
*>
*
all_benchmarks
=
nullptr
;
static
std
::
string
label
;
static
int64_t
bytes_processed
;
static
int64_t
items
_processed
;
static
int64_t
macc
_processed
;
static
int64_t
accum_time
=
0
;
static
int64_t
start_time
=
0
;
...
...
@@ -81,8 +81,9 @@ void Benchmark::Run(const char *pattern) {
}
}
printf
(
"%-*s %10s %10s
\n
"
,
width
,
"Benchmark"
,
"Time(ns)"
,
"Iterations"
);
printf
(
"%s
\n
"
,
std
::
string
(
width
+
22
,
'-'
).
c_str
());
printf
(
"%-*s %10s %10s %10s %10s
\n
"
,
width
,
"Benchmark"
,
"Time(ns)"
,
"Iterations"
,
"Input(MB/s)"
,
"MACC(G/s)"
);
printf
(
"%s
\n
"
,
std
::
string
(
width
+
44
,
'-'
).
c_str
());
for
(
auto
b
:
*
all_benchmarks
)
{
if
(
!
std
::
regex_match
(
b
->
name_
,
match
,
regex
))
continue
;
for
(
auto
arg
:
b
->
args_
)
{
...
...
@@ -98,20 +99,11 @@ void Benchmark::Run(const char *pattern) {
double
seconds
;
b
->
Run
(
arg
.
first
,
arg
.
second
,
&
iters
,
&
seconds
);
char
buf
[
100
];
std
::
string
full_label
=
label
;
if
(
bytes_processed
>
0
)
{
snprintf
(
buf
,
sizeof
(
buf
),
" %.1fMB/s"
,
(
bytes_processed
*
1e-6
)
/
seconds
);
full_label
+=
buf
;
}
if
(
items_processed
>
0
)
{
snprintf
(
buf
,
sizeof
(
buf
),
" %.1fM items/s"
,
(
items_processed
*
1e-6
)
/
seconds
);
full_label
+=
buf
;
}
printf
(
"%-*s %10.0f %10d
\t
%s
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
full_label
.
c_str
());
float
mbps
=
(
bytes_processed
*
1e-6
)
/
seconds
;
// MACCs or other computations
float
gmaccs
=
(
macc_processed
*
1e-9
)
/
seconds
;
printf
(
"%-*s %10.0f %10d %10.2f %10.2f
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
mbps
,
gmaccs
);
}
}
}
...
...
@@ -130,7 +122,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
accum_time
=
0
;
start_time
=
utils
::
NowMicros
();
bytes_processed
=
-
1
;
items
_processed
=
-
1
;
macc
_processed
=
-
1
;
label
.
clear
();
if
(
fn0_
)
{
(
*
fn0_
)(
iters
);
...
...
@@ -158,7 +150,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
}
void
BytesProcessed
(
int64_t
n
)
{
bytes_processed
=
n
;
}
void
ItemsProcessed
(
int64_t
n
)
{
items
_processed
=
n
;
}
void
MaccProcessed
(
int64_t
n
)
{
macc
_processed
=
n
;
}
void
StartTiming
()
{
if
(
start_time
==
0
)
start_time
=
utils
::
NowMicros
();
}
...
...
mace/core/testing/test_benchmark.h
浏览文件 @
d1d7302c
...
...
@@ -43,7 +43,7 @@ class Benchmark {
void
RunBenchmarks
();
void
BytesProcessed
(
int64_t
);
void
Items
Processed
(
int64_t
);
void
Macc
Processed
(
int64_t
);
void
StartTiming
();
void
StopTiming
();
...
...
mace/examples/benchmark_example.cc
浏览文件 @
d1d7302c
...
...
@@ -7,7 +7,7 @@
static
void
foo
(
int
iters
)
{
static
const
int
N
=
32
;
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
N
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
float
*
inp
=
new
float
[
N
];
...
...
@@ -26,7 +26,7 @@ BENCHMARK(foo);
static
void
bar
(
int
iters
,
int
n
)
{
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
float
*
inp
=
new
float
[
n
];
...
...
mace/ops/activation_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -51,21 +51,22 @@ static void ReluBenchmark(
#define BM_RELU_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_RELU(N, C, H, W, TYPE) \
BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
#define BM_RELU(N, C, H, W) \
BM_RELU_MACRO(N, C, H, W, float, CPU); \
BM_RELU_MACRO(N, C, H, W, float, OPENCL); \
BM_RELU_MACRO(N, C, H, W, half, OPENCL);
BM_RELU
(
1
,
1
,
512
,
512
,
float
);
BM_RELU
(
1
,
3
,
128
,
128
,
float
);
BM_RELU
(
1
,
3
,
512
,
512
,
float
);
BM_RELU
(
1
,
32
,
112
,
112
,
float
);
BM_RELU
(
1
,
64
,
256
,
256
,
float
);
BM_RELU
(
1
,
1
,
512
,
512
);
BM_RELU
(
1
,
3
,
128
,
128
);
BM_RELU
(
1
,
3
,
512
,
512
);
BM_RELU
(
1
,
32
,
112
,
112
);
BM_RELU
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
static
void
ReluxBenchmark
(
...
...
@@ -112,21 +113,22 @@ static void ReluxBenchmark(
#define BM_RELUX_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ReluxBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_RELUX(N, C, H, W, TYPE) \
BM_RELUX_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELUX_MACRO(N, C, H, W, TYPE, OPENCL);
#define BM_RELUX(N, C, H, W) \
BM_RELUX_MACRO(N, C, H, W, float, CPU); \
BM_RELUX_MACRO(N, C, H, W, float, OPENCL); \
BM_RELUX_MACRO(N, C, H, W, half, OPENCL);
BM_RELUX
(
1
,
1
,
512
,
512
,
float
);
BM_RELUX
(
1
,
3
,
128
,
128
,
float
);
BM_RELUX
(
1
,
3
,
512
,
512
,
float
);
BM_RELUX
(
1
,
32
,
112
,
112
,
float
);
BM_RELUX
(
1
,
64
,
256
,
256
,
float
);
BM_RELUX
(
1
,
1
,
512
,
512
);
BM_RELUX
(
1
,
3
,
128
,
128
);
BM_RELUX
(
1
,
3
,
512
,
512
);
BM_RELUX
(
1
,
32
,
112
,
112
);
BM_RELUX
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
static
void
PreluBenchmark
(
...
...
@@ -173,21 +175,22 @@ static void PreluBenchmark(
#define BM_PRELU_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
PreluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_PRELU(N, C, H, W, TYPE) \
BM_PRELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_PRELU_MACRO(N, C, H, W, TYPE, OPENCL);
#define BM_PRELU(N, C, H, W) \
BM_PRELU_MACRO(N, C, H, W, float, CPU); \
BM_PRELU_MACRO(N, C, H, W, float, OPENCL); \
BM_PRELU_MACRO(N, C, H, W, half, OPENCL);
BM_PRELU
(
1
,
1
,
512
,
512
,
float
);
BM_PRELU
(
1
,
3
,
128
,
128
,
float
);
BM_PRELU
(
1
,
3
,
512
,
512
,
float
);
BM_PRELU
(
1
,
32
,
112
,
112
,
float
);
BM_PRELU
(
1
,
64
,
256
,
256
,
float
);
BM_PRELU
(
1
,
1
,
512
,
512
);
BM_PRELU
(
1
,
3
,
128
,
128
);
BM_PRELU
(
1
,
3
,
512
,
512
);
BM_PRELU
(
1
,
32
,
112
,
112
);
BM_PRELU
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
static
void
TanhBenchmark
(
...
...
@@ -232,21 +235,22 @@ static void TanhBenchmark(
#define BM_TANH_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
TanhBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_TANH(N, C, H, W, TYPE) \
BM_TANH_MACRO(N, C, H, W, TYPE, CPU); \
BM_TANH_MACRO(N, C, H, W, TYPE, OPENCL);
#define BM_TANH(N, C, H, W) \
BM_TANH_MACRO(N, C, H, W, float, CPU); \
BM_TANH_MACRO(N, C, H, W, float, OPENCL); \
BM_TANH_MACRO(N, C, H, W, half, OPENCL);
BM_TANH
(
1
,
1
,
512
,
512
,
float
);
BM_TANH
(
1
,
3
,
128
,
128
,
float
);
BM_TANH
(
1
,
3
,
512
,
512
,
float
);
BM_TANH
(
1
,
32
,
112
,
112
,
float
);
BM_TANH
(
1
,
64
,
256
,
256
,
float
);
BM_TANH
(
1
,
1
,
512
,
512
);
BM_TANH
(
1
,
3
,
128
,
128
);
BM_TANH
(
1
,
3
,
512
,
512
);
BM_TANH
(
1
,
32
,
112
,
112
);
BM_TANH
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
static
void
SigmoidBenchmark
(
...
...
@@ -292,7 +296,7 @@ static void SigmoidBenchmark(
static void BM_SIGMOID_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
SigmoidBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
mace/ops/addn_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -55,18 +55,18 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
}
}
#define BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, DEVICE) \
static void BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) *
N * H * W * C;
\
mace::testing::
ItemsProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
AddNBenchmark<DEVICE, TYPE>(iters, INPUTS, N, H, W, C); \
} \
#define BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, DEVICE)
\
static void BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE(
\
int iters) {
\
const int64_t tot = static_cast<int64_t>(iters) *
INPUTS * N * H * W * C;
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
AddNBenchmark<DEVICE, TYPE>(iters, INPUTS, N, H, W, C);
\
}
\
BENCHMARK(BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#define BM_ADDN(INPUTS, N, H, W, C) \
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); \
#define BM_ADDN(INPUTS, N, H, W, C)
\
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU);
\
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, OPENCL); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, half, OPENCL);
...
...
mace/ops/batch_norm_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -76,7 +76,7 @@ static void BatchNorm(
static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
@@ -84,7 +84,6 @@ static void BatchNorm(
#define BM_BATCH_NORM(N, C, H, W) \
BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, NEON); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, OPENCL); \
BM_BATCH_NORM_MACRO(N, C, H, W, half, OPENCL);
...
...
mace/ops/batch_to_space_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -41,7 +41,7 @@ static void BMBatchToSpace(
BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
} \
...
...
mace/ops/bias_add_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -53,7 +53,7 @@ static void BiasAdd(int iters, int batch, int channels, int height, int width) {
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
mace/ops/channel_shuffle_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -41,7 +41,7 @@ static void ChannelShuffle(
static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
ChannelShuffle<DEVICE>(iters, N, C, H, W, G); \
} \
...
...
mace/ops/concat_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -29,7 +29,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
net
.
RunOp
(
D
);
}
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
kDim0
*
dim1
*
2
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
...
...
@@ -80,7 +80,7 @@ static void OpenclConcatHelper(int iters,
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
(
net
.
GetTensor
(
"Input0"
)
->
size
()
+
net
.
GetTensor
(
"Input1"
)
->
size
());
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
...
...
mace/ops/conv_2d_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -83,8 +83,20 @@ static void Conv2d(int iters,
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t dilation = 1; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * OC * oh * ow * (KH * KW * C + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
OC); \
...
...
mace/ops/depthwise_conv2d_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -75,24 +75,36 @@ static void DepthwiseConv2d(int iters,
}
}
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \
DEVICE) \
static void \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, OC); \
} \
BENCHMARK( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \
DEVICE) \
static void \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t dilation = 1; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * C * M * oh * ow * (KH * KW + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, M); \
} \
BENCHMARK( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P,
OC
) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, float, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, float, OPENCL); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, half, OPENCL);
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P,
M
) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, float, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, float, OPENCL); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, half, OPENCL);
BM_DEPTHWISE_CONV_2D
(
1
,
32
,
112
,
112
,
3
,
3
,
1
,
SAME
,
1
);
BM_DEPTHWISE_CONV_2D
(
1
,
32
,
112
,
112
,
3
,
3
,
2
,
SAME
,
1
);
...
...
mace/ops/eltwise_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -61,7 +61,7 @@ static void EltwiseBenchmark(
BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
EltwiseBenchmark<DEVICE, TYPE>( \
iters, static_cast<kernels::EltwiseType>(ELT_TYPE), N, H, W, C); \
...
...
mace/ops/global_avg_pooling_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -40,7 +40,7 @@ static void GlobalAvgPooling(
static void BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
GlobalAvgPooling<DEVICE>(iters, N, C, H, W); \
} \
...
...
mace/ops/matmul_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -20,10 +20,8 @@ static void MatMulBenchmark(
net
.
AddRandomInput
<
D
,
float
>
(
"B"
,
{
batch
,
channels
,
out_width
,
1
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"A"
,
"AImage"
,
kernels
::
BufferType
::
IN_OUT_WIDTH
);
BufferToImage
<
D
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
BufferToImage
<
D
,
T
>
(
net
,
"A"
,
"AImage"
,
kernels
::
BufferType
::
IN_OUT_WIDTH
);
BufferToImage
<
D
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
OpDefBuilder
(
"MatMul"
,
"MatMulBM"
)
.
Input
(
"AImage"
)
...
...
@@ -52,16 +50,19 @@ static void MatMulBenchmark(
net
.
Sync
();
}
#define BM_MATMUL_MACRO(N, H, C, W, TYPE, DEVICE) \
static void BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
MatMulBenchmark<DEVICE, TYPE>(iters, N, H, C, W); \
} \
#define BM_MATMUL_MACRO(N, H, C, W, TYPE, DEVICE) \
static void BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * (C * H + H * W); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
MatMulBenchmark<DEVICE, TYPE>(iters, N, H, C, W); \
} \
BENCHMARK(BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#define BM_MATMUL(N, H, C, W) \
#define BM_MATMUL(N, H, C, W) \
BM_MATMUL_MACRO(N, H, C, W, float, CPU); \
BM_MATMUL_MACRO(N, H, C, W, float, OPENCL); \
BM_MATMUL_MACRO(N, H, C, W, half, OPENCL);
BM_MATMUL
(
16
,
32
,
128
,
49
);
...
...
mace/ops/pooling_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -54,7 +54,7 @@ static void Pooling(int iters,
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
PoolingType::PO); \
...
...
mace/ops/resize_bilinear_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -61,8 +61,9 @@ static void ResizeBilinearBenchmark(int iters,
static void \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H1 * W1; \
mace::testing::ItemsProcessed(tot); \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ResizeBilinearBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1); \
} \
...
...
mace/ops/softmax_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -49,7 +49,7 @@ static void SoftmaxBenchmark(
#define BM_SOFTMAX_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_SOFTMAX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
SoftmaxBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
mace/ops/space_to_batch_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -42,7 +42,7 @@ static void BMSpaceToBatch(
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
} \
...
...
mace/ops/winograd_transform_benchmark.cc
浏览文件 @
d1d7302c
...
...
@@ -41,7 +41,7 @@ static void BMWinogradTransform(
BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradTransform<DEVICE, TYPE>(iters, N, H, W, C); \
} \
...
...
@@ -93,7 +93,7 @@ static void BMWinogradInverseTransform(
BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradInverseTransform<DEVICE, TYPE>(iters, N, H, W, C); \
} \
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录