Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
1888d2a9
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
1888d2a9
编写于
2月 26, 2018
作者:
刘
刘琦
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'macc' into 'master'
Add MACC metrics in benchmark See merge request !242
上级
858b5c7f
d1d7302c
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
137 addition
and
116 deletion
+137
-116
mace/core/testing/test_benchmark.cc
mace/core/testing/test_benchmark.cc
+12
-20
mace/core/testing/test_benchmark.h
mace/core/testing/test_benchmark.h
+1
-1
mace/examples/benchmark_example.cc
mace/examples/benchmark_example.cc
+2
-2
mace/ops/activation_benchmark.cc
mace/ops/activation_benchmark.cc
+41
-37
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+10
-10
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+1
-2
mace/ops/batch_to_space_benchmark.cc
mace/ops/batch_to_space_benchmark.cc
+1
-1
mace/ops/bias_add_benchmark.cc
mace/ops/bias_add_benchmark.cc
+1
-1
mace/ops/channel_shuffle_benchmark.cc
mace/ops/channel_shuffle_benchmark.cc
+1
-1
mace/ops/concat_benchmark.cc
mace/ops/concat_benchmark.cc
+2
-2
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+13
-1
mace/ops/depthwise_conv2d_benchmark.cc
mace/ops/depthwise_conv2d_benchmark.cc
+29
-17
mace/ops/eltwise_benchmark.cc
mace/ops/eltwise_benchmark.cc
+1
-1
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+1
-1
mace/ops/matmul_benchmark.cc
mace/ops/matmul_benchmark.cc
+13
-12
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+1
-1
mace/ops/resize_bilinear_benchmark.cc
mace/ops/resize_bilinear_benchmark.cc
+3
-2
mace/ops/softmax_benchmark.cc
mace/ops/softmax_benchmark.cc
+1
-1
mace/ops/space_to_batch_benchmark.cc
mace/ops/space_to_batch_benchmark.cc
+1
-1
mace/ops/winograd_transform_benchmark.cc
mace/ops/winograd_transform_benchmark.cc
+2
-2
未找到文件。
mace/core/testing/test_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -9,9 +9,9 @@
#include <regex>
#include <vector>
#include "mace/core/testing/test_benchmark.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
#include "mace/core/testing/test_benchmark.h"
namespace
mace
{
namespace
testing
{
...
...
@@ -19,7 +19,7 @@ namespace testing {
static
std
::
vector
<
Benchmark
*>
*
all_benchmarks
=
nullptr
;
static
std
::
string
label
;
static
int64_t
bytes_processed
;
static
int64_t
items
_processed
;
static
int64_t
macc
_processed
;
static
int64_t
accum_time
=
0
;
static
int64_t
start_time
=
0
;
...
...
@@ -81,8 +81,9 @@ void Benchmark::Run(const char *pattern) {
}
}
printf
(
"%-*s %10s %10s
\n
"
,
width
,
"Benchmark"
,
"Time(ns)"
,
"Iterations"
);
printf
(
"%s
\n
"
,
std
::
string
(
width
+
22
,
'-'
).
c_str
());
printf
(
"%-*s %10s %10s %10s %10s
\n
"
,
width
,
"Benchmark"
,
"Time(ns)"
,
"Iterations"
,
"Input(MB/s)"
,
"MACC(G/s)"
);
printf
(
"%s
\n
"
,
std
::
string
(
width
+
44
,
'-'
).
c_str
());
for
(
auto
b
:
*
all_benchmarks
)
{
if
(
!
std
::
regex_match
(
b
->
name_
,
match
,
regex
))
continue
;
for
(
auto
arg
:
b
->
args_
)
{
...
...
@@ -98,20 +99,11 @@ void Benchmark::Run(const char *pattern) {
double
seconds
;
b
->
Run
(
arg
.
first
,
arg
.
second
,
&
iters
,
&
seconds
);
char
buf
[
100
];
std
::
string
full_label
=
label
;
if
(
bytes_processed
>
0
)
{
snprintf
(
buf
,
sizeof
(
buf
),
" %.1fMB/s"
,
(
bytes_processed
*
1e-6
)
/
seconds
);
full_label
+=
buf
;
}
if
(
items_processed
>
0
)
{
snprintf
(
buf
,
sizeof
(
buf
),
" %.1fM items/s"
,
(
items_processed
*
1e-6
)
/
seconds
);
full_label
+=
buf
;
}
printf
(
"%-*s %10.0f %10d
\t
%s
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
full_label
.
c_str
());
float
mbps
=
(
bytes_processed
*
1e-6
)
/
seconds
;
// MACCs or other computations
float
gmaccs
=
(
macc_processed
*
1e-9
)
/
seconds
;
printf
(
"%-*s %10.0f %10d %10.2f %10.2f
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
mbps
,
gmaccs
);
}
}
}
...
...
@@ -130,7 +122,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
accum_time
=
0
;
start_time
=
utils
::
NowMicros
();
bytes_processed
=
-
1
;
items
_processed
=
-
1
;
macc
_processed
=
-
1
;
label
.
clear
();
if
(
fn0_
)
{
(
*
fn0_
)(
iters
);
...
...
@@ -158,7 +150,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
}
void
BytesProcessed
(
int64_t
n
)
{
bytes_processed
=
n
;
}
void
ItemsProcessed
(
int64_t
n
)
{
items
_processed
=
n
;
}
void
MaccProcessed
(
int64_t
n
)
{
macc
_processed
=
n
;
}
void
StartTiming
()
{
if
(
start_time
==
0
)
start_time
=
utils
::
NowMicros
();
}
...
...
mace/core/testing/test_benchmark.h
浏览文件 @
1888d2a9
...
...
@@ -43,7 +43,7 @@ class Benchmark {
void
RunBenchmarks
();
void
BytesProcessed
(
int64_t
);
void
Items
Processed
(
int64_t
);
void
Macc
Processed
(
int64_t
);
void
StartTiming
();
void
StopTiming
();
...
...
mace/examples/benchmark_example.cc
浏览文件 @
1888d2a9
...
...
@@ -7,7 +7,7 @@
static
void
foo
(
int
iters
)
{
static
const
int
N
=
32
;
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
N
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
float
*
inp
=
new
float
[
N
];
...
...
@@ -26,7 +26,7 @@ BENCHMARK(foo);
static
void
bar
(
int
iters
,
int
n
)
{
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
float
*
inp
=
new
float
[
n
];
...
...
mace/ops/activation_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -51,21 +51,22 @@ static void ReluBenchmark(
#define BM_RELU_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_RELU(N, C, H, W, TYPE) \
BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
#define BM_RELU(N, C, H, W) \
BM_RELU_MACRO(N, C, H, W, float, CPU); \
BM_RELU_MACRO(N, C, H, W, float, OPENCL); \
BM_RELU_MACRO(N, C, H, W, half, OPENCL);
BM_RELU
(
1
,
1
,
512
,
512
,
float
);
BM_RELU
(
1
,
3
,
128
,
128
,
float
);
BM_RELU
(
1
,
3
,
512
,
512
,
float
);
BM_RELU
(
1
,
32
,
112
,
112
,
float
);
BM_RELU
(
1
,
64
,
256
,
256
,
float
);
BM_RELU
(
1
,
1
,
512
,
512
);
BM_RELU
(
1
,
3
,
128
,
128
);
BM_RELU
(
1
,
3
,
512
,
512
);
BM_RELU
(
1
,
32
,
112
,
112
);
BM_RELU
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
static
void
ReluxBenchmark
(
...
...
@@ -112,21 +113,22 @@ static void ReluxBenchmark(
#define BM_RELUX_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ReluxBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_RELUX(N, C, H, W, TYPE) \
BM_RELUX_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELUX_MACRO(N, C, H, W, TYPE, OPENCL);
#define BM_RELUX(N, C, H, W) \
BM_RELUX_MACRO(N, C, H, W, float, CPU); \
BM_RELUX_MACRO(N, C, H, W, float, OPENCL); \
BM_RELUX_MACRO(N, C, H, W, half, OPENCL);
BM_RELUX
(
1
,
1
,
512
,
512
,
float
);
BM_RELUX
(
1
,
3
,
128
,
128
,
float
);
BM_RELUX
(
1
,
3
,
512
,
512
,
float
);
BM_RELUX
(
1
,
32
,
112
,
112
,
float
);
BM_RELUX
(
1
,
64
,
256
,
256
,
float
);
BM_RELUX
(
1
,
1
,
512
,
512
);
BM_RELUX
(
1
,
3
,
128
,
128
);
BM_RELUX
(
1
,
3
,
512
,
512
);
BM_RELUX
(
1
,
32
,
112
,
112
);
BM_RELUX
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
static
void
PreluBenchmark
(
...
...
@@ -173,21 +175,22 @@ static void PreluBenchmark(
#define BM_PRELU_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
PreluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_PRELU(N, C, H, W, TYPE) \
BM_PRELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_PRELU_MACRO(N, C, H, W, TYPE, OPENCL);
#define BM_PRELU(N, C, H, W) \
BM_PRELU_MACRO(N, C, H, W, float, CPU); \
BM_PRELU_MACRO(N, C, H, W, float, OPENCL); \
BM_PRELU_MACRO(N, C, H, W, half, OPENCL);
BM_PRELU
(
1
,
1
,
512
,
512
,
float
);
BM_PRELU
(
1
,
3
,
128
,
128
,
float
);
BM_PRELU
(
1
,
3
,
512
,
512
,
float
);
BM_PRELU
(
1
,
32
,
112
,
112
,
float
);
BM_PRELU
(
1
,
64
,
256
,
256
,
float
);
BM_PRELU
(
1
,
1
,
512
,
512
);
BM_PRELU
(
1
,
3
,
128
,
128
);
BM_PRELU
(
1
,
3
,
512
,
512
);
BM_PRELU
(
1
,
32
,
112
,
112
);
BM_PRELU
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
static
void
TanhBenchmark
(
...
...
@@ -232,21 +235,22 @@ static void TanhBenchmark(
#define BM_TANH_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
TanhBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_TANH(N, C, H, W, TYPE) \
BM_TANH_MACRO(N, C, H, W, TYPE, CPU); \
BM_TANH_MACRO(N, C, H, W, TYPE, OPENCL);
#define BM_TANH(N, C, H, W) \
BM_TANH_MACRO(N, C, H, W, float, CPU); \
BM_TANH_MACRO(N, C, H, W, float, OPENCL); \
BM_TANH_MACRO(N, C, H, W, half, OPENCL);
BM_TANH
(
1
,
1
,
512
,
512
,
float
);
BM_TANH
(
1
,
3
,
128
,
128
,
float
);
BM_TANH
(
1
,
3
,
512
,
512
,
float
);
BM_TANH
(
1
,
32
,
112
,
112
,
float
);
BM_TANH
(
1
,
64
,
256
,
256
,
float
);
BM_TANH
(
1
,
1
,
512
,
512
);
BM_TANH
(
1
,
3
,
128
,
128
);
BM_TANH
(
1
,
3
,
512
,
512
);
BM_TANH
(
1
,
32
,
112
,
112
);
BM_TANH
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
static
void
SigmoidBenchmark
(
...
...
@@ -292,7 +296,7 @@ static void SigmoidBenchmark(
static void BM_SIGMOID_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
SigmoidBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
mace/ops/addn_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -55,18 +55,18 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
}
}
#define BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, DEVICE) \
static void BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) *
N * H * W * C;
\
mace::testing::
ItemsProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
AddNBenchmark<DEVICE, TYPE>(iters, INPUTS, N, H, W, C); \
} \
#define BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, DEVICE)
\
static void BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE(
\
int iters) {
\
const int64_t tot = static_cast<int64_t>(iters) *
INPUTS * N * H * W * C;
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
AddNBenchmark<DEVICE, TYPE>(iters, INPUTS, N, H, W, C);
\
}
\
BENCHMARK(BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#define BM_ADDN(INPUTS, N, H, W, C) \
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); \
#define BM_ADDN(INPUTS, N, H, W, C)
\
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU);
\
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, OPENCL); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, half, OPENCL);
...
...
mace/ops/batch_norm_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -76,7 +76,7 @@ static void BatchNorm(
static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
@@ -84,7 +84,6 @@ static void BatchNorm(
#define BM_BATCH_NORM(N, C, H, W) \
BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, NEON); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, OPENCL); \
BM_BATCH_NORM_MACRO(N, C, H, W, half, OPENCL);
...
...
mace/ops/batch_to_space_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -41,7 +41,7 @@ static void BMBatchToSpace(
BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
} \
...
...
mace/ops/bias_add_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -53,7 +53,7 @@ static void BiasAdd(int iters, int batch, int channels, int height, int width) {
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
mace/ops/channel_shuffle_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -41,7 +41,7 @@ static void ChannelShuffle(
static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
ChannelShuffle<DEVICE>(iters, N, C, H, W, G); \
} \
...
...
mace/ops/concat_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -29,7 +29,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
net
.
RunOp
(
D
);
}
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
kDim0
*
dim1
*
2
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
...
...
@@ -80,7 +80,7 @@ static void OpenclConcatHelper(int iters,
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
(
net
.
GetTensor
(
"Input0"
)
->
size
()
+
net
.
GetTensor
(
"Input1"
)
->
size
());
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
...
...
mace/ops/conv_2d_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -83,8 +83,20 @@ static void Conv2d(int iters,
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t dilation = 1; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * OC * oh * ow * (KH * KW * C + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
OC); \
...
...
mace/ops/depthwise_conv2d_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -75,24 +75,36 @@ static void DepthwiseConv2d(int iters,
}
}
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \
DEVICE) \
static void \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, OC); \
} \
BENCHMARK( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \
DEVICE) \
static void \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t dilation = 1; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * C * M * oh * ow * (KH * KW + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, M); \
} \
BENCHMARK( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P,
OC
) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, float, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, float, OPENCL); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, half, OPENCL);
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P,
M
) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, float, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, float, OPENCL); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, half, OPENCL);
BM_DEPTHWISE_CONV_2D
(
1
,
32
,
112
,
112
,
3
,
3
,
1
,
SAME
,
1
);
BM_DEPTHWISE_CONV_2D
(
1
,
32
,
112
,
112
,
3
,
3
,
2
,
SAME
,
1
);
...
...
mace/ops/eltwise_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -61,7 +61,7 @@ static void EltwiseBenchmark(
BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
EltwiseBenchmark<DEVICE, TYPE>( \
iters, static_cast<kernels::EltwiseType>(ELT_TYPE), N, H, W, C); \
...
...
mace/ops/global_avg_pooling_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -40,7 +40,7 @@ static void GlobalAvgPooling(
static void BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
GlobalAvgPooling<DEVICE>(iters, N, C, H, W); \
} \
...
...
mace/ops/matmul_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -20,10 +20,8 @@ static void MatMulBenchmark(
net
.
AddRandomInput
<
D
,
float
>
(
"B"
,
{
batch
,
channels
,
out_width
,
1
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"A"
,
"AImage"
,
kernels
::
BufferType
::
IN_OUT_WIDTH
);
BufferToImage
<
D
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
BufferToImage
<
D
,
T
>
(
net
,
"A"
,
"AImage"
,
kernels
::
BufferType
::
IN_OUT_WIDTH
);
BufferToImage
<
D
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
OpDefBuilder
(
"MatMul"
,
"MatMulBM"
)
.
Input
(
"AImage"
)
...
...
@@ -52,16 +50,19 @@ static void MatMulBenchmark(
net
.
Sync
();
}
#define BM_MATMUL_MACRO(N, H, C, W, TYPE, DEVICE) \
static void BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
MatMulBenchmark<DEVICE, TYPE>(iters, N, H, C, W); \
} \
#define BM_MATMUL_MACRO(N, H, C, W, TYPE, DEVICE) \
static void BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * (C * H + H * W); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
MatMulBenchmark<DEVICE, TYPE>(iters, N, H, C, W); \
} \
BENCHMARK(BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#define BM_MATMUL(N, H, C, W) \
#define BM_MATMUL(N, H, C, W) \
BM_MATMUL_MACRO(N, H, C, W, float, CPU); \
BM_MATMUL_MACRO(N, H, C, W, float, OPENCL); \
BM_MATMUL_MACRO(N, H, C, W, half, OPENCL);
BM_MATMUL
(
16
,
32
,
128
,
49
);
...
...
mace/ops/pooling_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -54,7 +54,7 @@ static void Pooling(int iters,
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
PoolingType::PO); \
...
...
mace/ops/resize_bilinear_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -61,8 +61,9 @@ static void ResizeBilinearBenchmark(int iters,
static void \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H1 * W1; \
mace::testing::ItemsProcessed(tot); \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ResizeBilinearBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1); \
} \
...
...
mace/ops/softmax_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -49,7 +49,7 @@ static void SoftmaxBenchmark(
#define BM_SOFTMAX_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_SOFTMAX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
SoftmaxBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
mace/ops/space_to_batch_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -42,7 +42,7 @@ static void BMSpaceToBatch(
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
} \
...
...
mace/ops/winograd_transform_benchmark.cc
浏览文件 @
1888d2a9
...
...
@@ -41,7 +41,7 @@ static void BMWinogradTransform(
BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradTransform<DEVICE, TYPE>(iters, N, H, W, C); \
} \
...
...
@@ -93,7 +93,7 @@ static void BMWinogradInverseTransform(
BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradInverseTransform<DEVICE, TYPE>(iters, N, H, W, C); \
} \
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录