Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
1888d2a9
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
1888d2a9
编写于
2月 26, 2018
作者:
刘
刘琦
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'macc' into 'master'
Add MACC metrics in benchmark. See merge request !242.
上级
858b5c7f
d1d7302c
变更
20
隐藏空白更改
内联
并排
Showing 20 changed files with 137 additions and 116 deletions
+137
-116
mace/core/testing/test_benchmark.cc
mace/core/testing/test_benchmark.cc
+12
-20
mace/core/testing/test_benchmark.h
mace/core/testing/test_benchmark.h
+1
-1
mace/examples/benchmark_example.cc
mace/examples/benchmark_example.cc
+2
-2
mace/ops/activation_benchmark.cc
mace/ops/activation_benchmark.cc
+41
-37
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+10
-10
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+1
-2
mace/ops/batch_to_space_benchmark.cc
mace/ops/batch_to_space_benchmark.cc
+1
-1
mace/ops/bias_add_benchmark.cc
mace/ops/bias_add_benchmark.cc
+1
-1
mace/ops/channel_shuffle_benchmark.cc
mace/ops/channel_shuffle_benchmark.cc
+1
-1
mace/ops/concat_benchmark.cc
mace/ops/concat_benchmark.cc
+2
-2
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+13
-1
mace/ops/depthwise_conv2d_benchmark.cc
mace/ops/depthwise_conv2d_benchmark.cc
+29
-17
mace/ops/eltwise_benchmark.cc
mace/ops/eltwise_benchmark.cc
+1
-1
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+1
-1
mace/ops/matmul_benchmark.cc
mace/ops/matmul_benchmark.cc
+13
-12
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+1
-1
mace/ops/resize_bilinear_benchmark.cc
mace/ops/resize_bilinear_benchmark.cc
+3
-2
mace/ops/softmax_benchmark.cc
mace/ops/softmax_benchmark.cc
+1
-1
mace/ops/space_to_batch_benchmark.cc
mace/ops/space_to_batch_benchmark.cc
+1
-1
mace/ops/winograd_transform_benchmark.cc
mace/ops/winograd_transform_benchmark.cc
+2
-2
未找到文件。
mace/core/testing/test_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -9,9 +9,9 @@
...
@@ -9,9 +9,9 @@
#include <regex>
#include <regex>
#include <vector>
#include <vector>
#include "mace/core/testing/test_benchmark.h"
#include "mace/utils/env_time.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
#include "mace/utils/logging.h"
#include "mace/core/testing/test_benchmark.h"
namespace
mace
{
namespace
mace
{
namespace
testing
{
namespace
testing
{
...
@@ -19,7 +19,7 @@ namespace testing {
...
@@ -19,7 +19,7 @@ namespace testing {
static
std
::
vector
<
Benchmark
*>
*
all_benchmarks
=
nullptr
;
static
std
::
vector
<
Benchmark
*>
*
all_benchmarks
=
nullptr
;
static
std
::
string
label
;
static
std
::
string
label
;
static
int64_t
bytes_processed
;
static
int64_t
bytes_processed
;
static
int64_t
items
_processed
;
static
int64_t
macc
_processed
;
static
int64_t
accum_time
=
0
;
static
int64_t
accum_time
=
0
;
static
int64_t
start_time
=
0
;
static
int64_t
start_time
=
0
;
...
@@ -81,8 +81,9 @@ void Benchmark::Run(const char *pattern) {
...
@@ -81,8 +81,9 @@ void Benchmark::Run(const char *pattern) {
}
}
}
}
printf
(
"%-*s %10s %10s
\n
"
,
width
,
"Benchmark"
,
"Time(ns)"
,
"Iterations"
);
printf
(
"%-*s %10s %10s %10s %10s
\n
"
,
width
,
"Benchmark"
,
"Time(ns)"
,
printf
(
"%s
\n
"
,
std
::
string
(
width
+
22
,
'-'
).
c_str
());
"Iterations"
,
"Input(MB/s)"
,
"MACC(G/s)"
);
printf
(
"%s
\n
"
,
std
::
string
(
width
+
44
,
'-'
).
c_str
());
for
(
auto
b
:
*
all_benchmarks
)
{
for
(
auto
b
:
*
all_benchmarks
)
{
if
(
!
std
::
regex_match
(
b
->
name_
,
match
,
regex
))
continue
;
if
(
!
std
::
regex_match
(
b
->
name_
,
match
,
regex
))
continue
;
for
(
auto
arg
:
b
->
args_
)
{
for
(
auto
arg
:
b
->
args_
)
{
...
@@ -98,20 +99,11 @@ void Benchmark::Run(const char *pattern) {
...
@@ -98,20 +99,11 @@ void Benchmark::Run(const char *pattern) {
double
seconds
;
double
seconds
;
b
->
Run
(
arg
.
first
,
arg
.
second
,
&
iters
,
&
seconds
);
b
->
Run
(
arg
.
first
,
arg
.
second
,
&
iters
,
&
seconds
);
char
buf
[
100
];
float
mbps
=
(
bytes_processed
*
1e-6
)
/
seconds
;
std
::
string
full_label
=
label
;
// MACCs or other computations
if
(
bytes_processed
>
0
)
{
float
gmaccs
=
(
macc_processed
*
1e-9
)
/
seconds
;
snprintf
(
buf
,
sizeof
(
buf
),
" %.1fMB/s"
,
printf
(
"%-*s %10.0f %10d %10.2f %10.2f
\n
"
,
width
,
name
,
(
bytes_processed
*
1e-6
)
/
seconds
);
seconds
*
1e9
/
iters
,
iters
,
mbps
,
gmaccs
);
full_label
+=
buf
;
}
if
(
items_processed
>
0
)
{
snprintf
(
buf
,
sizeof
(
buf
),
" %.1fM items/s"
,
(
items_processed
*
1e-6
)
/
seconds
);
full_label
+=
buf
;
}
printf
(
"%-*s %10.0f %10d
\t
%s
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
full_label
.
c_str
());
}
}
}
}
}
}
...
@@ -130,7 +122,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
...
@@ -130,7 +122,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
accum_time
=
0
;
accum_time
=
0
;
start_time
=
utils
::
NowMicros
();
start_time
=
utils
::
NowMicros
();
bytes_processed
=
-
1
;
bytes_processed
=
-
1
;
items
_processed
=
-
1
;
macc
_processed
=
-
1
;
label
.
clear
();
label
.
clear
();
if
(
fn0_
)
{
if
(
fn0_
)
{
(
*
fn0_
)(
iters
);
(
*
fn0_
)(
iters
);
...
@@ -158,7 +150,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
...
@@ -158,7 +150,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
}
}
void
BytesProcessed
(
int64_t
n
)
{
bytes_processed
=
n
;
}
void
BytesProcessed
(
int64_t
n
)
{
bytes_processed
=
n
;
}
void
ItemsProcessed
(
int64_t
n
)
{
items
_processed
=
n
;
}
void
MaccProcessed
(
int64_t
n
)
{
macc
_processed
=
n
;
}
void
StartTiming
()
{
void
StartTiming
()
{
if
(
start_time
==
0
)
start_time
=
utils
::
NowMicros
();
if
(
start_time
==
0
)
start_time
=
utils
::
NowMicros
();
}
}
...
...
mace/core/testing/test_benchmark.h
浏览文件 @
1888d2a9
...
@@ -43,7 +43,7 @@ class Benchmark {
...
@@ -43,7 +43,7 @@ class Benchmark {
void
RunBenchmarks
();
void
RunBenchmarks
();
void
BytesProcessed
(
int64_t
);
void
BytesProcessed
(
int64_t
);
void
Items
Processed
(
int64_t
);
void
Macc
Processed
(
int64_t
);
void
StartTiming
();
void
StartTiming
();
void
StopTiming
();
void
StopTiming
();
...
...
mace/examples/benchmark_example.cc
浏览文件 @
1888d2a9
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
static
void
foo
(
int
iters
)
{
static
void
foo
(
int
iters
)
{
static
const
int
N
=
32
;
static
const
int
N
=
32
;
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
N
;
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
N
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
float
*
inp
=
new
float
[
N
];
float
*
inp
=
new
float
[
N
];
...
@@ -26,7 +26,7 @@ BENCHMARK(foo);
...
@@ -26,7 +26,7 @@ BENCHMARK(foo);
static
void
bar
(
int
iters
,
int
n
)
{
static
void
bar
(
int
iters
,
int
n
)
{
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
;
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
float
*
inp
=
new
float
[
n
];
float
*
inp
=
new
float
[
n
];
...
...
mace/ops/activation_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -51,21 +51,22 @@ static void ReluBenchmark(
...
@@ -51,21 +51,22 @@ static void ReluBenchmark(
#define BM_RELU_MACRO(N, C, H, W, TYPE, DEVICE) \
#define BM_RELU_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
static void BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
ReluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
} \
BENCHMARK(BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_RELU(N, C, H, W, TYPE) \
#define BM_RELU(N, C, H, W) \
BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELU_MACRO(N, C, H, W, float, CPU); \
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
BM_RELU_MACRO(N, C, H, W, float, OPENCL); \
BM_RELU_MACRO(N, C, H, W, half, OPENCL);
BM_RELU
(
1
,
1
,
512
,
512
,
float
);
BM_RELU
(
1
,
1
,
512
,
512
);
BM_RELU
(
1
,
3
,
128
,
128
,
float
);
BM_RELU
(
1
,
3
,
128
,
128
);
BM_RELU
(
1
,
3
,
512
,
512
,
float
);
BM_RELU
(
1
,
3
,
512
,
512
);
BM_RELU
(
1
,
32
,
112
,
112
,
float
);
BM_RELU
(
1
,
32
,
112
,
112
);
BM_RELU
(
1
,
64
,
256
,
256
,
float
);
BM_RELU
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
ReluxBenchmark
(
static
void
ReluxBenchmark
(
...
@@ -112,21 +113,22 @@ static void ReluxBenchmark(
...
@@ -112,21 +113,22 @@ static void ReluxBenchmark(
#define BM_RELUX_MACRO(N, C, H, W, TYPE, DEVICE) \
#define BM_RELUX_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
static void BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ReluxBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
ReluxBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
} \
BENCHMARK(BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_RELUX(N, C, H, W, TYPE) \
#define BM_RELUX(N, C, H, W) \
BM_RELUX_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELUX_MACRO(N, C, H, W, float, CPU); \
BM_RELUX_MACRO(N, C, H, W, TYPE, OPENCL);
BM_RELUX_MACRO(N, C, H, W, float, OPENCL); \
BM_RELUX_MACRO(N, C, H, W, half, OPENCL);
BM_RELUX
(
1
,
1
,
512
,
512
,
float
);
BM_RELUX
(
1
,
1
,
512
,
512
);
BM_RELUX
(
1
,
3
,
128
,
128
,
float
);
BM_RELUX
(
1
,
3
,
128
,
128
);
BM_RELUX
(
1
,
3
,
512
,
512
,
float
);
BM_RELUX
(
1
,
3
,
512
,
512
);
BM_RELUX
(
1
,
32
,
112
,
112
,
float
);
BM_RELUX
(
1
,
32
,
112
,
112
);
BM_RELUX
(
1
,
64
,
256
,
256
,
float
);
BM_RELUX
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
PreluBenchmark
(
static
void
PreluBenchmark
(
...
@@ -173,21 +175,22 @@ static void PreluBenchmark(
...
@@ -173,21 +175,22 @@ static void PreluBenchmark(
#define BM_PRELU_MACRO(N, C, H, W, TYPE, DEVICE) \
#define BM_PRELU_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
static void BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
PreluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
PreluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
} \
BENCHMARK(BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_PRELU(N, C, H, W, TYPE) \
#define BM_PRELU(N, C, H, W) \
BM_PRELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_PRELU_MACRO(N, C, H, W, float, CPU); \
BM_PRELU_MACRO(N, C, H, W, TYPE, OPENCL);
BM_PRELU_MACRO(N, C, H, W, float, OPENCL); \
BM_PRELU_MACRO(N, C, H, W, half, OPENCL);
BM_PRELU
(
1
,
1
,
512
,
512
,
float
);
BM_PRELU
(
1
,
1
,
512
,
512
);
BM_PRELU
(
1
,
3
,
128
,
128
,
float
);
BM_PRELU
(
1
,
3
,
128
,
128
);
BM_PRELU
(
1
,
3
,
512
,
512
,
float
);
BM_PRELU
(
1
,
3
,
512
,
512
);
BM_PRELU
(
1
,
32
,
112
,
112
,
float
);
BM_PRELU
(
1
,
32
,
112
,
112
);
BM_PRELU
(
1
,
64
,
256
,
256
,
float
);
BM_PRELU
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TanhBenchmark
(
static
void
TanhBenchmark
(
...
@@ -232,21 +235,22 @@ static void TanhBenchmark(
...
@@ -232,21 +235,22 @@ static void TanhBenchmark(
#define BM_TANH_MACRO(N, C, H, W, TYPE, DEVICE) \
#define BM_TANH_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
static void BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
TanhBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
TanhBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
} \
BENCHMARK(BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_TANH(N, C, H, W, TYPE) \
#define BM_TANH(N, C, H, W) \
BM_TANH_MACRO(N, C, H, W, TYPE, CPU); \
BM_TANH_MACRO(N, C, H, W, float, CPU); \
BM_TANH_MACRO(N, C, H, W, TYPE, OPENCL);
BM_TANH_MACRO(N, C, H, W, float, OPENCL); \
BM_TANH_MACRO(N, C, H, W, half, OPENCL);
BM_TANH
(
1
,
1
,
512
,
512
,
float
);
BM_TANH
(
1
,
1
,
512
,
512
);
BM_TANH
(
1
,
3
,
128
,
128
,
float
);
BM_TANH
(
1
,
3
,
128
,
128
);
BM_TANH
(
1
,
3
,
512
,
512
,
float
);
BM_TANH
(
1
,
3
,
512
,
512
);
BM_TANH
(
1
,
32
,
112
,
112
,
float
);
BM_TANH
(
1
,
32
,
112
,
112
);
BM_TANH
(
1
,
64
,
256
,
256
,
float
);
BM_TANH
(
1
,
64
,
256
,
256
);
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
SigmoidBenchmark
(
static
void
SigmoidBenchmark
(
...
@@ -292,7 +296,7 @@ static void SigmoidBenchmark(
...
@@ -292,7 +296,7 @@ static void SigmoidBenchmark(
static void BM_SIGMOID_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
static void BM_SIGMOID_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
SigmoidBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
SigmoidBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
} \
...
...
mace/ops/addn_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -55,18 +55,18 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
...
@@ -55,18 +55,18 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
}
}
}
}
#define BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, DEVICE) \
#define BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, DEVICE)
\
static void BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
static void BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE(
\
int iters) { \
int iters) {
\
const int64_t tot = static_cast<int64_t>(iters) *
N * H * W * C;
\
const int64_t tot = static_cast<int64_t>(iters) *
INPUTS * N * H * W * C;
\
mace::testing::
ItemsProcessed(tot);
\
mace::testing::
MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
AddNBenchmark<DEVICE, TYPE>(iters, INPUTS, N, H, W, C); \
AddNBenchmark<DEVICE, TYPE>(iters, INPUTS, N, H, W, C);
\
} \
}
\
BENCHMARK(BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
BENCHMARK(BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#define BM_ADDN(INPUTS, N, H, W, C) \
#define BM_ADDN(INPUTS, N, H, W, C)
\
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU);
\
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, OPENCL); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, OPENCL); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, half, OPENCL);
BM_ADDN_MACRO(INPUTS, N, H, W, C, half, OPENCL);
...
...
mace/ops/batch_norm_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -76,7 +76,7 @@ static void BatchNorm(
...
@@ -76,7 +76,7 @@ static void BatchNorm(
static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
} \
} \
...
@@ -84,7 +84,6 @@ static void BatchNorm(
...
@@ -84,7 +84,6 @@ static void BatchNorm(
#define BM_BATCH_NORM(N, C, H, W) \
#define BM_BATCH_NORM(N, C, H, W) \
BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, NEON); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, OPENCL); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, OPENCL); \
BM_BATCH_NORM_MACRO(N, C, H, W, half, OPENCL);
BM_BATCH_NORM_MACRO(N, C, H, W, half, OPENCL);
...
...
mace/ops/batch_to_space_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -41,7 +41,7 @@ static void BMBatchToSpace(
...
@@ -41,7 +41,7 @@ static void BMBatchToSpace(
BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
} \
} \
...
...
mace/ops/bias_add_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -53,7 +53,7 @@ static void BiasAdd(int iters, int batch, int channels, int height, int width) {
...
@@ -53,7 +53,7 @@ static void BiasAdd(int iters, int batch, int channels, int height, int width) {
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
} \
} \
...
...
mace/ops/channel_shuffle_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -41,7 +41,7 @@ static void ChannelShuffle(
...
@@ -41,7 +41,7 @@ static void ChannelShuffle(
static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE( \
static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
ChannelShuffle<DEVICE>(iters, N, C, H, W, G); \
ChannelShuffle<DEVICE>(iters, N, C, H, W, G); \
} \
} \
...
...
mace/ops/concat_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -29,7 +29,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
...
@@ -29,7 +29,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
}
}
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
kDim0
*
dim1
*
2
;
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
kDim0
*
dim1
*
2
;
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
mace
::
testing
::
StartTiming
();
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
...
@@ -80,7 +80,7 @@ static void OpenclConcatHelper(int iters,
...
@@ -80,7 +80,7 @@ static void OpenclConcatHelper(int iters,
const
int64_t
tot
=
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
static_cast
<
int64_t
>
(
iters
)
*
(
net
.
GetTensor
(
"Input0"
)
->
size
()
+
net
.
GetTensor
(
"Input1"
)
->
size
());
(
net
.
GetTensor
(
"Input0"
)
->
size
()
+
net
.
GetTensor
(
"Input1"
)
->
size
());
mace
::
testing
::
Items
Processed
(
tot
);
mace
::
testing
::
Macc
Processed
(
tot
);
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
mace
::
testing
::
StartTiming
();
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
...
...
mace/ops/conv_2d_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -83,8 +83,20 @@ static void Conv2d(int iters,
...
@@ -83,8 +83,20 @@ static void Conv2d(int iters,
static void \
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t dilation = 1; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * OC * oh * ow * (KH * KW * C + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
OC); \
OC); \
...
...
mace/ops/depthwise_conv2d_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -75,24 +75,36 @@ static void DepthwiseConv2d(int iters,
...
@@ -75,24 +75,36 @@ static void DepthwiseConv2d(int iters,
}
}
}
}
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \
DEVICE) \
DEVICE) \
static void \
static void \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t dilation = 1; \
mace::testing::ItemsProcessed(tot); \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
int64_t pad_h = 0, pad_w = 0; \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
if (P == SAME) { \
mace::Padding::P, OC); \
pad_h = KH / 2; \
} \
pad_w = KW / 2; \
BENCHMARK( \
} \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * C * M * oh * ow * (KH * KW + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, M); \
} \
BENCHMARK( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P,
OC
) \
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P,
M
) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, float, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, float, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, float, OPENCL); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, float, OPENCL); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
OC
, half, OPENCL);
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P,
M
, half, OPENCL);
BM_DEPTHWISE_CONV_2D
(
1
,
32
,
112
,
112
,
3
,
3
,
1
,
SAME
,
1
);
BM_DEPTHWISE_CONV_2D
(
1
,
32
,
112
,
112
,
3
,
3
,
1
,
SAME
,
1
);
BM_DEPTHWISE_CONV_2D
(
1
,
32
,
112
,
112
,
3
,
3
,
2
,
SAME
,
1
);
BM_DEPTHWISE_CONV_2D
(
1
,
32
,
112
,
112
,
3
,
3
,
2
,
SAME
,
1
);
...
...
mace/ops/eltwise_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -61,7 +61,7 @@ static void EltwiseBenchmark(
...
@@ -61,7 +61,7 @@ static void EltwiseBenchmark(
BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
EltwiseBenchmark<DEVICE, TYPE>( \
EltwiseBenchmark<DEVICE, TYPE>( \
iters, static_cast<kernels::EltwiseType>(ELT_TYPE), N, H, W, C); \
iters, static_cast<kernels::EltwiseType>(ELT_TYPE), N, H, W, C); \
...
...
mace/ops/global_avg_pooling_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -40,7 +40,7 @@ static void GlobalAvgPooling(
...
@@ -40,7 +40,7 @@ static void GlobalAvgPooling(
static void BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \
static void BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
GlobalAvgPooling<DEVICE>(iters, N, C, H, W); \
GlobalAvgPooling<DEVICE>(iters, N, C, H, W); \
} \
} \
...
...
mace/ops/matmul_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -20,10 +20,8 @@ static void MatMulBenchmark(
...
@@ -20,10 +20,8 @@ static void MatMulBenchmark(
net
.
AddRandomInput
<
D
,
float
>
(
"B"
,
{
batch
,
channels
,
out_width
,
1
});
net
.
AddRandomInput
<
D
,
float
>
(
"B"
,
{
batch
,
channels
,
out_width
,
1
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"A"
,
"AImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"A"
,
"AImage"
,
kernels
::
BufferType
::
IN_OUT_WIDTH
);
kernels
::
BufferType
::
IN_OUT_WIDTH
);
BufferToImage
<
D
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
BufferToImage
<
D
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
OpDefBuilder
(
"MatMul"
,
"MatMulBM"
)
OpDefBuilder
(
"MatMul"
,
"MatMulBM"
)
.
Input
(
"AImage"
)
.
Input
(
"AImage"
)
...
@@ -52,16 +50,19 @@ static void MatMulBenchmark(
...
@@ -52,16 +50,19 @@ static void MatMulBenchmark(
net
.
Sync
();
net
.
Sync
();
}
}
#define BM_MATMUL_MACRO(N, H, C, W, TYPE, DEVICE) \
#define BM_MATMUL_MACRO(N, H, C, W, TYPE, DEVICE) \
static void BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE(int iters) { \
static void BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
const int64_t tot = static_cast<int64_t>(iters) * N * (C * H + H * W); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::MaccProcessed(macc); \
MatMulBenchmark<DEVICE, TYPE>(iters, N, H, C, W); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
} \
MatMulBenchmark<DEVICE, TYPE>(iters, N, H, C, W); \
} \
BENCHMARK(BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#define BM_MATMUL(N, H, C, W) \
#define BM_MATMUL(N, H, C, W) \
BM_MATMUL_MACRO(N, H, C, W, float, CPU); \
BM_MATMUL_MACRO(N, H, C, W, float, OPENCL); \
BM_MATMUL_MACRO(N, H, C, W, half, OPENCL);
BM_MATMUL_MACRO(N, H, C, W, half, OPENCL);
BM_MATMUL
(
16
,
32
,
128
,
49
);
BM_MATMUL
(
16
,
32
,
128
,
49
);
...
...
mace/ops/pooling_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -54,7 +54,7 @@ static void Pooling(int iters,
...
@@ -54,7 +54,7 @@ static void Pooling(int iters,
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
mace::testing::BytesProcessed(tot *(sizeof(float))); \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
PoolingType::PO); \
PoolingType::PO); \
...
...
mace/ops/resize_bilinear_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -61,8 +61,9 @@ static void ResizeBilinearBenchmark(int iters,
...
@@ -61,8 +61,9 @@ static void ResizeBilinearBenchmark(int iters,
static void \
static void \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE( \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H1 * W1; \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3; \
mace::testing::ItemsProcessed(tot); \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ResizeBilinearBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1); \
ResizeBilinearBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1); \
} \
} \
...
...
mace/ops/softmax_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -49,7 +49,7 @@ static void SoftmaxBenchmark(
...
@@ -49,7 +49,7 @@ static void SoftmaxBenchmark(
#define BM_SOFTMAX_MACRO(N, C, H, W, TYPE, DEVICE) \
#define BM_SOFTMAX_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_SOFTMAX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
static void BM_SOFTMAX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
SoftmaxBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
SoftmaxBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
} \
...
...
mace/ops/space_to_batch_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -42,7 +42,7 @@ static void BMSpaceToBatch(
...
@@ -42,7 +42,7 @@ static void BMSpaceToBatch(
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
} \
} \
...
...
mace/ops/winograd_transform_benchmark.cc
浏览文件 @
1888d2a9
...
@@ -41,7 +41,7 @@ static void BMWinogradTransform(
...
@@ -41,7 +41,7 @@ static void BMWinogradTransform(
BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradTransform<DEVICE, TYPE>(iters, N, H, W, C); \
BMWinogradTransform<DEVICE, TYPE>(iters, N, H, W, C); \
} \
} \
...
@@ -93,7 +93,7 @@ static void BMWinogradInverseTransform(
...
@@ -93,7 +93,7 @@ static void BMWinogradInverseTransform(
BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::
Items
Processed(tot); \
mace::testing::
Macc
Processed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradInverseTransform<DEVICE, TYPE>(iters, N, H, W, C); \
BMWinogradInverseTransform<DEVICE, TYPE>(iters, N, H, W, C); \
} \
} \
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录