Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
野马c
Mace
提交
9d40ffd2
Mace
项目概览
野马c
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
9d40ffd2
编写于
10月 17, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add op run metadata statistics and benchmark framework.
上级
c0b8d045
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
945 addition
and
4 deletion
+945
-4
mace/core/BUILD
mace/core/BUILD
+2
-0
mace/core/net.cc
mace/core/net.cc
+14
-1
mace/core/net.h
mace/core/net.h
+3
-2
mace/examples/BUILD
mace/examples/BUILD
+0
-1
mace/examples/mace_run.cc
mace/examples/mace_run.cc
+5
-0
mace/proto/BUILD
mace/proto/BUILD
+10
-0
mace/proto/stats.proto
mace/proto/stats.proto
+16
-0
mace/tools/benchmark/BUILD
mace/tools/benchmark/BUILD
+36
-0
mace/tools/benchmark/benchmark_model.cc
mace/tools/benchmark/benchmark_model.cc
+333
-0
mace/tools/benchmark/stat_summarizer.cc
mace/tools/benchmark/stat_summarizer.cc
+318
-0
mace/tools/benchmark/stat_summarizer.h
mace/tools/benchmark/stat_summarizer.h
+198
-0
mace/utils/utils.h
mace/utils/utils.h
+10
-0
未找到文件。
mace/core/BUILD
浏览文件 @
9d40ffd2
...
@@ -46,6 +46,8 @@ cc_library(
...
@@ -46,6 +46,8 @@ cc_library(
copts
=
[
"-std=c++11"
],
copts
=
[
"-std=c++11"
],
deps
=
[
deps
=
[
"//mace/proto:cc_proto"
,
"//mace/proto:cc_proto"
,
"//mace/proto:stats_proto"
,
"//mace/utils:utils"
,
],
],
linkopts
=
if_android
([
linkopts
=
if_android
([
"-llog"
,
"-llog"
,
...
...
mace/core/net.cc
浏览文件 @
9d40ffd2
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
//
//
#include "mace/core/net.h"
#include "mace/core/net.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
mace
{
...
@@ -28,15 +29,27 @@ SimpleNet::SimpleNet(const std::shared_ptr<const NetDef>& net_def,
...
@@ -28,15 +29,27 @@ SimpleNet::SimpleNet(const std::shared_ptr<const NetDef>& net_def,
}
}
}
}
}
}
bool
SimpleNet
::
Run
()
{
bool
SimpleNet
::
Run
(
RunMetadata
*
run_metadata
)
{
VLOG
(
1
)
<<
"Running net "
<<
name_
;
VLOG
(
1
)
<<
"Running net "
<<
name_
;
for
(
auto
&
op
:
operators_
)
{
for
(
auto
&
op
:
operators_
)
{
VLOG
(
1
)
<<
"Running operator "
<<
op
->
debug_def
().
name
()
<<
"("
VLOG
(
1
)
<<
"Running operator "
<<
op
->
debug_def
().
name
()
<<
"("
<<
op
->
debug_def
().
type
()
<<
")."
;
<<
op
->
debug_def
().
type
()
<<
")."
;
OperatorStats
*
op_stats
=
nullptr
;
if
(
run_metadata
)
{
op_stats
=
run_metadata
->
add_op_stats
();
op_stats
->
set_operator_name
(
op
->
debug_def
().
name
());
op_stats
->
set_type
(
op
->
debug_def
().
type
());
op_stats
->
set_all_start_micros
(
NowInMicroSec
());
op_stats
->
set_op_start_rel_micros
(
NowInMicroSec
()
-
op_stats
->
all_start_micros
());
}
if
(
!
op
->
Run
())
{
if
(
!
op
->
Run
())
{
LOG
(
ERROR
)
<<
"Operator failed: "
<<
ProtoDebugString
(
op
->
debug_def
());
LOG
(
ERROR
)
<<
"Operator failed: "
<<
ProtoDebugString
(
op
->
debug_def
());
return
false
;
return
false
;
}
}
if
(
op_stats
)
{
op_stats
->
set_op_end_rel_micros
(
NowInMicroSec
()
-
op_stats
->
all_start_micros
());
op_stats
->
set_all_end_rel_micros
(
NowInMicroSec
()
-
op_stats
->
all_start_micros
());
}
VLOG
(
1
)
<<
"Op "
<<
op
->
debug_def
().
name
()
VLOG
(
1
)
<<
"Op "
<<
op
->
debug_def
().
name
()
<<
" has shape: "
<<
internal
::
MakeString
(
op
->
Output
(
0
)
->
shape
());
<<
" has shape: "
<<
internal
::
MakeString
(
op
->
Output
(
0
)
->
shape
());
}
}
...
...
mace/core/net.h
浏览文件 @
9d40ffd2
...
@@ -9,6 +9,7 @@
...
@@ -9,6 +9,7 @@
#include "mace/core/operator.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
#include "mace/proto/mace.pb.h"
#include "mace/proto/stats.pb.h"
namespace
mace
{
namespace
mace
{
...
@@ -19,7 +20,7 @@ class NetBase {
...
@@ -19,7 +20,7 @@ class NetBase {
DeviceType
type
);
DeviceType
type
);
virtual
~
NetBase
()
noexcept
{}
virtual
~
NetBase
()
noexcept
{}
virtual
bool
Run
()
=
0
;
virtual
bool
Run
(
RunMetadata
*
run_metadata
=
nullptr
)
=
0
;
const
string
&
Name
()
const
{
return
name_
;
}
const
string
&
Name
()
const
{
return
name_
;
}
...
@@ -35,7 +36,7 @@ class SimpleNet : public NetBase {
...
@@ -35,7 +36,7 @@ class SimpleNet : public NetBase {
Workspace
*
ws
,
Workspace
*
ws
,
DeviceType
type
);
DeviceType
type
);
bool
Run
()
override
;
bool
Run
(
RunMetadata
*
run_metadata
=
nullptr
)
override
;
protected:
protected:
vector
<
unique_ptr
<
OperatorBase
>
>
operators_
;
vector
<
unique_ptr
<
OperatorBase
>
>
operators_
;
...
...
mace/examples/BUILD
浏览文件 @
9d40ffd2
...
@@ -11,7 +11,6 @@ cc_binary(
...
@@ -11,7 +11,6 @@ cc_binary(
deps
=
[
deps
=
[
"//mace/core"
,
"//mace/core"
,
"//mace/ops"
,
"//mace/ops"
,
"@org_tensorflow//tensorflow/core:android_tensorflow_lib"
,
],
],
)
)
...
...
mace/examples/mace_run.cc
浏览文件 @
9d40ffd2
...
@@ -101,6 +101,11 @@ int main(int argc, char **argv) {
...
@@ -101,6 +101,11 @@ int main(int argc, char **argv) {
VLOG
(
0
)
<<
device_type
;
VLOG
(
0
)
<<
device_type
;
auto
net
=
CreateNet
(
net_def
,
&
ws
,
device_type
);
auto
net
=
CreateNet
(
net_def
,
&
ws
,
device_type
);
// warm up
for
(
int
i
=
0
;
i
<
2
;
++
i
)
{
net
->
Run
();
}
timeval
tv1
,
tv2
;
timeval
tv1
,
tv2
;
gettimeofday
(
&
tv1
,
NULL
);
gettimeofday
(
&
tv1
,
NULL
);
for
(
int
i
=
0
;
i
<
round
;
++
i
)
{
for
(
int
i
=
0
;
i
<
round
;
++
i
)
{
...
...
mace/proto/BUILD
浏览文件 @
9d40ffd2
...
@@ -20,6 +20,16 @@ cc_proto_library(
...
@@ -20,6 +20,16 @@ cc_proto_library(
deps
=
[
":proto"
],
deps
=
[
":proto"
],
)
)
proto_library
(
name
=
"stats"
,
srcs
=
[
"stats.proto"
],
)
cc_proto_library
(
name
=
"stats_proto"
,
deps
=
[
":stats"
],
)
py_proto_library
(
py_proto_library
(
name
=
"mace_py"
,
name
=
"mace_py"
,
srcs
=
[
"mace.proto"
],
srcs
=
[
"mace.proto"
],
...
...
mace/proto/stats.proto
0 → 100644
浏览文件 @
9d40ffd2
// Protocol messages for per-operator timing statistics collected while
// running a MACE net (filled in by SimpleNet::Run when a RunMetadata
// pointer is supplied; consumed by the benchmark StatSummarizer).
syntax = "proto2";

package mace;

// Timing record for a single operator execution.
// all_start_micros is absolute wall-clock time in microseconds; every
// *_rel_micros field is an offset relative to all_start_micros.
message OperatorStats {
  // Operator instance name from the NetDef.
  optional string operator_name = 1;
  // Operator type string (e.g. the op's registered type).
  optional string type = 2;
  // Absolute start time of this operator, microseconds.
  optional int64 all_start_micros = 3;
  // Start of the op body, relative to all_start_micros.
  optional int64 op_start_rel_micros = 4;
  // End of the op body, relative to all_start_micros.
  optional int64 op_end_rel_micros = 5;
  // Overall end, relative to all_start_micros.
  optional int64 all_end_rel_micros = 6;
};

// Statistics for one full net run: one OperatorStats per executed op,
// in execution order.
message RunMetadata {
  repeated OperatorStats op_stats = 1;
}
mace/tools/benchmark/BUILD
0 → 100644
浏览文件 @
9d40ffd2
# Benchmark
load
(
"//mace:mace.bzl"
,
"if_android"
)
licenses
([
"notice"
])
# Apache 2.0
cc_library
(
name
=
"stat_summarizer"
,
srcs
=
[
"stat_summarizer.cc"
],
hdrs
=
[
"stat_summarizer.h"
],
copts
=
[
"-std=c++11"
],
linkopts
=
[
"-fopenmp"
]
+
if_android
([
"-ldl"
,
"-lm"
,
]),
linkstatic
=
1
,
deps
=
[
"//mace/core"
,
"//mace/proto:stats_proto"
,
],
)
cc_binary
(
name
=
"benchmark_model"
,
srcs
=
[
"benchmark_model.cc"
,
],
copts
=
[
"-std=c++11"
],
linkopts
=
[
"-fopenmp"
]
+
if_android
([
"-ldl"
]),
linkstatic
=
1
,
deps
=
[
":stat_summarizer"
,
"//mace/core"
,
"//mace/ops"
,
"//mace/utils:command_line_flags"
,
],
)
mace/tools/benchmark/benchmark_model.cc
0 → 100644
浏览文件 @
9d40ffd2
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/net.h"
#include "mace/utils/command_line_flags.h"
#include "mace/tools/benchmark/stat_summarizer.h"
#include "mace/utils/utils.h"
#include <fstream>
#include <thread>
namespace
mace
{
namespace
str_util
{
// Splits |str| on the delimiter character |delims| and returns the tokens.
// An empty input yields an empty vector; a trailing delimiter does not
// produce a trailing empty token.
std::vector<std::string> Split(const std::string &str, char delims) {
  std::vector<std::string> result;
  std::string tmp = str;
  while (!tmp.empty()) {
    // Locate the delimiter first so only the current token is stored.
    // Bug fix: the original did result.push_back(tmp.data()), which copies
    // the ENTIRE remaining string (up to the NUL terminator), so every
    // element after the first contained the rest of the input instead of
    // a single token.
    size_t next_offset = tmp.find(delims);
    result.push_back(tmp.substr(0, next_offset));  // npos -> rest of string
    if (next_offset == std::string::npos) {
      break;
    }
    tmp = tmp.substr(next_offset + 1);
  }
  return result;
}
// Splits |str| on |delims| and parses each token as an integer, appending
// the parsed values to |result|.
// NOTE(review): atoi() silently yields 0 for non-numeric tokens; callers
// here pass trusted shape strings such as "1,224,224,3".
// Returns true (there is currently no failure path).
bool SplitAndParseToInts(const std::string &str, char delims,
                         std::vector<index_t> *result) {
  std::string tmp = str;
  while (!tmp.empty()) {
    index_t dim = atoi(tmp.data());
    result->push_back(dim);
    size_t next_offset = tmp.find(delims);
    if (next_offset == std::string::npos) {
      break;
    }
    tmp = tmp.substr(next_offset + 1);
  }
  // Bug fix: the original fell off the end of this bool-returning function
  // without a return statement, which is undefined behavior in C++.
  return true;
}
}
// namespace str_util
namespace
benchmark
{
// Runs the net once and reports the wall-clock latency in microseconds
// through |inference_time_us|.
// If |summarizer| is non-null, per-operator timings are collected into a
// local RunMetadata and fed to the summarizer; otherwise the net runs on
// the fast path with no metadata collection (net->Run(nullptr)).
// Returns false — leaving |inference_time_us| untouched — if the net fails.
bool RunInference(NetBase* net, StatSummarizer* summarizer,
                  int64_t* inference_time_us) {
  RunMetadata run_metadata;
  RunMetadata* run_metadata_ptr = nullptr;
  if (summarizer) {
    run_metadata_ptr = &run_metadata;
  }

  const int64_t start_time = NowInMicroSec();
  bool s = net->Run(run_metadata_ptr);
  const int64_t end_time = NowInMicroSec();

  if (!s) {
    LOG(ERROR) << "Error during inference.";
    return s;
  }
  *inference_time_us = end_time - start_time;

  if (summarizer != nullptr) {
    summarizer->ProcessMetadata(run_metadata);
  }

  return true;
}
// Repeatedly runs the net and aggregates latency statistics.
// Stops after |num_runs| iterations (num_runs <= 0 means "no iteration
// limit") or once the accumulated time exceeds |max_time_sec| (if > 0),
// whichever comes first. Sleeps |sleep_sec| seconds between inferences
// when positive. Totals are reported through |total_time_us| and
// |actual_num_runs|; a per-run latency summary is logged at the end.
bool Run(NetBase* net, StatSummarizer* summarizer, int num_runs,
         double max_time_sec, int64_t sleep_sec, int64_t* total_time_us,
         int64_t* actual_num_runs) {
  *total_time_us = 0;

  LOG(INFO) << "Running benchmark for max " << num_runs << " iterators, max "
            << max_time_sec << " seconds "
            << (summarizer != nullptr ? "with " : "without ")
            << "detailed stat logging, with " << sleep_sec
            << "s sleep between inferences";

  Stat<int64_t> stat;

  bool util_max_time = (num_runs <= 0);
  for (int i = 0; util_max_time || i < num_runs; ++i) {
    int64_t inference_time_us = 0;
    bool s = RunInference(net, summarizer, &inference_time_us);
    stat.UpdateStat(inference_time_us);
    (*total_time_us) += inference_time_us;
    ++(*actual_num_runs);

    // NOTE(review): the time-budget check runs before the failure check,
    // so a failed final iteration is reported as success when the budget
    // is exhausted, and a failed run still updates the latency stats —
    // confirm whether that ordering is intentional.
    if (max_time_sec > 0 && (*total_time_us / 1000000.0) > max_time_sec) {
      break;
    }

    if (!s) {
      LOG(INFO) << "Failed on run " << i;
      return s;
    }

    if (sleep_sec > 0) {
      std::this_thread::sleep_for(std::chrono::seconds(sleep_sec));
    }
  }

  std::stringstream stream;
  stat.OutputToStream(&stream);
  LOG(INFO) << stream.str();

  return true;
}
int
Main
(
int
argc
,
char
**
argv
)
{
std
::
string
model_file
=
"/data/local/tmp/mobi_mace.pb"
;
std
::
string
device
=
"CPU"
;
std
::
string
input_layer_string
=
"input:0"
;
std
::
string
input_layer_shape_string
=
"1,224,224,3"
;
std
::
string
input_layer_type_string
=
"float"
;
std
::
string
input_layer_files_string
=
""
;
std
::
string
output_layer_string
=
"output:0"
;
int
max_num_runs
=
10
;
std
::
string
max_time
=
"10.0"
;
std
::
string
inference_delay
=
"-1"
;
std
::
string
inter_benchmark_delay
=
"-1"
;
int
num_threads
=
-
1
;
std
::
string
benchmark_name
=
""
;
std
::
string
output_prefix
=
""
;
bool
show_sizes
=
false
;
bool
show_run_order
=
true
;
int
run_order_limit
=
0
;
bool
show_time
=
true
;
int
time_limit
=
10
;
bool
show_memory
=
true
;
int
memory_limit
=
10
;
bool
show_type
=
true
;
bool
show_summary
=
true
;
bool
show_flops
=
false
;
int
warmup_runs
=
2
;
std
::
vector
<
Flag
>
flag_list
=
{
Flag
(
"model_file"
,
&
model_file
,
"graph file name"
),
Flag
(
"device"
,
&
device
,
"CPU/NEON"
),
Flag
(
"input_layer"
,
&
input_layer_string
,
"input layer names"
),
Flag
(
"input_layer_shape"
,
&
input_layer_shape_string
,
"input layer shape"
),
Flag
(
"input_layer_type"
,
&
input_layer_type_string
,
"input layer type"
),
Flag
(
"input_layer_files"
,
&
input_layer_files_string
,
"files to initialize the inputs with"
),
Flag
(
"output_layer"
,
&
output_layer_string
,
"output layer name"
),
Flag
(
"max_num_runs"
,
&
max_num_runs
,
"number of runs max"
),
Flag
(
"max_time"
,
&
max_time
,
"length to run max"
),
Flag
(
"inference_delay"
,
&
inference_delay
,
"delay between runs in seconds"
),
Flag
(
"inter_benchmark_delay"
,
&
inter_benchmark_delay
,
"delay between benchmarks in seconds"
),
Flag
(
"num_threads"
,
&
num_threads
,
"number of threads"
),
Flag
(
"benchmark_name"
,
&
benchmark_name
,
"benchmark name"
),
Flag
(
"output_prefix"
,
&
output_prefix
,
"benchmark output prefix"
),
Flag
(
"show_sizes"
,
&
show_sizes
,
"whether to show sizes"
),
Flag
(
"show_run_order"
,
&
show_run_order
,
"whether to list stats by run order"
),
Flag
(
"run_order_limit"
,
&
run_order_limit
,
"how many items to show by run order"
),
Flag
(
"show_time"
,
&
show_time
,
"whether to list stats by time taken"
),
Flag
(
"time_limit"
,
&
time_limit
,
"how many items to show by time taken"
),
Flag
(
"show_memory"
,
&
show_memory
,
"whether to list stats by memory used"
),
Flag
(
"memory_limit"
,
&
memory_limit
,
"how many items to show by memory used"
),
Flag
(
"show_type"
,
&
show_type
,
"whether to list stats by op type"
),
Flag
(
"show_summary"
,
&
show_summary
,
"whether to show a summary of the stats"
),
Flag
(
"show_flops"
,
&
show_flops
,
"whether to estimate the model's FLOPs"
),
Flag
(
"warmup_runs"
,
&
warmup_runs
,
"how many runs to initialize model"
),
};
string
usage
=
Flags
::
Usage
(
argv
[
0
],
flag_list
);
const
bool
parse_result
=
Flags
::
Parse
(
&
argc
,
argv
,
flag_list
);
if
(
!
parse_result
)
{
LOG
(
ERROR
)
<<
usage
;
return
-
1
;
}
std
::
vector
<
std
::
string
>
input_layers
=
str_util
::
Split
(
input_layer_string
,
','
);
std
::
vector
<
std
::
string
>
input_layer_shapes
=
str_util
::
Split
(
input_layer_shape_string
,
':'
);
std
::
vector
<
string
>
input_layer_types
=
str_util
::
Split
(
input_layer_type_string
,
','
);
std
::
vector
<
string
>
input_layer_files
=
str_util
::
Split
(
input_layer_files_string
,
':'
);
std
::
vector
<
string
>
output_layers
=
str_util
::
Split
(
output_layer_string
,
','
);
if
((
input_layers
.
size
()
!=
input_layer_shapes
.
size
())
||
(
input_layers
.
size
()
!=
input_layer_types
.
size
()))
{
LOG
(
ERROR
)
<<
"There must be the same number of items in --input_layer,"
<<
" --input_layer_shape, and --input_layer_type, for example"
<<
" --input_layer=input1,input2 --input_layer_type=float,float "
<<
" --input_layer_shape=1,224,224,4:1,20"
;
LOG
(
ERROR
)
<<
"--input_layer="
<<
input_layer_string
<<
" ("
<<
input_layers
.
size
()
<<
" items)"
;
LOG
(
ERROR
)
<<
"--input_layer_type="
<<
input_layer_type_string
<<
" ("
<<
input_layer_types
.
size
()
<<
" items)"
;
LOG
(
ERROR
)
<<
"--input_layer_shape="
<<
input_layer_shape_string
<<
" ("
<<
input_layer_shapes
.
size
()
<<
" items)"
;
return
-
1
;
}
const
size_t
inputs_count
=
input_layers
.
size
();
if
(
argc
>
1
)
{
LOG
(
ERROR
)
<<
"Unknown argument "
<<
argv
[
1
]
<<
"
\n
"
<<
usage
;
return
-
1
;
}
LOG
(
INFO
)
<<
"Model file: ["
<<
model_file
<<
"]"
;
LOG
(
INFO
)
<<
"Device: ["
<<
device
<<
"]"
;
LOG
(
INFO
)
<<
"Input layers: ["
<<
input_layer_string
<<
"]"
;
LOG
(
INFO
)
<<
"Input shapes: ["
<<
input_layer_shape_string
<<
"]"
;
LOG
(
INFO
)
<<
"Input types: ["
<<
input_layer_type_string
<<
"]"
;
LOG
(
INFO
)
<<
"Output layers: ["
<<
output_layer_string
<<
"]"
;
LOG
(
INFO
)
<<
"Num runs: ["
<<
max_num_runs
<<
"]"
;
LOG
(
INFO
)
<<
"Inter-inference delay (seconds): ["
<<
inference_delay
<<
"]"
;
LOG
(
INFO
)
<<
"Inter-benchmark delay (seconds): ["
<<
inter_benchmark_delay
<<
"]"
;
LOG
(
INFO
)
<<
"Num threads: ["
<<
num_threads
<<
"]"
;
LOG
(
INFO
)
<<
"Benchmark name: ["
<<
benchmark_name
<<
"]"
;
LOG
(
INFO
)
<<
"Output prefix: ["
<<
output_prefix
<<
"]"
;
LOG
(
INFO
)
<<
"Show sizes: ["
<<
show_sizes
<<
"]"
;
LOG
(
INFO
)
<<
"Warmup runs: ["
<<
warmup_runs
<<
"]"
;
const
long
int
inter_inference_sleep_seconds
=
std
::
strtol
(
inference_delay
.
c_str
(),
nullptr
,
10
);
const
long
int
inter_benchmark_sleep_seconds
=
std
::
strtol
(
inter_benchmark_delay
.
c_str
(),
nullptr
,
10
);
const
double
max_benchmark_time_seconds
=
std
::
strtod
(
max_time
.
c_str
(),
nullptr
);
std
::
unique_ptr
<
StatSummarizer
>
stats
;
StatSummarizerOptions
stats_options
;
stats_options
.
show_run_order
=
show_run_order
;
stats_options
.
run_order_limit
=
run_order_limit
;
stats_options
.
show_time
=
show_time
;
stats_options
.
time_limit
=
time_limit
;
stats_options
.
show_memory
=
show_memory
;
stats_options
.
memory_limit
=
memory_limit
;
stats_options
.
show_type
=
show_type
;
stats_options
.
show_summary
=
show_summary
;
stats
.
reset
(
new
StatSummarizer
(
stats_options
));
// load model
std
::
ifstream
model_file_stream
(
model_file
,
std
::
ios
::
in
|
std
::
ios
::
binary
);
if
(
!
model_file_stream
.
is_open
())
{
LOG
(
ERROR
)
<<
"model file open failed"
;
return
-
1
;
}
NetDef
net_def
;
net_def
.
ParseFromIstream
(
&
model_file_stream
);
model_file_stream
.
close
();
Workspace
ws
;
ws
.
LoadModelTensor
(
net_def
,
DeviceType
::
CPU
);
// Load inputs
for
(
size_t
i
=
0
;
i
<
inputs_count
;
++
i
)
{
Tensor
*
input_tensor
=
ws
.
CreateTensor
(
input_layers
[
i
],
cpu_allocator
(),
DT_FLOAT
);
vector
<
index_t
>
shapes
;
str_util
::
SplitAndParseToInts
(
input_layer_shapes
[
i
],
','
,
&
shapes
);
input_tensor
->
Resize
(
shapes
);
float
*
input_data
=
input_tensor
->
mutable_data
<
float
>
();
// load input
if
(
i
<
input_layer_files
.
size
())
{
std
::
ifstream
in_file
(
input_layer_files
[
i
],
std
::
ios
::
in
|
std
::
ios
::
binary
);
in_file
.
read
(
reinterpret_cast
<
char
*>
(
input_data
),
input_tensor
->
size
()
*
sizeof
(
float
));
in_file
.
close
();
}
}
// create net
DeviceType
device_type
;
DeviceType_Parse
(
device
,
&
device_type
);
auto
net
=
CreateNet
(
net_def
,
&
ws
,
device_type
);
int64_t
warmup_time_us
=
0
;
int64_t
num_warmup_runs
=
0
;
if
(
warmup_runs
>
0
)
{
bool
status
=
Run
(
net
.
get
(),
nullptr
,
warmup_runs
,
-
1.0
,
inter_inference_sleep_seconds
,
&
warmup_time_us
,
&
num_warmup_runs
);
if
(
!
status
)
{
LOG
(
ERROR
)
<<
"Failed at warm up run"
;
}
}
if
(
inter_benchmark_sleep_seconds
>
0
)
{
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
seconds
(
inter_benchmark_sleep_seconds
));
}
int64_t
no_stat_time_us
=
0
;
int64_t
no_stat_runs
=
0
;
bool
status
=
Run
(
net
.
get
(),
nullptr
,
max_num_runs
,
max_benchmark_time_seconds
,
inter_inference_sleep_seconds
,
&
no_stat_time_us
,
&
no_stat_runs
);
if
(
!
status
)
{
LOG
(
ERROR
)
<<
"Failed at normal no-stat run"
;
}
int64_t
stat_time_us
=
0
;
int64_t
stat_runs
=
0
;
status
=
Run
(
net
.
get
(),
stats
.
get
(),
max_num_runs
,
max_benchmark_time_seconds
,
inter_inference_sleep_seconds
,
&
stat_time_us
,
&
stat_runs
);
if
(
!
status
)
{
LOG
(
ERROR
)
<<
"Failed at normal stat run"
;
}
LOG
(
INFO
)
<<
"Average inference timings in us: "
<<
"Warmup: "
<<
(
warmup_runs
>
0
?
warmup_time_us
/
warmup_runs
:
0
)
<<
", "
<<
"no stats: "
<<
no_stat_time_us
/
no_stat_runs
<<
", "
<<
"with stats: "
<<
stat_time_us
/
stat_runs
;
stats
->
PrintOperatorStats
();
return
0
;
}
}
// namespace benchmark
}
// namespace mace
int
main
(
int
argc
,
char
**
argv
)
{
mace
::
benchmark
::
Main
(
argc
,
argv
);
}
mace/tools/benchmark/stat_summarizer.cc
0 → 100644
浏览文件 @
9d40ffd2
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/common.h"
#include "mace/tools/benchmark/stat_summarizer.h"
#include "mace/proto/stats.pb.h"
#include <iomanip>
#include <queue>
namespace
mace
{
// Construction just stores the display options; statistics start empty.
StatSummarizer::StatSummarizer(const StatSummarizerOptions& options)
    : options_(options) {}

StatSummarizer::~StatSummarizer() {}

// Clears all accumulated statistics so the summarizer can be reused
// across independent benchmark sessions.
void StatSummarizer::Reset() {
  run_total_us_.Reset();
  memory_.Reset();
  details_.clear();
}
void
StatSummarizer
::
ProcessMetadata
(
const
RunMetadata
&
run_metadata
)
{
int64_t
curr_total_us
=
0
;
int64_t
mem_total
=
0
;
int64_t
first_node_start_us
=
run_metadata
.
op_stats
(
0
).
all_start_micros
();
int
node_num
=
0
;
for
(
const
auto
&
ops
:
run_metadata
.
op_stats
())
{
std
::
string
name
=
ops
.
operator_name
();
std
::
string
op_type
=
ops
.
type
();
++
node_num
;
const
int64_t
curr_time
=
ops
.
all_end_rel_micros
();
curr_total_us
+=
curr_time
;
auto
result
=
details_
.
emplace
(
name
,
Detail
());
Detail
*
detail
=
&
(
result
.
first
->
second
);
detail
->
start_us
.
UpdateStat
(
ops
.
all_start_micros
()
-
first_node_start_us
);
detail
->
rel_end_us
.
UpdateStat
(
curr_time
);
// If this is the first pass, initialize some values.
if
(
result
.
second
)
{
detail
->
name
=
name
;
detail
->
type
=
op_type
;
detail
->
run_order
=
node_num
;
detail
->
times_called
=
0
;
}
++
detail
->
times_called
;
}
run_total_us_
.
UpdateStat
(
curr_total_us
);
memory_
.
UpdateStat
(
mem_total
);
}
std
::
string
StatSummarizer
::
ShortSummary
()
const
{
std
::
stringstream
stream
;
stream
<<
"Timings (microseconds): "
;
run_total_us_
.
OutputToStream
(
&
stream
);
stream
<<
std
::
endl
;
stream
<<
"Memory (bytes): "
;
memory_
.
OutputToStream
(
&
stream
);
stream
<<
std
::
endl
;
stream
<<
details_
.
size
()
<<
" nodes observed"
<<
std
::
endl
;
return
stream
.
str
();
}
// Writes a column prefix to |stream|: a tab, then right alignment with the
// given field |width| and fixed-point output at 3 decimal places. Returns
// the stream so the caller can chain the value to print.
std::ostream& InitField(std::ostream& stream, int width) {
  stream << "\t";
  stream << std::right << std::setw(width);
  stream << std::fixed << std::setprecision(3);
  return stream;
}
std
::
string
StatSummarizer
::
HeaderString
(
const
std
::
string
&
title
)
const
{
std
::
stringstream
stream
;
stream
<<
"============================== "
<<
title
<<
" =============================="
<<
std
::
endl
;
InitField
(
stream
,
24
)
<<
"[node type]"
;
InitField
(
stream
,
9
)
<<
"[start]"
;
InitField
(
stream
,
9
)
<<
"[first]"
;
InitField
(
stream
,
9
)
<<
"[avg ms]"
;
InitField
(
stream
,
8
)
<<
"[%]"
;
InitField
(
stream
,
8
)
<<
"[cdf%]"
;
InitField
(
stream
,
10
)
<<
"[mem KB]"
;
InitField
(
stream
,
9
)
<<
"[times called]"
;
stream
<<
"
\t
"
<<
"[Name]"
;
return
stream
.
str
();
}
std
::
string
StatSummarizer
::
ColumnString
(
const
StatSummarizer
::
Detail
&
detail
,
const
int64_t
cumulative_stat_on_node
,
const
Stat
<
int64_t
>&
stat
)
const
{
const
double
start_ms
=
detail
.
start_us
.
avg
()
/
1000.0
;
const
double
first_time_ms
=
detail
.
rel_end_us
.
first
()
/
1000.0
;
const
double
avg_time_ms
=
detail
.
rel_end_us
.
avg
()
/
1000.0
;
const
double
percentage
=
detail
.
rel_end_us
.
sum
()
*
100.0
/
stat
.
sum
();
const
double
cdf_percentage
=
(
cumulative_stat_on_node
*
100.0
f
)
/
stat
.
sum
();
const
int64_t
times_called
=
detail
.
times_called
/
num_runs
();
std
::
stringstream
stream
;
InitField
(
stream
,
24
)
<<
detail
.
type
;
InitField
(
stream
,
9
)
<<
start_ms
;
InitField
(
stream
,
9
)
<<
first_time_ms
;
InitField
(
stream
,
9
)
<<
avg_time_ms
;
InitField
(
stream
,
7
)
<<
percentage
<<
"%"
;
InitField
(
stream
,
7
)
<<
cdf_percentage
<<
"%"
;
InitField
(
stream
,
10
)
<<
detail
.
mem_used
.
newest
()
/
1000.0
;
InitField
(
stream
,
9
)
<<
times_called
;
stream
<<
"
\t
"
<<
detail
.
name
;
return
stream
.
str
();
}
void
StatSummarizer
::
OrderNodesByMetric
(
SortingMetric
metric
,
std
::
vector
<
const
Detail
*>*
details
)
const
{
std
::
priority_queue
<
std
::
pair
<
std
::
string
,
const
Detail
*>>
sorted_list
;
const
int
num_nodes
=
details_
.
size
();
for
(
const
auto
&
det
:
details_
)
{
const
Detail
*
detail
=
&
(
det
.
second
);
std
::
stringstream
stream
;
stream
<<
std
::
setw
(
20
)
<<
std
::
right
<<
std
::
setprecision
(
10
)
<<
std
::
fixed
;
switch
(
metric
)
{
case
BY_NAME
:
stream
<<
detail
->
name
;
break
;
case
BY_RUN_ORDER
:
stream
<<
num_nodes
-
detail
->
run_order
;
break
;
case
BY_TIME
:
stream
<<
detail
->
rel_end_us
.
avg
();
break
;
case
BY_MEMORY
:
stream
<<
detail
->
mem_used
.
avg
();
break
;
case
BY_TYPE
:
stream
<<
detail
->
type
;
break
;
default:
stream
<<
""
;
break
;
}
sorted_list
.
emplace
(
stream
.
str
(),
detail
);
}
while
(
!
sorted_list
.
empty
())
{
auto
entry
=
sorted_list
.
top
();
sorted_list
.
pop
();
details
->
push_back
(
entry
.
second
);
}
}
void
StatSummarizer
::
ComputeStatsByType
(
std
::
map
<
std
::
string
,
int64_t
>*
node_type_map_count
,
std
::
map
<
std
::
string
,
int64_t
>*
node_type_map_time
,
std
::
map
<
std
::
string
,
int64_t
>*
node_type_map_memory
,
std
::
map
<
std
::
string
,
int64_t
>*
node_type_map_times_called
,
int64_t
*
accumulated_us
)
const
{
int64_t
run_count
=
run_total_us_
.
count
();
for
(
const
auto
&
det
:
details_
)
{
const
std
::
string
node_name
=
det
.
first
;
const
Detail
&
detail
=
det
.
second
;
int64_t
curr_time_val
=
static_cast
<
int64_t
>
(
detail
.
rel_end_us
.
sum
()
/
run_count
);
*
accumulated_us
+=
curr_time_val
;
int64_t
curr_memory_val
=
detail
.
mem_used
.
newest
();
const
std
::
string
&
node_type
=
detail
.
type
;
(
*
node_type_map_count
)[
node_type
]
+=
1
;
(
*
node_type_map_time
)[
node_type
]
+=
curr_time_val
;
(
*
node_type_map_memory
)[
node_type
]
+=
curr_memory_val
;
(
*
node_type_map_times_called
)[
node_type
]
+=
detail
.
times_called
/
run_count
;
}
}
std
::
string
StatSummarizer
::
GetStatsByNodeType
()
const
{
std
::
stringstream
stream
;
stream
<<
"============================== Summary by node type "
"=============================="
<<
std
::
endl
;
LOG
(
INFO
)
<<
"Number of nodes executed: "
<<
details_
.
size
();
std
::
map
<
std
::
string
,
int64_t
>
node_type_map_count
;
std
::
map
<
std
::
string
,
int64_t
>
node_type_map_time
;
std
::
map
<
std
::
string
,
int64_t
>
node_type_map_memory
;
std
::
map
<
std
::
string
,
int64_t
>
node_type_map_times_called
;
int64_t
accumulated_us
=
0
;
ComputeStatsByType
(
&
node_type_map_count
,
&
node_type_map_time
,
&
node_type_map_memory
,
&
node_type_map_times_called
,
&
accumulated_us
);
// Sort them.
std
::
priority_queue
<
std
::
pair
<
int64_t
,
std
::
pair
<
std
::
string
,
int64_t
>>>
timings
;
for
(
const
auto
&
node_type
:
node_type_map_time
)
{
const
int64_t
mem_used
=
node_type_map_memory
[
node_type
.
first
];
timings
.
emplace
(
node_type
.
second
,
std
::
pair
<
std
::
string
,
int64_t
>
(
node_type
.
first
,
mem_used
));
}
InitField
(
stream
,
24
)
<<
"[Node type]"
;
InitField
(
stream
,
9
)
<<
"[count]"
;
InitField
(
stream
,
10
)
<<
"[avg ms]"
;
InitField
(
stream
,
11
)
<<
"[avg %]"
;
InitField
(
stream
,
11
)
<<
"[cdf %]"
;
InitField
(
stream
,
10
)
<<
"[mem KB]"
;
InitField
(
stream
,
10
)
<<
"[times called]"
;
stream
<<
std
::
endl
;
float
cdf
=
0.0
f
;
while
(
!
timings
.
empty
())
{
auto
entry
=
timings
.
top
();
timings
.
pop
();
const
std
::
string
node_type
=
entry
.
second
.
first
;
const
float
memory
=
entry
.
second
.
second
/
1000.0
f
;
const
int64_t
node_type_total_us
=
entry
.
first
;
const
float
time_per_run_ms
=
node_type_total_us
/
1000.0
f
;
const
float
percentage
=
((
entry
.
first
/
static_cast
<
float
>
(
accumulated_us
))
*
100.0
f
);
cdf
+=
percentage
;
InitField
(
stream
,
24
)
<<
node_type
;
InitField
(
stream
,
9
)
<<
node_type_map_count
[
node_type
];
InitField
(
stream
,
10
)
<<
time_per_run_ms
;
InitField
(
stream
,
10
)
<<
percentage
<<
"%"
;
InitField
(
stream
,
10
)
<<
cdf
<<
"%"
;
InitField
(
stream
,
10
)
<<
memory
;
InitField
(
stream
,
9
)
<<
node_type_map_times_called
[
node_type
];
stream
<<
std
::
endl
;
}
stream
<<
std
::
endl
;
return
stream
.
str
();
}
std
::
string
StatSummarizer
::
GetStatsByMetric
(
const
std
::
string
&
title
,
SortingMetric
sorting_metric
,
int
num_stats
)
const
{
std
::
vector
<
const
Detail
*>
details
;
OrderNodesByMetric
(
sorting_metric
,
&
details
);
double
cumulative_stat_on_node
=
0
;
std
::
stringstream
stream
;
stream
<<
HeaderString
(
title
)
<<
std
::
endl
;
int
stat_num
=
0
;
for
(
auto
detail
:
details
)
{
++
stat_num
;
if
(
num_stats
>
0
&&
stat_num
>
num_stats
)
{
break
;
}
cumulative_stat_on_node
+=
detail
->
rel_end_us
.
sum
();
stream
<<
ColumnString
(
*
detail
,
cumulative_stat_on_node
,
run_total_us_
)
<<
std
::
endl
;
}
stream
<<
std
::
endl
;
return
stream
.
str
();
}
std
::
string
StatSummarizer
::
GetOutputString
()
const
{
std
::
stringstream
stream
;
if
(
options_
.
show_run_order
)
{
stream
<<
GetStatsByMetric
(
"Run Order"
,
BY_RUN_ORDER
,
options_
.
run_order_limit
);
}
if
(
options_
.
show_time
)
{
stream
<<
GetStatsByMetric
(
"Top by Computation Time"
,
BY_TIME
,
options_
.
time_limit
);
}
if
(
options_
.
show_memory
)
{
stream
<<
GetStatsByMetric
(
"Top by Memory Use"
,
BY_MEMORY
,
options_
.
memory_limit
);
}
if
(
options_
.
show_type
)
{
stream
<<
GetStatsByNodeType
();
}
if
(
options_
.
show_summary
)
{
stream
<<
ShortSummary
()
<<
std
::
endl
;
}
return
stream
.
str
();
}
// Emits the full stats report via LOG(INFO), one line at a time —
// presumably so each row stays intact under line-oriented log sinks
// (e.g. Android logcat); confirm against the logging backend.
void StatSummarizer::PrintOperatorStats() const {
  std::string output = GetOutputString();
  std::istringstream iss(output);
  for (std::string line; std::getline(iss, line);) {
    LOG(INFO) << line;
  }
}
}
// namespace mace
mace/tools/benchmark/stat_summarizer.h
0 → 100644
浏览文件 @
9d40ffd2
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_
#define MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_
#include <stdlib.h>
#include <cmath>
#include <limits>
#include <map>
#include <sstream>
#include <string>
namespace
mace
{
class
RunMetadata
;
// Accumulates scalar samples (timings, byte counts, ...) and exposes
// simple summary statistics. ValueType is the sample type;
// HighPrecisionValueType is used for averages and sums of squares.
template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  // Records one sample, updating first/newest/min/max/count/sums.
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  // Reconstructs *this in place, restoring all default member values.
  void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); }

  bool empty() const { return count_ == 0; }

  ValueType first() const { return first_; }

  ValueType newest() const { return newest_; }

  ValueType max() const { return max_; }

  ValueType min() const { return min_; }

  int64_t count() const { return count_; }

  ValueType sum() const { return sum_; }

  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  bool all_same() const { return (count_ == 0 || min_ == max_); }

  // Mean of all samples; quiet_NaN() of ValueType (0 for integral types)
  // when no samples have been recorded.
  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  // Population standard deviation; 0 when all samples are identical.
  ValueType std_deviation() const {
    return all_same() ? 0 : std::sqrt(squared_sum_ / count_ - avg() * avg());
  }

  // Writes a compact human-readable summary, with short forms for the
  // empty and all-identical cases.
  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  friend std::ostream& operator<<(std::ostream& stream,
                                  const Stat<ValueType>& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  // Bug fix: seed max_ with lowest() rather than min(). For floating-point
  // ValueTypes, numeric_limits::min() is the smallest *positive* value, so
  // all-negative sample sets would report a wrong max(). For integral
  // types lowest() == min(), so behavior there is unchanged.
  ValueType max_ = std::numeric_limits<ValueType>::lowest();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64_t count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};
// Used to control which sections (and how many rows) the statistics
// summarizer includes in its output.
class StatSummarizerOptions {
 public:
  StatSummarizerOptions() = default;

  bool show_run_order = true;  // List stats in execution order.
  int run_order_limit = 0;     // Max rows for the run-order table; 0 = all.
  bool show_time = true;       // List stats sorted by computation time.
  int time_limit = 10;         // Max rows for the time table.
  bool show_memory = true;     // List stats sorted by memory use.
  int memory_limit = 10;       // Max rows for the memory table.
  bool show_type = true;       // Aggregate stats per node type.
  bool show_summary = true;    // Print the short overall summary.
};
// A StatSummarizer assists in performance analysis of Graph executions.
//
// It summarizes time spent executing (on GPU/CPU), memory used etc. across
// multiple executions of a single Graph from the StepStats collected during
// graph execution.
//
// See tensorflow/tools/benchmark/benchmark_model.cc for an example usage.
class
StatSummarizer
{
public:
enum
SortingMetric
{
BY_NAME
,
BY_RUN_ORDER
,
BY_TIME
,
BY_MEMORY
,
BY_TYPE
,
};
explicit
StatSummarizer
(
const
StatSummarizerOptions
&
options
);
~
StatSummarizer
();
// Adds another run's StepStats output to the aggregate counts.
void
ProcessMetadata
(
const
RunMetadata
&
run_metadata
);
// Returns a string detailing the accumulated runtime stats in a tab-separated
// format which can be pasted into a spreadsheet for further analysis.
std
::
string
GetOutputString
()
const
;
std
::
string
ShortSummary
()
const
;
// Prints the string returned by GetOutputString().
void
PrintOperatorStats
()
const
;
void
ComputeStatsByType
(
std
::
map
<
std
::
string
,
int64_t
>*
node_type_map_count
,
std
::
map
<
std
::
string
,
int64_t
>*
node_type_map_time
,
std
::
map
<
std
::
string
,
int64_t
>*
node_type_map_memory
,
std
::
map
<
std
::
string
,
int64_t
>*
node_type_map_times_called
,
int64_t
*
accumulated_us
)
const
;
std
::
string
GetStatsByNodeType
()
const
;
std
::
string
GetStatsByMetric
(
const
std
::
string
&
title
,
SortingMetric
sorting_metric
,
int
num_stats
)
const
;
void
Reset
();
// Returns number of runs.
int
num_runs
()
const
{
return
run_total_us_
.
count
();
}
// Returns stats of total microseconds spent by all nodes in each run.
const
Stat
<
int64_t
>&
run_total_us
()
const
{
return
run_total_us_
;
}
private:
struct
Detail
{
std
::
string
name
;
std
::
string
type
;
int64_t
run_order
;
Stat
<
int64_t
>
start_us
;
Stat
<
int64_t
>
rel_end_us
;
Stat
<
int64_t
>
mem_used
;
int64_t
times_called
;
};
void
OrderNodesByMetric
(
SortingMetric
sorting_metric
,
std
::
vector
<
const
Detail
*>*
details
)
const
;
std
::
string
HeaderString
(
const
std
::
string
&
title
)
const
;
std
::
string
ColumnString
(
const
Detail
&
detail
,
const
int64_t
cumulative_stat_on_node
,
const
Stat
<
int64_t
>&
stat
)
const
;
Stat
<
int64_t
>
run_total_us_
;
Stat
<
int64_t
>
memory_
;
std
::
map
<
std
::
string
,
Detail
>
details_
;
StatSummarizerOptions
options_
;
};
}
// namespace mace
#endif // MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_
mace/utils/utils.h
浏览文件 @
9d40ffd2
...
@@ -4,6 +4,9 @@
...
@@ -4,6 +4,9 @@
#ifndef MACE_UTILS_UTILS_H_
#ifndef MACE_UTILS_UTILS_H_
#define MACE_UTILS_UTILS_H_
#define MACE_UTILS_UTILS_H_
#include <sys/time.h>
namespace
mace
{
namespace
mace
{
template
<
typename
Integer
>
template
<
typename
Integer
>
Integer
RoundUp
(
Integer
i
,
Integer
factor
)
{
Integer
RoundUp
(
Integer
i
,
Integer
factor
)
{
...
@@ -14,5 +17,12 @@ template <typename Integer>
...
@@ -14,5 +17,12 @@ template <typename Integer>
Integer
CeilQuotient
(
Integer
a
,
Integer
b
)
{
Integer
CeilQuotient
(
Integer
a
,
Integer
b
)
{
return
(
a
+
b
-
1
)
/
b
;
return
(
a
+
b
-
1
)
/
b
;
}
}
// Returns the current wall-clock time in microseconds since the Unix epoch.
inline int64_t NowInMicroSec() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  // Bug fix: widen tv_sec BEFORE multiplying. On platforms with 32-bit
  // time_t (e.g. 32-bit Android targets), tv_sec * 1000000 overflows
  // before the original cast-of-the-product to int64_t took effect.
  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
}
// namespace mace
}
// namespace mace
#endif // MACE_UTILS_UTILS_H_
#endif // MACE_UTILS_UTILS_H_
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录