Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
37fe2203
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
37fe2203
编写于
3月 08, 2018
作者:
W
wuchenghui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
CPU+GPU+DSP full throughput benchmark
上级
371a3b1d
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
306 addition
and
2 deletion
+306
-2
BUILD
BUILD
+25
-2
model_throughput_test.cc
model_throughput_test.cc
+281
-0
未找到文件。
BUILD
浏览文件 @
37fe2203
...
...
@@ -17,7 +17,7 @@ cc_library(
linkstatic
=
1
,
deps
=
[
"@mace//:mace_headers"
,
]
]
,
)
cc_binary
(
...
...
@@ -33,7 +33,7 @@ cc_binary(
"//external:gflags_nothreads"
,
]
+
if_hexagon_enabled
([
"//lib/hexagon:hexagon"
,
])
+
if_production_mode
([
])
+
if_production_mode
([
"@mace//:mace_prod"
,
"//codegen:generated_opencl_prod"
,
"//codegen:generated_tuning_params"
,
...
...
@@ -41,3 +41,26 @@ cc_binary(
"@mace//:mace_dev"
,
]),
)
# Wraps the prebuilt static archive libmace_merged.a in a cc_library so that
# other rules can list it in `deps`. The private visibility restricts it to
# targets declared in this package.
cc_library(
    name = "libmace_merged",
    srcs = [
        "libmace_merged.a",
    ],
    visibility = ["//visibility:private"],
)
# Throughput benchmark binary built from model_throughput_test.cc and linked
# statically (linkstatic = 1). Link options come from if_openmp_enabled,
# which presumably contributes -fopenmp only for OpenMP builds.
#
# NOTE(review): "//lib/hexagon" is an unconditional dependency here, whereas
# the other cc_binary in this file adds "//lib/hexagon:hexagon" through
# if_hexagon_enabled -- confirm that the difference is intended.
cc_binary(
    name = "model_throughput_test",
    srcs = ["model_throughput_test.cc"],
    linkopts = if_openmp_enabled(["-fopenmp"]),
    linkstatic = 1,
    deps = [
        ":libmace_merged",
        "//external:gflags_nothreads",
        "//lib/hexagon",
        "@mace//:mace",
        "@mace//:mace_headers",
        "@mace//:mace_prod",
    ],
)
model_throughput_test.cc
0 → 100644
浏览文件 @
37fe2203
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
/**
* Usage:
 * model_throughput_test \
* --input_shape=1,224,224,3 \
* --output_shape=1,224,224,2 \
* --input_file=input_data \
* --cpu_model_data_file=cpu_model_data.data \
* --gpu_model_data_file=gpu_model_data.data \
* --dsp_model_data_file=dsp_model_data.data \
* --run_seconds=10
*/
#include <malloc.h>
#include <stdint.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <numeric>
#include <thread>
#include "gflags/gflags.h"
#include "mace/public/mace.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
using
namespace
std
;
using
namespace
mace
;
// Forward declarations of the per-device model entry points used by main().
// Each device gets its own namespace whose name is supplied by a
// MACE_*_MODEL_TAG macro; a device without its macro contributes nothing.
// The definitions are not part of this file.
//
// As used by main():
//   LoadModelData(path)   -> pointer handed to CreateNet / UnloadModelData
//   CreateNet(data)       -> NetDef passed to the MaceEngine constructor
//   UnloadModelData(data) -> called on the pointer LoadModelData returned
//   ModelChecksum()       -> string that is logged at start-up
namespace mace {

#if defined(MACE_CPU_MODEL_TAG)
namespace MACE_CPU_MODEL_TAG {

const unsigned char *LoadModelData(const char *model_data_file);
void UnloadModelData(const unsigned char *model_data);
NetDef CreateNet(const unsigned char *model_data);
const std::string ModelChecksum();

}  // namespace MACE_CPU_MODEL_TAG
#endif  // MACE_CPU_MODEL_TAG

#if defined(MACE_GPU_MODEL_TAG)
namespace MACE_GPU_MODEL_TAG {

const unsigned char *LoadModelData(const char *model_data_file);
void UnloadModelData(const unsigned char *model_data);
NetDef CreateNet(const unsigned char *model_data);
const std::string ModelChecksum();

}  // namespace MACE_GPU_MODEL_TAG
#endif  // MACE_GPU_MODEL_TAG

#if defined(MACE_DSP_MODEL_TAG)
namespace MACE_DSP_MODEL_TAG {

const unsigned char *LoadModelData(const char *model_data_file);
void UnloadModelData(const unsigned char *model_data);
NetDef CreateNet(const unsigned char *model_data);
const std::string ModelChecksum();

}  // namespace MACE_DSP_MODEL_TAG
#endif  // MACE_DSP_MODEL_TAG

}  // namespace mace
// Parses a comma-separated list of dimensions (e.g. "1,224,224,3") and
// appends each one to `*shape`.
//
// - An empty string appends nothing; a trailing comma adds no extra entry.
// - A field that does not start with a number (including an empty field
//   between two commas) is appended as 0, as with the previous atoi-based
//   parsing.
// - Fields are parsed with strtoll, so dimensions that do not fit in an
//   `int` are stored correctly instead of overflowing.
//
// `shape` must not be null; existing elements are kept.
void ParseShape(const std::string &str, std::vector<int64_t> *shape) {
  // Walk the string by offset rather than re-copying the remaining tail for
  // every dimension.
  size_t pos = 0;
  while (pos < str.size()) {
    shape->push_back(std::strtoll(str.c_str() + pos, nullptr, 10));
    const size_t comma = str.find(',', pos);
    if (comma == std::string::npos) {
      break;
    }
    pos = comma + 1;
  }
}
// Maps a device name to its DeviceType. The match is exact and
// case-sensitive: "NEON", "OPENCL" and "HEXAGON" select the corresponding
// enumerator; "CPU" and any unrecognized name yield DeviceType::CPU.
DeviceType ParseDeviceType(const std::string &device_str) {
  if (device_str == "NEON") {
    return DeviceType::NEON;
  }
  if (device_str == "OPENCL") {
    return DeviceType::OPENCL;
  }
  if (device_str == "HEXAGON") {
    return DeviceType::HEXAGON;
  }
  // "CPU" itself and every unknown name end up here.
  return DeviceType::CPU;
}
// Command-line flags (gflags). main() logs every value at start-up.

// Tensor shapes as comma-separated dimension lists; main() parses them with
// ParseShape and sizes the input/output buffers from the products.
DEFINE_string(input_shape,
              "1,224,224,3",
              "input shape, separated by comma");
DEFINE_string(output_shape,
              "1,224,224,2",
              "output shape, separated by comma");

// Binary file whose contents main() reads into the float input buffer.
DEFINE_string(input_file, "", "input file name");

// Per-device model data files, each passed to that device's LoadModelData.
DEFINE_string(cpu_model_data_file, "", "cpu model data file name");
DEFINE_string(gpu_model_data_file, "", "gpu model data file name");
DEFINE_string(dsp_model_data_file, "", "dsp model data file name");

// Length of the timed measurement, in seconds.
DEFINE_int32(run_seconds, 10, "run seconds");
#if defined(MACE_CPU_MODEL_TAG) || defined(MACE_GPU_MODEL_TAG) || \
    defined(MACE_DSP_MODEL_TAG)
namespace {

// Calls engine->Run(input, input_shape, output) back to back until at least
// `run_micros` microseconds have elapsed, then returns the achieved rate in
// frames per second. Returns 0 when no time elapsed (e.g. a non-positive
// `run_micros`) instead of dividing by zero.
template <typename Engine>
double MeasureThroughput(Engine *engine,
                         float *input,
                         const std::vector<int64_t> &input_shape,
                         float *output,
                         int64_t run_micros) {
  int64_t frames = 0;
  int64_t micros = 0;
  const int64_t start = NowMicros();
  for (; micros < run_micros; ++frames) {
    engine->Run(input, input_shape, output);
    micros = NowMicros() - start;
  }
  return micros > 0 ? frames * 1000000.0 / micros : 0.0;
}

}  // namespace
#endif

// Full-throughput benchmark entry point.
//
// For every device whose MACE_*_MODEL_TAG macro is defined (CPU, GPU, DSP)
// this loads the model, builds a MaceEngine and performs one timed warm-up
// run. Afterwards one thread per device runs its engine repeatedly on the
// same input for --run_seconds, all devices concurrently, and the per-device
// and summed frames-per-second figures are logged.
//
// Returns -1 if the input file cannot be opened, 0 otherwise.
int main(int argc, char **argv) {
  gflags::SetUsageMessage("some usage message");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  LOG(INFO) << "mace version: " << MaceVersion();
  LOG(INFO) << "mace git version: " << MaceGitVersion();
#ifdef MACE_CPU_MODEL_TAG
  LOG(INFO) << "cpu model checksum: "
            << mace::MACE_CPU_MODEL_TAG::ModelChecksum();
#endif
#ifdef MACE_GPU_MODEL_TAG
  LOG(INFO) << "gpu model checksum: "
            << mace::MACE_GPU_MODEL_TAG::ModelChecksum();
#endif
#ifdef MACE_DSP_MODEL_TAG
  LOG(INFO) << "dsp model checksum: "
            << mace::MACE_DSP_MODEL_TAG::ModelChecksum();
#endif
  LOG(INFO) << "input_shape: " << FLAGS_input_shape;
  LOG(INFO) << "output_shape: " << FLAGS_output_shape;
  LOG(INFO) << "input_file: " << FLAGS_input_file;
  LOG(INFO) << "cpu_model_data_file: " << FLAGS_cpu_model_data_file;
  LOG(INFO) << "gpu_model_data_file: " << FLAGS_gpu_model_data_file;
  LOG(INFO) << "dsp_model_data_file: " << FLAGS_dsp_model_data_file;
  LOG(INFO) << "run_seconds: " << FLAGS_run_seconds;

  std::vector<int64_t> input_shape_vec;
  std::vector<int64_t> output_shape_vec;
  ParseShape(FLAGS_input_shape, &input_shape_vec);
  ParseShape(FLAGS_output_shape, &output_shape_vec);

  // The initial value must be int64_t: std::accumulate accumulates in the
  // type of its init argument, so a plain `1` would truncate the product
  // to int.
  const int64_t input_size =
      std::accumulate(input_shape_vec.begin(), input_shape_vec.end(),
                      static_cast<int64_t>(1), std::multiplies<int64_t>());
  const int64_t output_size =
      std::accumulate(output_shape_vec.begin(), output_shape_vec.end(),
                      static_cast<int64_t>(1), std::multiplies<int64_t>());

  // The input buffer is value-initialized so that a short input file leaves
  // zeros, not indeterminate values, in the unread tail.
  std::unique_ptr<float[]> input_data(new float[input_size]());
  std::unique_ptr<float[]> cpu_output_data(new float[output_size]);
  std::unique_ptr<float[]> gpu_output_data(new float[output_size]);
  std::unique_ptr<float[]> dsp_output_data(new float[output_size]);

  // load input
  std::ifstream in_file(FLAGS_input_file, std::ios::in | std::ios::binary);
  if (!in_file.is_open()) {
    LOG(INFO) << "Open input file failed";
    return -1;
  }
  const std::streamsize input_bytes =
      static_cast<std::streamsize>(input_size * sizeof(float));
  in_file.read(reinterpret_cast<char *>(input_data.get()), input_bytes);
  if (in_file.gcount() != input_bytes) {
    LOG(INFO) << "Input file provided " << in_file.gcount() << " of "
              << input_bytes << " expected bytes; the rest is zero-filled";
  }
  in_file.close();

#ifdef MACE_CPU_MODEL_TAG
  /* --------------------- CPU init ----------------------- */
  LOG(INFO) << "Load & init cpu model and warm up";
  const unsigned char *cpu_model_data =
      mace::MACE_CPU_MODEL_TAG::LoadModelData(
          FLAGS_cpu_model_data_file.c_str());
  NetDef cpu_net_def = mace::MACE_CPU_MODEL_TAG::CreateNet(cpu_model_data);
  mace::MaceEngine cpu_engine(&cpu_net_def, DeviceType::CPU);
  // NOTE(review): unlike the GPU and DSP paths, the CPU model data is not
  // unloaded after the engine is built. Presumably the CPU engine keeps
  // referencing that memory -- confirm before adding an UnloadModelData call.

  LOG(INFO) << "CPU Warm up run";
  const int64_t cpu_warmup_start = NowMicros();
  cpu_engine.Run(input_data.get(), input_shape_vec, cpu_output_data.get());
  const int64_t cpu_warmup_end = NowMicros();
  LOG(INFO) << "CPU 1st warm up run latency: "
            << cpu_warmup_end - cpu_warmup_start << " us";
#endif

#ifdef MACE_GPU_MODEL_TAG
  /* --------------------- GPU init ----------------------- */
  LOG(INFO) << "Load & init gpu model and warm up";
  const unsigned char *gpu_model_data =
      mace::MACE_GPU_MODEL_TAG::LoadModelData(
          FLAGS_gpu_model_data_file.c_str());
  NetDef gpu_net_def = mace::MACE_GPU_MODEL_TAG::CreateNet(gpu_model_data);
  mace::MaceEngine gpu_engine(&gpu_net_def, DeviceType::OPENCL);
  mace::MACE_GPU_MODEL_TAG::UnloadModelData(gpu_model_data);

  LOG(INFO) << "GPU Warm up run";
  const int64_t gpu_warmup_start = NowMicros();
  gpu_engine.Run(input_data.get(), input_shape_vec, gpu_output_data.get());
  const int64_t gpu_warmup_end = NowMicros();
  LOG(INFO) << "GPU 1st warm up run latency: "
            << gpu_warmup_end - gpu_warmup_start << " us";
#endif

#ifdef MACE_DSP_MODEL_TAG
  /* --------------------- DSP init ----------------------- */
  LOG(INFO) << "Load & init dsp model and warm up";
  // Load from the DSP flag (the GPU model file was used here by mistake).
  const unsigned char *dsp_model_data =
      mace::MACE_DSP_MODEL_TAG::LoadModelData(
          FLAGS_dsp_model_data_file.c_str());
  NetDef dsp_net_def = mace::MACE_DSP_MODEL_TAG::CreateNet(dsp_model_data);
  mace::MaceEngine dsp_engine(&dsp_net_def, DeviceType::HEXAGON);
  mace::MACE_DSP_MODEL_TAG::UnloadModelData(dsp_model_data);

  LOG(INFO) << "DSP Warm up run";
  const int64_t dsp_warmup_start = NowMicros();
  // Warm up the DSP engine itself (gpu_engine was run here by mistake).
  dsp_engine.Run(input_data.get(), input_shape_vec, dsp_output_data.get());
  const int64_t dsp_warmup_end = NowMicros();
  LOG(INFO) << "DSP 1st warm up run latency: "
            << dsp_warmup_end - dsp_warmup_start << " us";
#endif

  double cpu_throughput = 0;
  double gpu_throughput = 0;
  double dsp_throughput = 0;
  // Widen before multiplying so that large --run_seconds values do not
  // overflow 32-bit arithmetic.
  const int64_t run_micros =
      static_cast<int64_t>(FLAGS_run_seconds) * 1000000;

  // Start all device threads before joining any of them so the devices are
  // loaded concurrently. Each thread writes only its own throughput variable,
  // which is read after the corresponding join().
#ifdef MACE_CPU_MODEL_TAG
  std::thread cpu_thread([&]() {
    cpu_throughput =
        MeasureThroughput(&cpu_engine, input_data.get(), input_shape_vec,
                          cpu_output_data.get(), run_micros);
  });
#endif
#ifdef MACE_GPU_MODEL_TAG
  std::thread gpu_thread([&]() {
    gpu_throughput =
        MeasureThroughput(&gpu_engine, input_data.get(), input_shape_vec,
                          gpu_output_data.get(), run_micros);
  });
#endif
#ifdef MACE_DSP_MODEL_TAG
  std::thread dsp_thread([&]() {
    dsp_throughput =
        MeasureThroughput(&dsp_engine, input_data.get(), input_shape_vec,
                          dsp_output_data.get(), run_micros);
  });
#endif

  double total_throughput = 0;
#ifdef MACE_CPU_MODEL_TAG
  cpu_thread.join();
  LOG(INFO) << "CPU throughput: " << cpu_throughput << " f/s";
  total_throughput += cpu_throughput;
#endif
#ifdef MACE_GPU_MODEL_TAG
  gpu_thread.join();
  LOG(INFO) << "GPU throughput: " << gpu_throughput << " f/s";
  total_throughput += gpu_throughput;
#endif
#ifdef MACE_DSP_MODEL_TAG
  dsp_thread.join();
  LOG(INFO) << "DSP throughput: " << dsp_throughput << " f/s";
  total_throughput += dsp_throughput;
#endif
  LOG(INFO) << "Total throughput: " << total_throughput << " f/s";
  return 0;
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录