Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
ab4048ed
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ab4048ed
编写于
3月 14, 2018
作者:
W
wuchenghui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make CPU&GPU Runtime configurable
上级
eb442e35
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
358 addition
and
61 deletion
+358
-61
.gitignore
.gitignore
+1
-0
mace/benchmark/BUILD
mace/benchmark/BUILD
+5
-16
mace/benchmark/benchmark_model.cc
mace/benchmark/benchmark_model.cc
+26
-1
mace/benchmark/stat_summarizer.cc
mace/benchmark/stat_summarizer.cc
+1
-1
mace/core/BUILD
mace/core/BUILD
+4
-0
mace/core/mace.cc
mace/core/mace.cc
+17
-0
mace/core/runtime/cpu/cpu_runtime.cc
mace/core/runtime/cpu/cpu_runtime.cc
+107
-0
mace/core/runtime/cpu/cpu_runtime.h
mace/core/runtime/cpu/cpu_runtime.h
+17
-0
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+64
-11
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+4
-2
mace/core/testing/test_benchmark_main.cc
mace/core/testing/test_benchmark_main.cc
+5
-0
mace/examples/mace_run.cc
mace/examples/mace_run.cc
+50
-2
mace/public/mace.h
mace/public/mace.h
+14
-0
tools/benchmark.sh
tools/benchmark.sh
+10
-5
tools/build_run_throughput_test.sh
tools/build_run_throughput_test.sh
+9
-9
tools/mace_tools.py
tools/mace_tools.py
+13
-9
tools/tuning_run.sh
tools/tuning_run.sh
+11
-5
未找到文件。
.gitignore
浏览文件 @
ab4048ed
...
@@ -7,3 +7,4 @@ mace/codegen/models/
...
@@ -7,3 +7,4 @@ mace/codegen/models/
mace/codegen/opencl/
mace/codegen/opencl/
mace/codegen/opencl_bin/
mace/codegen/opencl_bin/
mace/codegen/version/
mace/codegen/version/
build/
mace/benchmark/BUILD
浏览文件 @
ab4048ed
# Benchmark
# Benchmark
# Examples
# Examples
load
(
load
(
"//:mace.bzl"
,
"//
mace
:mace.bzl"
,
"if_production_mode"
,
"if_production_mode"
,
"if_not_production_mode"
,
"if_not_production_mode"
,
"if_hexagon_enabled"
,
"if_hexagon_enabled"
,
...
@@ -16,7 +16,7 @@ cc_library(
...
@@ -16,7 +16,7 @@ cc_library(
hdrs
=
[
"stat_summarizer.h"
],
hdrs
=
[
"stat_summarizer.h"
],
linkstatic
=
1
,
linkstatic
=
1
,
deps
=
[
deps
=
[
"
@mace//:mace_headers
"
,
"
//mace/core
"
,
],
],
)
)
...
@@ -29,17 +29,9 @@ cc_binary(
...
@@ -29,17 +29,9 @@ cc_binary(
linkstatic
=
1
,
linkstatic
=
1
,
deps
=
[
deps
=
[
":stat_summarizer"
,
":stat_summarizer"
,
"//mace/codegen:generated_models"
,
"//external:gflags_nothreads"
,
"//external:gflags_nothreads"
,
]
+
if_hexagon_enabled
([
"//mace/codegen:generated_models"
,
"//lib/hexagon:hexagon"
,
],
])
+
if_production_mode
([
"@mace//:mace_prod"
,
"//codegen:generated_opencl_prod"
,
"//codegen:generated_tuning_params"
,
])
+
if_not_production_mode
([
"@mace//:mace_dev"
,
]),
)
)
cc_library
(
cc_library
(
...
@@ -58,9 +50,6 @@ cc_binary(
...
@@ -58,9 +50,6 @@ cc_binary(
deps
=
[
deps
=
[
":libmace_merged"
,
":libmace_merged"
,
"//external:gflags_nothreads"
,
"//external:gflags_nothreads"
,
"//lib/hexagon"
,
"//mace/core"
,
"@mace//:mace"
,
"@mace//:mace_headers"
,
"@mace//:mace_prod"
,
],
],
)
)
mace/benchmark/benchmark_model.cc
浏览文件 @
ab4048ed
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "mace/public/mace.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
#include "mace/utils/logging.h"
#include "benchmark/stat_summarizer.h"
#include "
mace/
benchmark/stat_summarizer.h"
#include <cstdlib>
#include <cstdlib>
#include <fstream>
#include <fstream>
...
@@ -204,6 +204,11 @@ DEFINE_bool(show_summary, true, "whether to show a summary of the stats");
...
@@ -204,6 +204,11 @@ DEFINE_bool(show_summary, true, "whether to show a summary of the stats");
DEFINE_bool
(
show_flops
,
true
,
"whether to estimate the model's FLOPs"
);
DEFINE_bool
(
show_flops
,
true
,
"whether to estimate the model's FLOPs"
);
DEFINE_int32
(
warmup_runs
,
1
,
"how many runs to initialize model"
);
DEFINE_int32
(
warmup_runs
,
1
,
"how many runs to initialize model"
);
DEFINE_string
(
model_data_file
,
""
,
"model data file name, used when EMBED_MODEL_DATA set to 0"
);
DEFINE_string
(
model_data_file
,
""
,
"model data file name, used when EMBED_MODEL_DATA set to 0"
);
// Runtime tuning flags. The GPU flags are passed to ConfigOpenCLRuntime when
// the device is OPENCL; the CPU flags are passed to ConfigCPURuntime when the
// device is CPU. The integer flags are cast directly to the corresponding
// enums, so the numbers in the help strings are the enum values.
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
int
Main
(
int
argc
,
char
**
argv
)
{
int
Main
(
int
argc
,
char
**
argv
)
{
MACE_CHECK
(
FLAGS_device
!=
"HEXAGON"
,
"Model benchmark tool do not support DSP."
);
MACE_CHECK
(
FLAGS_device
!=
"HEXAGON"
,
"Model benchmark tool do not support DSP."
);
...
@@ -212,6 +217,11 @@ int Main(int argc, char **argv) {
...
@@ -212,6 +217,11 @@ int Main(int argc, char **argv) {
LOG
(
INFO
)
<<
"Benchmark name: ["
<<
FLAGS_benchmark_name
<<
"]"
;
LOG
(
INFO
)
<<
"Benchmark name: ["
<<
FLAGS_benchmark_name
<<
"]"
;
LOG
(
INFO
)
<<
"Device: ["
<<
FLAGS_device
<<
"]"
;
LOG
(
INFO
)
<<
"Device: ["
<<
FLAGS_device
<<
"]"
;
LOG
(
INFO
)
<<
"gpu_type: ["
<<
FLAGS_gpu_type
<<
"]"
;
LOG
(
INFO
)
<<
"gpu_perf_hint: ["
<<
FLAGS_gpu_perf_hint
<<
"]"
;
LOG
(
INFO
)
<<
"gpu_priority_hint: ["
<<
FLAGS_gpu_priority_hint
<<
"]"
;
LOG
(
INFO
)
<<
"omp_num_threads: ["
<<
FLAGS_omp_num_threads
<<
"]"
;
LOG
(
INFO
)
<<
"cpu_power_option: ["
<<
FLAGS_cpu_power_option
<<
"]"
;
LOG
(
INFO
)
<<
"Input node: ["
<<
FLAGS_input_node
<<
"]"
;
LOG
(
INFO
)
<<
"Input node: ["
<<
FLAGS_input_node
<<
"]"
;
LOG
(
INFO
)
<<
"Input shapes: ["
<<
FLAGS_input_shape
<<
"]"
;
LOG
(
INFO
)
<<
"Input shapes: ["
<<
FLAGS_input_shape
<<
"]"
;
LOG
(
INFO
)
<<
"Output node: ["
<<
FLAGS_output_node
<<
"]"
;
LOG
(
INFO
)
<<
"Output node: ["
<<
FLAGS_output_node
<<
"]"
;
...
@@ -246,6 +256,21 @@ int Main(int argc, char **argv) {
...
@@ -246,6 +256,21 @@ int Main(int argc, char **argv) {
device_type
=
OPENCL
;
device_type
=
OPENCL
;
}
}
// config runtime
if
(
device_type
==
OPENCL
)
{
GPUType
gpu_type
=
ADRENO
;
if
(
FLAGS_gpu_type
==
"MALI"
)
gpu_type
=
MALI
;
mace
::
ConfigOpenCLRuntime
(
gpu_type
,
static_cast
<
GPUPerfHint
>
(
FLAGS_gpu_perf_hint
),
static_cast
<
GPUPriorityHint
>
(
FLAGS_gpu_priority_hint
));
}
else
if
(
device_type
==
CPU
)
{
mace
::
ConfigCPURuntime
(
FLAGS_omp_num_threads
,
static_cast
<
CPUPowerOption
>
(
FLAGS_cpu_power_option
));
}
std
::
vector
<
std
::
string
>
input_names
=
str_util
::
Split
(
FLAGS_input_node
,
','
);
std
::
vector
<
std
::
string
>
input_names
=
str_util
::
Split
(
FLAGS_input_node
,
','
);
std
::
vector
<
std
::
string
>
output_names
=
str_util
::
Split
(
FLAGS_output_node
,
','
);
std
::
vector
<
std
::
string
>
output_names
=
str_util
::
Split
(
FLAGS_output_node
,
','
);
std
::
vector
<
std
::
string
>
input_shapes
=
str_util
::
Split
(
FLAGS_input_shape
,
':'
);
std
::
vector
<
std
::
string
>
input_shapes
=
str_util
::
Split
(
FLAGS_input_shape
,
':'
);
...
...
mace/benchmark/stat_summarizer.cc
浏览文件 @
ab4048ed
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
// Copyright (c) 2017 XiaoMi All rights reserved.
//
//
#include "benchmark/stat_summarizer.h"
#include "
mace/
benchmark/stat_summarizer.h"
#include "mace/public/mace.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
#include "mace/utils/logging.h"
...
...
mace/core/BUILD
浏览文件 @
ab4048ed
...
@@ -14,6 +14,7 @@ load(
...
@@ -14,6 +14,7 @@ load(
"if_not_hexagon_enabled"
,
"if_not_hexagon_enabled"
,
"if_production_mode"
,
"if_production_mode"
,
"if_not_production_mode"
,
"if_not_production_mode"
,
"if_openmp_enabled"
,
)
)
cc_library
(
cc_library
(
...
@@ -21,6 +22,7 @@ cc_library(
...
@@ -21,6 +22,7 @@ cc_library(
srcs
=
glob
(
srcs
=
glob
(
[
[
"*.cc"
,
"*.cc"
,
"runtime/cpu/*.cc"
,
"runtime/opencl/*.cc"
,
"runtime/opencl/*.cc"
,
"runtime/hexagon/*.cc"
,
"runtime/hexagon/*.cc"
,
],
],
...
@@ -37,9 +39,11 @@ cc_library(
...
@@ -37,9 +39,11 @@ cc_library(
]),
]),
hdrs
=
glob
([
hdrs
=
glob
([
"*.h"
,
"*.h"
,
"runtime/cpu/*.h"
,
"runtime/opencl/*.h"
,
"runtime/opencl/*.h"
,
"runtime/hexagon/*.h"
,
"runtime/hexagon/*.h"
,
]),
]),
copts
=
if_openmp_enabled
([
"-fopenmp"
]),
linkopts
=
[
"-ldl"
]
+
if_android
([
linkopts
=
[
"-ldl"
]
+
if_android
([
"-pie"
,
"-pie"
,
"-lm"
,
"-lm"
,
...
...
mace/core/mace.cc
浏览文件 @
ab4048ed
...
@@ -5,6 +5,8 @@
...
@@ -5,6 +5,8 @@
#include "mace/public/mace.h"
#include "mace/public/mace.h"
#include "mace/core/net.h"
#include "mace/core/net.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/core/types.h"
#include "mace/core/types.h"
namespace
mace
{
namespace
mace
{
...
@@ -349,6 +351,21 @@ const OperatorDef &NetDef::op(const int idx) const {
...
@@ -349,6 +351,21 @@ const OperatorDef &NetDef::op(const int idx) const {
return
op_
[
idx
];
return
op_
[
idx
];
}
}
// Public entry point for choosing the OpenCL runtime settings.
// Logs the three requested values and then builds the global OpenCLRuntime
// with them via OpenCLRuntime::CreateGlobal.
void ConfigOpenCLRuntime(GPUType gpu_type,
                         GPUPerfHint gpu_perf_hint,
                         GPUPriorityHint gpu_priority_hint) {
  LOG(INFO) << "Config OpenCL Runtime: gpu_type: " << gpu_type
            << ", gpu_perf_hint: " << gpu_perf_hint
            << ", gpu_priority_hint: " << gpu_priority_hint;

  // The returned pointer is not needed here; the runtime is reachable
  // through OpenCLRuntime::Global().
  OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint);
}
// Public entry point for choosing the CPU runtime settings.
// Logs the requested thread count and power option, then forwards both
// unchanged to SetCPURuntime.
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
  LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
            << ", cpu_power_option: " << power_option;

  SetCPURuntime(omp_num_threads, power_option);
}
// Mace Engine
// Mace Engine
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
)
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
)
:
op_registry_
(
new
OperatorRegistry
()),
:
op_registry_
(
new
OperatorRegistry
()),
...
...
mace/core/runtime/cpu/cpu_runtime.cc
0 → 100644
浏览文件 @
ab4048ed
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
#include <omp.h>
#include <sys/syscall.h>
#include <unistd.h>
namespace
mace
{
namespace
{
// Reads the maximum frequency reported for CPU `cpu_id` from
// /sys/devices/system/cpu/cpu<cpu_id>/cpufreq/cpuinfo_max_freq.
//
// Returns the integer stored in that file, or 0 when the path does not fit
// the buffer, the file cannot be opened, or no integer can be parsed from it.
static int GetCPUMaxFreq(int cpu_id) {
  enum { kPathCapacity = 64 };
  char path[kPathCapacity];

  // Bounded formatting: a truncated path must never be opened.
  int written = snprintf(path, sizeof(path),
                         "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
                         cpu_id);
  if (written < 0 || written >= kPathCapacity) {
    return 0;
  }

  FILE *fp = fopen(path, "rb");
  if (!fp) {
    return 0;
  }

  int freq = 0;
  // fscanf returns the number of converted items; anything other than 1
  // means the file did not contain a readable integer.
  if (fscanf(fp, "%d", &freq) != 1) {
    freq = 0;
  }
  fclose(fp);
  return freq;
}
// Overwrites `cpu_ids` with the indices 0 .. cpu_ids.size()-1, ordered so
// that the max frequency returned by GetCPUMaxFreq for each index is
// ascending. Only the size of the incoming vector is used; its previous
// contents are discarded.
static void SortCPUIdsByMaxFreqAsc(std::vector<int> &cpu_ids) {
  const int total = cpu_ids.size();

  std::vector<int> freqs;
  freqs.resize(total);

  // Look up the max frequency of every CPU and start from identity order.
  for (int cpu = 0; cpu < total; ++cpu) {
    freqs[cpu] = GetCPUMaxFreq(cpu);
    cpu_ids[cpu] = cpu;
  }

  // Simple quadratic exchange sort; ids and frequencies are swapped in
  // lockstep so that position k in both vectors refers to the same CPU.
  for (int lo = 0; lo + 1 < total; ++lo) {
    for (int hi = lo + 1; hi < total; ++hi) {
      if (freqs[lo] <= freqs[hi]) {
        continue;
      }

      const int id_at_lo = cpu_ids[lo];
      cpu_ids[lo] = cpu_ids[hi];
      cpu_ids[hi] = id_at_lo;

      const int freq_at_lo = freqs[lo];
      freqs[lo] = freqs[hi];
      freqs[hi] = freq_at_lo;
    }
  }
}
// Applies the CPU affinity `mask` to the calling thread by invoking the
// sched_setaffinity system call with the caller's thread id.
// A non-zero result is reported through LOG(FATAL) together with errno.
static void SetThreadAffinity(cpu_set_t mask) {
  const pid_t tid = gettid();

  // TODO: when set omp num threads to 1, sometimes return EINVAL(22) error
  // https://linux.die.net/man/2/sched_setaffinity
  const int rc = syscall(__NR_sched_setaffinity, tid, sizeof(mask), &mask);
  if (rc == 0) {
    return;
  }

  LOG(FATAL) << "syscall setaffinity error: " << rc << ' ' << errno;
}
}
// namespace
// Configures the OpenMP thread count and pins the OpenMP threads to a set of
// CPUs chosen by `power_option`.
//
//   omp_num_threads  requested number of OpenMP threads.
//   power_option     DEFAULT: use every CPU (the thread count is replaced by
//                    the CPU count); HIGH_PERFORMANCE: use the
//                    `omp_num_threads` CPUs with the highest max frequency;
//                    any other value (BATTERY_SAVE): use the
//                    `omp_num_threads` CPUs with the lowest max frequency.
//
// When `omp_num_threads` is at least the CPU count, every CPU is used
// regardless of `power_option`. A request below 1 combined with a
// non-DEFAULT option is raised to 1, because a non-positive count would
// otherwise produce an iterator range outside `sorted_cpu_ids`.
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
  int cpu_count = omp_get_num_procs();
  LOG(INFO) << "cpu_count: " << cpu_count;

  // CPU ids ordered by ascending max frequency.
  std::vector<int> sorted_cpu_ids;
  sorted_cpu_ids.resize(cpu_count);
  SortCPUIdsByMaxFreqAsc(sorted_cpu_ids);

  std::vector<int> use_cpu_ids;
  if (power_option == CPUPowerOption::DEFAULT ||
      omp_num_threads >= cpu_count) {
    use_cpu_ids = sorted_cpu_ids;
    omp_num_threads = cpu_count;
  } else {
    // Here omp_num_threads < cpu_count; make sure it is also positive so the
    // slices below stay inside [begin, end].
    if (omp_num_threads < 1) {
      LOG(INFO) << "omp_num_threads " << omp_num_threads
                << " is not positive, using 1";
      omp_num_threads = 1;
    }

    if (power_option == CPUPowerOption::HIGH_PERFORMANCE) {
      // Tail of the ascending list: the fastest CPUs.
      use_cpu_ids = std::vector<int>(
          sorted_cpu_ids.begin() + cpu_count - omp_num_threads,
          sorted_cpu_ids.end());
    } else {
      // Head of the ascending list: the slowest CPUs.
      use_cpu_ids = std::vector<int>(
          sorted_cpu_ids.begin(),
          sorted_cpu_ids.begin() + omp_num_threads);
    }
  }

  omp_set_num_threads(omp_num_threads);

  // compute mask
  cpu_set_t mask;
  CPU_ZERO(&mask);
  for (auto cpu_id : use_cpu_ids) {
    CPU_SET(cpu_id, &mask);
  }
  LOG(INFO) << "use cpus mask: " << mask.__bits[0];

  // Every iteration applies the same mask to whichever thread executes it.
  // NOTE(review): this presumably relies on the loop iterations being spread
  // one per OpenMP thread - confirm the schedule guarantees that.
#pragma omp parallel for
  for (int i = 0; i < omp_num_threads; ++i) {
    SetThreadAffinity(mask);
  }
}
}
// namespace mace
mace/core/runtime/cpu/cpu_runtime.h
0 → 100644
浏览文件 @
ab4048ed
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
#include "mace/public/mace.h"
namespace mace {

// Sets the OpenMP thread count and the CPU affinity of the OpenMP threads.
// With CPUPowerOption::DEFAULT, or when `omp_num_threads` is at least the
// number of processors, all CPUs are used. HIGH_PERFORMANCE selects the
// `omp_num_threads` CPUs with the highest max frequency; otherwise the ones
// with the lowest max frequency are selected.
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option);

}
#endif //MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
ab4048ed
...
@@ -63,11 +63,67 @@ void OpenCLProfilingTimer::ClearTiming() {
...
@@ -63,11 +63,67 @@ void OpenCLProfilingTimer::ClearTiming() {
}
}
OpenCLRuntime
*
OpenCLRuntime
::
Global
()
{
OpenCLRuntime
*
OpenCLRuntime
::
Global
()
{
static
OpenCLRuntime
instance
;
if
(
opencl_runtime_instance
==
nullptr
)
{
return
&
instance
;
return
CreateGlobal
(
GPUType
::
ADRENO
,
GPUPerfHint
::
PERF_NORMAL
,
GPUPriorityHint
::
PRIORITY_LOW
);
}
return
opencl_runtime_instance
;
}
// Constructs a new OpenCLRuntime with the given GPU settings, stores it in
// `opencl_runtime_instance` and returns it.
// NOTE(review): a pointer already stored in `opencl_runtime_instance` is
// overwritten without being deleted - confirm this is only called once.
OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type,
                                           GPUPerfHint gpu_perf_hint,
                                           GPUPriorityHint gpu_priority_hint) {
  OpenCLRuntime *created =
      new OpenCLRuntime(gpu_type, gpu_perf_hint, gpu_priority_hint);
  opencl_runtime_instance = created;
  return created;
}
// Writes a zero-terminated list of OpenCL context properties into
// `properties` according to the requested GPU settings.
//
// For GPUType::ADRENO, a perf hint of LOW/NORMAL/HIGH adds the pair
// (CL_CONTEXT_PERF_HINT_QCOM, value) and a priority hint of LOW/NORMAL/HIGH
// adds the pair (CL_CONTEXT_PRIORITY_HINT_QCOM, value); any other hint value
// adds nothing. For other GPU types only the terminator is written.
// At most five entries are written, so `properties` must have room for five.
void ParseOpenCLRuntimeConfig(cl_context_properties *properties,
                              GPUType gpu_type,
                              GPUPerfHint gpu_perf_hint,
                              GPUPriorityHint gpu_priority_hint) {
  int count = 0;

  if (gpu_type != GPUType::ADRENO) {
    // TODO: support Mali GPU context properties
    // The properties list should be terminated with 0
    properties[count] = 0;
    return;
  }

  if (gpu_perf_hint == GPUPerfHint::PERF_LOW) {
    properties[count++] = CL_CONTEXT_PERF_HINT_QCOM;
    properties[count++] = CL_PERF_HINT_LOW_QCOM;
  } else if (gpu_perf_hint == GPUPerfHint::PERF_NORMAL) {
    properties[count++] = CL_CONTEXT_PERF_HINT_QCOM;
    properties[count++] = CL_PERF_HINT_NORMAL_QCOM;
  } else if (gpu_perf_hint == GPUPerfHint::PERF_HIGH) {
    properties[count++] = CL_CONTEXT_PERF_HINT_QCOM;
    properties[count++] = CL_PERF_HINT_HIGH_QCOM;
  }

  if (gpu_priority_hint == GPUPriorityHint::PRIORITY_LOW) {
    properties[count++] = CL_CONTEXT_PRIORITY_HINT_QCOM;
    properties[count++] = CL_PRIORITY_HINT_LOW_QCOM;
  } else if (gpu_priority_hint == GPUPriorityHint::PRIORITY_NORMAL) {
    properties[count++] = CL_CONTEXT_PRIORITY_HINT_QCOM;
    properties[count++] = CL_PRIORITY_HINT_NORMAL_QCOM;
  } else if (gpu_priority_hint == GPUPriorityHint::PRIORITY_HIGH) {
    properties[count++] = CL_CONTEXT_PRIORITY_HINT_QCOM;
    properties[count++] = CL_PRIORITY_HINT_HIGH_QCOM;
  }

  // The properties list should be terminated with 0
  properties[count] = 0;
}
}
OpenCLRuntime
::
OpenCLRuntime
()
{
OpenCLRuntime
::
OpenCLRuntime
(
GPUType
gpu_type
,
GPUPerfHint
gpu_perf_hint
,
GPUPriorityHint
gpu_priority_hint
)
{
LoadOpenCLLibrary
();
LoadOpenCLLibrary
();
std
::
vector
<
cl
::
Platform
>
all_platforms
;
std
::
vector
<
cl
::
Platform
>
all_platforms
;
...
@@ -109,15 +165,12 @@ OpenCLRuntime::OpenCLRuntime() {
...
@@ -109,15 +165,12 @@ OpenCLRuntime::OpenCLRuntime() {
properties
|=
CL_QUEUE_PROFILING_ENABLE
;
properties
|=
CL_QUEUE_PROFILING_ENABLE
;
}
}
// TODO (heliangliang) Make this configurable (e.g.HIGH for benchmark,
std
::
unique_ptr
<
cl_context_properties
[]
>
context_properties
(
// disabled for Mali)
new
cl_context_properties
[
5
]);
cl_context_properties
context_properties
[]
=
{
ParseOpenCLRuntimeConfig
(
context_properties
.
get
(),
gpu_type
,
gpu_perf_hint
,
// Set context perf hint to normal
gpu_priority_hint
);
CL_CONTEXT_PERF_HINT_QCOM
,
CL_PERF_HINT_NORMAL_QCOM
,
// Set context priority hint to low
CL_CONTEXT_PRIORITY_HINT_QCOM
,
CL_PRIORITY_HINT_LOW_QCOM
,
0
};
cl
::
Context
context
({
gpu_device
},
context_properties
);
cl
::
Context
context
({
gpu_device
},
context_properties
.
get
()
);
cl
::
CommandQueue
command_queue
(
context
,
gpu_device
,
properties
);
cl
::
CommandQueue
command_queue
(
context
,
gpu_device
,
properties
);
const
char
*
kernel_path
=
getenv
(
"MACE_KERNEL_PATH"
);
const
char
*
kernel_path
=
getenv
(
"MACE_KERNEL_PATH"
);
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
ab4048ed
...
@@ -20,7 +20,7 @@ namespace mace {
...
@@ -20,7 +20,7 @@ namespace mace {
class
OpenCLProfilingTimer
:
public
Timer
{
class
OpenCLProfilingTimer
:
public
Timer
{
public:
public:
explicit
OpenCLProfilingTimer
(
const
cl
::
Event
*
event
)
explicit
OpenCLProfilingTimer
(
const
cl
::
Event
*
event
)
:
event_
(
event
),
accumulated_micros_
(
0
){};
:
event_
(
event
),
accumulated_micros_
(
0
)
{};
void
StartTiming
()
override
;
void
StartTiming
()
override
;
void
StopTiming
()
override
;
void
StopTiming
()
override
;
void
AccumulateTiming
()
override
;
void
AccumulateTiming
()
override
;
...
@@ -38,6 +38,7 @@ class OpenCLProfilingTimer : public Timer {
...
@@ -38,6 +38,7 @@ class OpenCLProfilingTimer : public Timer {
class
OpenCLRuntime
{
class
OpenCLRuntime
{
public:
public:
static
OpenCLRuntime
*
Global
();
static
OpenCLRuntime
*
Global
();
static
OpenCLRuntime
*
CreateGlobal
(
GPUType
,
GPUPerfHint
,
GPUPriorityHint
);
cl
::
Context
&
context
();
cl
::
Context
&
context
();
cl
::
Device
&
device
();
cl
::
Device
&
device
();
...
@@ -51,7 +52,7 @@ class OpenCLRuntime {
...
@@ -51,7 +52,7 @@ class OpenCLRuntime {
const
std
::
set
<
std
::
string
>
&
build_options
);
const
std
::
set
<
std
::
string
>
&
build_options
);
private:
private:
OpenCLRuntime
();
OpenCLRuntime
(
GPUType
,
GPUPerfHint
,
GPUPriorityHint
);
~
OpenCLRuntime
();
~
OpenCLRuntime
();
OpenCLRuntime
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
&
operator
=
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
&
operator
=
(
const
OpenCLRuntime
&
)
=
delete
;
...
@@ -73,6 +74,7 @@ class OpenCLRuntime {
...
@@ -73,6 +74,7 @@ class OpenCLRuntime {
std
::
string
kernel_path_
;
std
::
string
kernel_path_
;
};
};
static
OpenCLRuntime
*
opencl_runtime_instance
=
nullptr
;
}
// namespace mace
}
// namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
mace/core/testing/test_benchmark_main.cc
浏览文件 @
ab4048ed
...
@@ -5,10 +5,15 @@
...
@@ -5,10 +5,15 @@
#include <iostream>
#include <iostream>
#include "mace/core/testing/test_benchmark.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/public/mace.h"
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
std
::
cout
<<
"Running main() from test_main.cc
\n
"
;
std
::
cout
<<
"Running main() from test_main.cc
\n
"
;
mace
::
ConfigCPURuntime
(
4
,
mace
::
CPUPowerOption
::
HIGH_PERFORMANCE
);
mace
::
ConfigOpenCLRuntime
(
mace
::
GPUType
::
ADRENO
,
mace
::
GPUPerfHint
::
PERF_HIGH
,
mace
::
GPUPriorityHint
::
PRIORITY_HIGH
);
// TODO Use gflags
// TODO Use gflags
if
(
argc
==
2
)
{
if
(
argc
==
2
)
{
mace
::
testing
::
Benchmark
::
Run
(
argv
[
1
]);
mace
::
testing
::
Benchmark
::
Run
(
argv
[
1
]);
...
...
mace/examples/mace_run.cc
浏览文件 @
ab4048ed
...
@@ -101,6 +101,16 @@ DeviceType ParseDeviceType(const string &device_str) {
...
@@ -101,6 +101,16 @@ DeviceType ParseDeviceType(const string &device_str) {
}
}
}
}
// Maps the textual GPU type to the GPUType enum.
// "MALI" yields GPUType::MALI; "ADRENO" and every unrecognised string yield
// GPUType::ADRENO.
GPUType ParseGPUType(const string &gpu_type_str) {
  if (gpu_type_str == "MALI") {
    return GPUType::MALI;
  }
  // Covers both the explicit "ADRENO" spelling and the fallback case.
  return GPUType::ADRENO;
}
struct
mallinfo
LogMallinfoChange
(
struct
mallinfo
prev
)
{
struct
mallinfo
LogMallinfoChange
(
struct
mallinfo
prev
)
{
struct
mallinfo
curr
=
mallinfo
();
struct
mallinfo
curr
=
mallinfo
();
if
(
prev
.
arena
!=
curr
.
arena
)
{
if
(
prev
.
arena
!=
curr
.
arena
)
{
...
@@ -160,6 +170,11 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
...
@@ -160,6 +170,11 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32
(
round
,
1
,
"round"
);
DEFINE_int32
(
round
,
1
,
"round"
);
DEFINE_int32
(
restart_round
,
1
,
"restart round"
);
DEFINE_int32
(
restart_round
,
1
,
"restart round"
);
DEFINE_int32
(
malloc_check_cycle
,
-
1
,
"malloc debug check cycle, -1 to disable"
);
DEFINE_int32
(
malloc_check_cycle
,
-
1
,
"malloc debug check cycle, -1 to disable"
);
// Runtime tuning flags. gpu_type is parsed by ParseGPUType; the integer
// flags are cast directly to GPUPerfHint, GPUPriorityHint and CPUPowerOption,
// so the numbers in the help strings are the enum values.
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
bool
SingleInputAndOutput
(
const
std
::
vector
<
int64_t
>
&
input_shape
,
bool
SingleInputAndOutput
(
const
std
::
vector
<
int64_t
>
&
input_shape
,
const
std
::
vector
<
int64_t
>
&
output_shape
)
{
const
std
::
vector
<
int64_t
>
&
output_shape
)
{
...
@@ -175,6 +190,20 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
...
@@ -175,6 +190,20 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
DeviceType
device_type
=
ParseDeviceType
(
FLAGS_device
);
DeviceType
device_type
=
ParseDeviceType
(
FLAGS_device
);
LOG
(
INFO
)
<<
"Runing with device type: "
<<
device_type
;
LOG
(
INFO
)
<<
"Runing with device type: "
<<
device_type
;
// config runtime
if
(
device_type
==
DeviceType
::
OPENCL
)
{
GPUType
gpu_type
=
ParseGPUType
(
FLAGS_gpu_type
);
mace
::
ConfigOpenCLRuntime
(
gpu_type
,
static_cast
<
GPUPerfHint
>
(
FLAGS_gpu_perf_hint
),
static_cast
<
GPUPriorityHint
>
(
FLAGS_gpu_priority_hint
));
}
else
if
(
device_type
==
DeviceType
::
CPU
)
{
mace
::
ConfigCPURuntime
(
FLAGS_omp_num_threads
,
static_cast
<
CPUPowerOption
>
(
FLAGS_cpu_power_option
));
}
// Init model
// Init model
LOG
(
INFO
)
<<
"Run init"
;
LOG
(
INFO
)
<<
"Run init"
;
t0
=
NowMicros
();
t0
=
NowMicros
();
...
@@ -266,6 +295,20 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names,
...
@@ -266,6 +295,20 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names,
DeviceType
device_type
=
ParseDeviceType
(
FLAGS_device
);
DeviceType
device_type
=
ParseDeviceType
(
FLAGS_device
);
LOG
(
INFO
)
<<
"Runing with device type: "
<<
device_type
;
LOG
(
INFO
)
<<
"Runing with device type: "
<<
device_type
;
// config runtime
if
(
device_type
==
DeviceType
::
OPENCL
)
{
GPUType
gpu_type
=
ParseGPUType
(
FLAGS_gpu_type
);
mace
::
ConfigOpenCLRuntime
(
gpu_type
,
static_cast
<
GPUPerfHint
>
(
FLAGS_gpu_perf_hint
),
static_cast
<
GPUPriorityHint
>
(
FLAGS_gpu_priority_hint
));
}
else
if
(
device_type
==
DeviceType
::
CPU
)
{
mace
::
ConfigCPURuntime
(
FLAGS_omp_num_threads
,
static_cast
<
CPUPowerOption
>
(
FLAGS_cpu_power_option
));
}
// Init model
// Init model
LOG
(
INFO
)
<<
"Run init"
;
LOG
(
INFO
)
<<
"Run init"
;
t0
=
NowMicros
();
t0
=
NowMicros
();
...
@@ -367,8 +410,13 @@ int main(int argc, char **argv) {
...
@@ -367,8 +410,13 @@ int main(int argc, char **argv) {
LOG
(
INFO
)
<<
"output_file: "
<<
FLAGS_output_file
;
LOG
(
INFO
)
<<
"output_file: "
<<
FLAGS_output_file
;
LOG
(
INFO
)
<<
"model_data_file: "
<<
FLAGS_model_data_file
;
LOG
(
INFO
)
<<
"model_data_file: "
<<
FLAGS_model_data_file
;
LOG
(
INFO
)
<<
"device: "
<<
FLAGS_device
;
LOG
(
INFO
)
<<
"device: "
<<
FLAGS_device
;
LOG
(
INFO
)
<<
"round: "
<<
FLAGS_restart_round
;
LOG
(
INFO
)
<<
"round: "
<<
FLAGS_round
;
LOG
(
INFO
)
<<
"restart_round: "
<<
FLAGS_round
;
LOG
(
INFO
)
<<
"restart_round: "
<<
FLAGS_restart_round
;
LOG
(
INFO
)
<<
"gpu_type: "
<<
FLAGS_gpu_type
;
LOG
(
INFO
)
<<
"gpu_perf_hint: "
<<
FLAGS_gpu_perf_hint
;
LOG
(
INFO
)
<<
"gpu_priority_hint: "
<<
FLAGS_gpu_priority_hint
;
LOG
(
INFO
)
<<
"omp_num_threads: "
<<
FLAGS_omp_num_threads
;
LOG
(
INFO
)
<<
"cpu_power_option: "
<<
FLAGS_cpu_power_option
;
std
::
vector
<
std
::
string
>
input_names
=
str_util
::
Split
(
FLAGS_input_node
,
','
);
std
::
vector
<
std
::
string
>
input_names
=
str_util
::
Split
(
FLAGS_input_node
,
','
);
std
::
vector
<
std
::
string
>
output_names
=
str_util
::
Split
(
FLAGS_output_node
,
','
);
std
::
vector
<
std
::
string
>
output_names
=
str_util
::
Split
(
FLAGS_output_node
,
','
);
...
...
mace/public/mace.h
浏览文件 @
ab4048ed
...
@@ -61,6 +61,17 @@ enum DataType {
...
@@ -61,6 +61,17 @@ enum DataType {
DT_UINT32
=
22
DT_UINT32
=
22
};
};
enum
GPUType
{
ADRENO
=
0
,
MALI
=
1
};
enum
GPUPerfHint
{
PERF_NA
=
0
,
PERF_LOW
=
1
,
PERF_NORMAL
=
2
,
PERF_HIGH
=
3
};
enum
GPUPriorityHint
{
PRIORITY_NA
=
0
,
PRIORITY_LOW
=
1
,
PRIORITY_NORMAL
=
2
,
PRIORITY_HIGH
=
3
};
enum
CPUPowerOption
{
DEFAULT
=
0
,
HIGH_PERFORMANCE
=
1
,
BATTERY_SAVE
=
2
};
class
ConstTensor
{
class
ConstTensor
{
public:
public:
ConstTensor
(
const
std
::
string
&
name
,
ConstTensor
(
const
std
::
string
&
name
,
...
@@ -369,6 +380,9 @@ struct MaceInputInfo {
...
@@ -369,6 +380,9 @@ struct MaceInputInfo {
const
float
*
data
;
const
float
*
data
;
};
};
// Creates the global OpenCL runtime with the given GPU type, performance
// hint and priority hint.
void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
// Configures the number of OpenMP threads and the CPU selection policy used
// for pinning them.
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option);
class
MaceEngine
{
class
MaceEngine
{
public:
public:
// Single input and output
// Single input and output
...
...
tools/benchmark.sh
浏览文件 @
ab4048ed
...
@@ -13,6 +13,9 @@ CURRENT_DIR=`dirname $0`
...
@@ -13,6 +13,9 @@ CURRENT_DIR=`dirname $0`
source
${
CURRENT_DIR
}
/env.sh
source
${
CURRENT_DIR
}
/env.sh
MODEL_OUTPUT_DIR
=
$1
MODEL_OUTPUT_DIR
=
$1
OPTION_ARGS
=
$2
echo
$OPTION_ARGS
if
[
-f
"
$MODEL_OUTPUT_DIR
/benchmark_model"
]
;
then
if
[
-f
"
$MODEL_OUTPUT_DIR
/benchmark_model"
]
;
then
rm
-rf
$MODEL_OUTPUT_DIR
/benchmark_model
rm
-rf
$MODEL_OUTPUT_DIR
/benchmark_model
...
@@ -23,7 +26,7 @@ if [ "$EMBED_MODEL_DATA" = 0 ]; then
...
@@ -23,7 +26,7 @@ if [ "$EMBED_MODEL_DATA" = 0 ]; then
fi
fi
if
[
x
"
$TARGET_ABI
"
==
x
"host"
]
;
then
if
[
x
"
$TARGET_ABI
"
==
x
"host"
]
;
then
bazel build
--verbose_failures
-c
opt
--strip
always benchmark:benchmark_model
\
bazel build
--verbose_failures
-c
opt
--strip
always
//mace/
benchmark:benchmark_model
\
--copt
=
"-std=c++11"
\
--copt
=
"-std=c++11"
\
--copt
=
"-D_GLIBCXX_USE_C99_MATH_TR1"
\
--copt
=
"-D_GLIBCXX_USE_C99_MATH_TR1"
\
--copt
=
"-Werror=return-type"
\
--copt
=
"-Werror=return-type"
\
...
@@ -42,10 +45,11 @@ if [ x"$TARGET_ABI" == x"host" ]; then
...
@@ -42,10 +45,11 @@ if [ x"$TARGET_ABI" == x"host" ]; then
--input_shape
=
"
${
INPUT_SHAPES
}
"
\
--input_shape
=
"
${
INPUT_SHAPES
}
"
\
--output_node
=
"
${
OUTPUT_NODES
}
"
\
--output_node
=
"
${
OUTPUT_NODES
}
"
\
--output_shape
=
"
${
OUTPUT_SHAPES
}
"
\
--output_shape
=
"
${
OUTPUT_SHAPES
}
"
\
--input_file
=
${
MODEL_OUTPUT_DIR
}
/
${
INPUT_FILE_NAME
}
||
exit
1
--input_file
=
${
MODEL_OUTPUT_DIR
}
/
${
INPUT_FILE_NAME
}
_
${
INPUT_NODES
}
\
$OPTION_ARGS
||
exit
1
else
else
bazel build
--verbose_failures
-c
opt
--strip
always benchmark:benchmark_model
\
bazel build
--verbose_failures
-c
opt
--strip
always
//mace/
benchmark:benchmark_model
\
--crosstool_top
=
//external:android/crosstool
\
--crosstool_top
=
//external:android/crosstool
\
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
\
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
\
--cpu
=
${
TARGET_ABI
}
\
--cpu
=
${
TARGET_ABI
}
\
...
@@ -57,7 +61,7 @@ else
...
@@ -57,7 +61,7 @@ else
--define
openmp
=
true
\
--define
openmp
=
true
\
--define
production
=
true
||
exit
1
--define
production
=
true
||
exit
1
cp
bazel-bin/benchmark/benchmark_model
$MODEL_OUTPUT_DIR
cp
bazel-bin/
mace/
benchmark/benchmark_model
$MODEL_OUTPUT_DIR
adb shell
"mkdir -p
${
PHONE_DATA_DIR
}
"
||
exit
1
adb shell
"mkdir -p
${
PHONE_DATA_DIR
}
"
||
exit
1
IFS
=
','
read
-r
-a
INPUT_NAMES
<<<
"
${
INPUT_NODES
}
"
IFS
=
','
read
-r
-a
INPUT_NAMES
<<<
"
${
INPUT_NODES
}
"
...
@@ -83,5 +87,6 @@ else
...
@@ -83,5 +87,6 @@ else
--input_shape
=
"
${
INPUT_SHAPES
}
"
\
--input_shape
=
"
${
INPUT_SHAPES
}
"
\
--output_node
=
"
${
OUTPUT_NODES
}
"
\
--output_node
=
"
${
OUTPUT_NODES
}
"
\
--output_shape
=
"
${
OUTPUT_SHAPES
}
"
\
--output_shape
=
"
${
OUTPUT_SHAPES
}
"
\
--input_file
=
${
PHONE_DATA_DIR
}
/
${
INPUT_FILE_NAME
}
||
exit
1
--input_file
=
${
PHONE_DATA_DIR
}
/
${
INPUT_FILE_NAME
}
\
$OPTION_ARGS
||
exit
1
fi
fi
tools/build_run_throughput_test.sh
浏览文件 @
ab4048ed
...
@@ -29,9 +29,9 @@ if [ "$DSP_MODEL_TAG" != '' ]; then
...
@@ -29,9 +29,9 @@ if [ "$DSP_MODEL_TAG" != '' ]; then
DSP_MODEL_TAG_BUILD_FLAGS
=
"--copt=-DMACE_DSP_MODEL_TAG=
${
DSP_MODEL_TAG
}
"
DSP_MODEL_TAG_BUILD_FLAGS
=
"--copt=-DMACE_DSP_MODEL_TAG=
${
DSP_MODEL_TAG
}
"
fi
fi
cp
$MERGED_LIB_FILE
benchmark/libmace_merged.a
cp
$MERGED_LIB_FILE
mace/
benchmark/libmace_merged.a
bazel build
--verbose_failures
-c
opt
--strip
always benchmark:model_throughput_test
\
bazel build
--verbose_failures
-c
opt
--strip
always
//mace/
benchmark:model_throughput_test
\
--crosstool_top
=
//external:android/crosstool
\
--crosstool_top
=
//external:android/crosstool
\
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
\
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
\
--cpu
=
${
TARGET_ABI
}
\
--cpu
=
${
TARGET_ABI
}
\
...
@@ -44,18 +44,18 @@ bazel build --verbose_failures -c opt --strip always benchmark:model_throughput_
...
@@ -44,18 +44,18 @@ bazel build --verbose_failures -c opt --strip always benchmark:model_throughput_
--define
openmp
=
true
\
--define
openmp
=
true
\
--copt
=
"-O3"
||
exit
1
--copt
=
"-O3"
||
exit
1
rm
benchmark/libmace_merged.a
rm
mace/
benchmark/libmace_merged.a
adb shell
"mkdir -p
${
PHONE_DATA_DIR
}
"
||
exit
1
adb shell
"mkdir -p
${
PHONE_DATA_DIR
}
"
||
exit
1
adb push
${
MODEL_INPUT_DIR
}
/
${
INPUT_FILE_NAME
}
${
PHONE_DATA_DIR
}
||
exit
1
adb push
${
MODEL_INPUT_DIR
}
/
${
INPUT_FILE_NAME
}
_
${
INPUT_NODES
}
${
PHONE_DATA_DIR
}
||
exit
1
adb push bazel-bin/benchmark/model_throughput_test
${
PHONE_DATA_DIR
}
||
exit
1
adb push bazel-bin/
mace/
benchmark/model_throughput_test
${
PHONE_DATA_DIR
}
||
exit
1
if
[
"
$EMBED_MODEL_DATA
"
=
0
]
;
then
if
[
"
$EMBED_MODEL_DATA
"
=
0
]
;
then
adb push codegen/models/
${
CPU_MODEL_TAG
}
/
${
CPU_MODEL_TAG
}
.data
${
PHONE_DATA_DIR
}
||
exit
1
adb push codegen/models/
${
CPU_MODEL_TAG
}
/
${
CPU_MODEL_TAG
}
.data
${
PHONE_DATA_DIR
}
||
exit
1
adb push codegen/models/
${
GPU_MODEL_TAG
}
/
${
GPU_MODEL_TAG
}
.data
${
PHONE_DATA_DIR
}
||
exit
1
adb push codegen/models/
${
GPU_MODEL_TAG
}
/
${
GPU_MODEL_TAG
}
.data
${
PHONE_DATA_DIR
}
||
exit
1
adb push codegen/models/
${
DSP_MODEL_TAG
}
/
${
DSP_MODEL_TAG
}
.data
${
PHONE_DATA_DIR
}
||
exit
1
adb push codegen/models/
${
DSP_MODEL_TAG
}
/
${
DSP_MODEL_TAG
}
.data
${
PHONE_DATA_DIR
}
||
exit
1
fi
fi
adb push
lib
/hexagon/libhexagon_controller.so
${
PHONE_DATA_DIR
}
||
exit
1
adb push
mace/core/runtime
/hexagon/libhexagon_controller.so
${
PHONE_DATA_DIR
}
||
exit
1
adb </dev/null shell
\
adb </dev/null shell
\
LD_LIBRARY_PATH
=
${
PHONE_DATA_DIR
}
\
LD_LIBRARY_PATH
=
${
PHONE_DATA_DIR
}
\
...
@@ -64,9 +64,9 @@ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
...
@@ -64,9 +64,9 @@ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH
=
$KERNEL_DIR
\
MACE_KERNEL_PATH
=
$KERNEL_DIR
\
MACE_LIMIT_OPENCL_KERNEL_TIME
=
${
LIMIT_OPENCL_KERNEL_TIME
}
\
MACE_LIMIT_OPENCL_KERNEL_TIME
=
${
LIMIT_OPENCL_KERNEL_TIME
}
\
${
PHONE_DATA_DIR
}
/model_throughput_test
\
${
PHONE_DATA_DIR
}
/model_throughput_test
\
--input_shape
=
"
${
INPUT_SHAPE
}
"
\
--input_shape
=
"
${
INPUT_SHAPE
S
}
"
\
--output_shape
=
"
${
OUTPUT_SHAPE
}
"
\
--output_shape
=
"
${
OUTPUT_SHAPE
S
}
"
\
--input_file
=
${
PHONE_DATA_DIR
}
/
${
INPUT_FILE_NAME
}
\
--input_file
=
${
PHONE_DATA_DIR
}
/
${
INPUT_FILE_NAME
}
_
${
INPUT_NODES
}
\
--cpu_model_data_file
=
${
PHONE_DATA_DIR
}
/
${
CPU_MODEL_TAG
}
.data
\
--cpu_model_data_file
=
${
PHONE_DATA_DIR
}
/
${
CPU_MODEL_TAG
}
.data
\
--gpu_model_data_file
=
${
PHONE_DATA_DIR
}
/
${
GPU_MODEL_TAG
}
.data
\
--gpu_model_data_file
=
${
PHONE_DATA_DIR
}
/
${
GPU_MODEL_TAG
}
.data
\
--dsp_model_data_file
=
${
PHONE_DATA_DIR
}
/
${
DSP_MODEL_TAG
}
.data
\
--dsp_model_data_file
=
${
PHONE_DATA_DIR
}
/
${
DSP_MODEL_TAG
}
.data
\
...
...
tools/mace_tools.py
浏览文件 @
ab4048ed
...
@@ -80,19 +80,22 @@ def build_mace_run(production_mode, model_output_dir, hexagon_mode):
...
@@ -80,19 +80,22 @@ def build_mace_run(production_mode, model_output_dir, hexagon_mode):
run_command
(
command
)
run_command
(
command
)
def
tuning_run
(
model_output_dir
,
running_round
,
tuning
,
production_mode
,
restart_round
):
def
tuning_run
(
model_output_dir
,
running_round
,
tuning
,
production_mode
,
command
=
"bash tools/tuning_run.sh {} {} {} {} {}"
.
format
(
restart_round
,
option_args
=
''
):
model_output_dir
,
running_round
,
int
(
tuning
),
int
(
production_mode
),
restart_round
)
command
=
"bash tools/tuning_run.sh {} {} {} {} {}
\"
{}
\"
"
.
format
(
model_output_dir
,
running_round
,
int
(
tuning
),
int
(
production_mode
),
restart_round
,
option_args
)
run_command
(
command
)
run_command
(
command
)
def
benchmark_model
(
model_output_dir
):
def
benchmark_model
(
model_output_dir
,
option_args
=
''
):
command
=
"bash tools/benchmark.sh {}
"
.
format
(
model_output_dir
)
command
=
"bash tools/benchmark.sh {}
\"
{}
\"
"
.
format
(
model_output_dir
,
option_args
)
run_command
(
command
)
run_command
(
command
)
def
run_model
(
model_output_dir
,
running_round
,
restart_round
):
def
run_model
(
model_output_dir
,
running_round
,
restart_round
,
option_args
):
tuning_run
(
model_output_dir
,
running_round
,
False
,
False
,
restart_round
)
tuning_run
(
model_output_dir
,
running_round
,
False
,
False
,
restart_round
,
option_args
)
def
generate_production_code
(
model_output_dirs
,
pull_or_not
):
def
generate_production_code
(
model_output_dirs
,
pull_or_not
):
...
@@ -204,6 +207,7 @@ def main(unused_args):
...
@@ -204,6 +207,7 @@ def main(unused_args):
os
.
environ
[
"PROJECT_NAME"
]
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
FLAGS
.
config
))[
0
]
os
.
environ
[
"PROJECT_NAME"
]
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
FLAGS
.
config
))[
0
]
generate_opencl_and_version_code
()
generate_opencl_and_version_code
()
option_args
=
' '
.
join
([
arg
for
arg
in
unused_args
if
arg
.
startswith
(
'--'
)])
for
target_abi
in
configs
[
"target_abis"
]:
for
target_abi
in
configs
[
"target_abis"
]:
global_runtime
=
get_global_runtime
(
configs
)
global_runtime
=
get_global_runtime
(
configs
)
...
@@ -255,10 +259,10 @@ def main(unused_args):
...
@@ -255,10 +259,10 @@ def main(unused_args):
build_mace_run_prod
(
model_output_dir
,
FLAGS
.
tuning
,
global_runtime
)
build_mace_run_prod
(
model_output_dir
,
FLAGS
.
tuning
,
global_runtime
)
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
run_model
(
model_output_dir
,
FLAGS
.
round
,
FLAGS
.
restart_round
)
run_model
(
model_output_dir
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
option_args
)
if
FLAGS
.
mode
==
"benchmark"
:
if
FLAGS
.
mode
==
"benchmark"
:
benchmark_model
(
model_output_dir
)
benchmark_model
(
model_output_dir
,
option_args
)
if
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
validate_model
(
model_output_dir
)
validate_model
(
model_output_dir
)
...
...
tools/tuning_run.sh
浏览文件 @
ab4048ed
...
@@ -17,6 +17,9 @@ ROUND=$2
...
@@ -17,6 +17,9 @@ ROUND=$2
TUNING_OR_NOT
=
$3
TUNING_OR_NOT
=
$3
PRODUCTION_MODE
=
$4
PRODUCTION_MODE
=
$4
RESTART_ROUND
=
$5
RESTART_ROUND
=
$5
OPTION_ARGS
=
$6
echo
$OPTION_ARGS
if
[
x
"
$TARGET_ABI
"
=
x
"host"
]
;
then
if
[
x
"
$TARGET_ABI
"
=
x
"host"
]
;
then
MACE_CPP_MIN_VLOG_LEVEL
=
$VLOG_LEVEL
\
MACE_CPP_MIN_VLOG_LEVEL
=
$VLOG_LEVEL
\
...
@@ -30,7 +33,8 @@ if [ x"$TARGET_ABI" = x"host" ]; then
...
@@ -30,7 +33,8 @@ if [ x"$TARGET_ABI" = x"host" ]; then
--model_data_file
=
${
MODEL_OUTPUT_DIR
}
/
${
MODEL_TAG
}
.data
\
--model_data_file
=
${
MODEL_OUTPUT_DIR
}
/
${
MODEL_TAG
}
.data
\
--device
=
${
DEVICE_TYPE
}
\
--device
=
${
DEVICE_TYPE
}
\
--round
=
1
\
--round
=
1
\
--restart_round
=
1
||
exit
1
--restart_round
=
1
\
$OPTION_ARGS
||
exit
1
else
else
if
[[
"
${
TUNING_OR_NOT
}
"
!=
"0"
&&
"
$PRODUCTION_MODE
"
!=
1
]]
;
then
if
[[
"
${
TUNING_OR_NOT
}
"
!=
"0"
&&
"
$PRODUCTION_MODE
"
!=
1
]]
;
then
tuning_flag
=
1
tuning_flag
=
1
...
@@ -54,9 +58,8 @@ else
...
@@ -54,9 +58,8 @@ else
adb push
${
MODEL_OUTPUT_DIR
}
/
${
MODEL_TAG
}
.data
${
PHONE_DATA_DIR
}
>
/dev/null
||
exit
1
adb push
${
MODEL_OUTPUT_DIR
}
/
${
MODEL_TAG
}
.data
${
PHONE_DATA_DIR
}
>
/dev/null
||
exit
1
fi
fi
adb push mace/core/runtime/hexagon/libhexagon_controller.so
${
PHONE_DATA_DIR
}
>
/dev/null
||
exit
1
adb push mace/core/runtime/hexagon/libhexagon_controller.so
${
PHONE_DATA_DIR
}
>
/dev/null
||
exit
1
mace_adb_output
=
`
adb </dev/null shell
\
ADB_CMD_STR
=
"LD_LIBRARY_PATH=
${
PHONE_DATA_DIR
}
\
"LD_LIBRARY_PATH=
${
PHONE_DATA_DIR
}
\
MACE_TUNING=
${
tuning_flag
}
\
MACE_TUNING=
${
tuning_flag
}
\
MACE_CPP_MIN_VLOG_LEVEL=
$VLOG_LEVEL
\
MACE_CPP_MIN_VLOG_LEVEL=
$VLOG_LEVEL
\
MACE_RUN_PARAMETER_PATH=
${
PHONE_DATA_DIR
}
/mace_run.config
\
MACE_RUN_PARAMETER_PATH=
${
PHONE_DATA_DIR
}
/mace_run.config
\
...
@@ -72,7 +75,10 @@ else
...
@@ -72,7 +75,10 @@ else
--model_data_file=
${
PHONE_DATA_DIR
}
/
${
MODEL_TAG
}
.data
\
--model_data_file=
${
PHONE_DATA_DIR
}
/
${
MODEL_TAG
}
.data
\
--device=
${
DEVICE_TYPE
}
\
--device=
${
DEVICE_TYPE
}
\
--round=
$ROUND
\
--round=
$ROUND
\
--restart_round=
$RESTART_ROUND
; echo
\\
$?
"
`
||
exit
1
--restart_round=
$RESTART_ROUND
\
$OPTION_ARGS
; echo
\\
$?
"
echo
$ADB_CMD_STR
mace_adb_output
=
`
adb </dev/null shell
"
$ADB_CMD_STR
"
`
||
exit
1
echo
"
$mace_adb_output
"
|
head
-n
-1
echo
"
$mace_adb_output
"
|
head
-n
-1
mace_adb_return_code
=
`
echo
"
$mace_adb_output
"
|
tail
-1
`
mace_adb_return_code
=
`
echo
"
$mace_adb_output
"
|
tail
-1
`
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录