Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
fd0a439c
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
fd0a439c
编写于
3月 19, 2018
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'perf_config' into 'master'
fix runtime config See merge request !298
上级
917f19e9
b0d79f5d
变更
11
显示空白变更内容
内联
并排
Showing
11 changed files
with
98 additions
and
53 deletions
+98
-53
.gitlab-ci.yml
.gitlab-ci.yml
+1
-1
mace/benchmark/benchmark_model.cc
mace/benchmark/benchmark_model.cc
+3
-3
mace/core/BUILD
mace/core/BUILD
+1
-0
mace/core/mace.cc
mace/core/mace.cc
+3
-2
mace/core/runtime/cpu/cpu_runtime.cc
mace/core/runtime/cpu/cpu_runtime.cc
+31
-21
mace/core/runtime/cpu/cpu_runtime.h
mace/core/runtime/cpu/cpu_runtime.h
+2
-1
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+11
-7
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+2
-2
mace/core/testing/test_benchmark_main.cc
mace/core/testing/test_benchmark_main.cc
+31
-9
mace/examples/mace_run.cc
mace/examples/mace_run.cc
+4
-4
mace/public/mace.h
mace/public/mace.h
+9
-3
未找到文件。
.gitlab-ci.yml
浏览文件 @
fd0a439c
...
...
@@ -23,4 +23,4 @@ ops_benchmark:
only
:
-
master
script
:
-
FAILURE_PATTERN="Aborted" tools/bazel-adb-run.sh //mace/ops:ops_benchmark .*CONV.*
-
FAILURE_PATTERN="Aborted" tools/bazel-adb-run.sh //mace/ops:ops_benchmark
--pattern=
.*CONV.*
mace/benchmark/benchmark_model.cc
浏览文件 @
fd0a439c
...
...
@@ -205,8 +205,8 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
DEFINE_int32
(
warmup_runs
,
1
,
"how many runs to initialize model"
);
DEFINE_string
(
model_data_file
,
""
,
"model data file name, used when EMBED_MODEL_DATA set to 0"
);
DEFINE_string
(
gpu_type
,
"ADRENO"
,
"ADRENO/MALI"
);
DEFINE_int32
(
gpu_perf_hint
,
2
,
"0:
NA
/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
1
,
"0:
NA
/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_perf_hint
,
2
,
"0:
DEFAULT
/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
1
,
"0:
DEFAULT
/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
8
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_power_option
,
0
,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"
);
...
...
@@ -266,7 +266,7 @@ int Main(int argc, char **argv) {
static_cast
<
GPUPriorityHint
>
(
FLAGS_gpu_priority_hint
));
}
else
if
(
device_type
==
CPU
)
{
mace
::
Config
CPURuntime
(
mace
::
Config
OmpThreadsAndAffinity
(
FLAGS_omp_num_threads
,
static_cast
<
CPUPowerOption
>
(
FLAGS_cpu_power_option
));
}
...
...
mace/core/BUILD
浏览文件 @
fd0a439c
...
...
@@ -85,6 +85,7 @@ cc_library(
],
deps
=
[
":core"
,
"//external:gflags_nothreads"
,
"//mace/utils"
,
],
alwayslink
=
1
,
...
...
mace/core/mace.cc
浏览文件 @
fd0a439c
...
...
@@ -360,10 +360,11 @@ void ConfigOpenCLRuntime(GPUType gpu_type,
OpenCLRuntime
::
CreateGlobal
(
gpu_type
,
gpu_perf_hint
,
gpu_priority_hint
);
}
void
ConfigCPURuntime
(
int
omp_num_threads
,
CPUPowerOption
power_option
)
{
void
ConfigOmpThreadsAndAffinity
(
int
omp_num_threads
,
CPUPowerOption
power_option
)
{
LOG
(
INFO
)
<<
"Config CPU Runtime: omp_num_threads: "
<<
omp_num_threads
<<
", cpu_power_option: "
<<
power_option
;
Set
CPURuntime
(
omp_num_threads
,
power_option
);
Set
OmpThreadsAndAffinity
(
omp_num_threads
,
power_option
);
}
// Mace Engine
...
...
mace/core/runtime/cpu/cpu_runtime.cc
浏览文件 @
fd0a439c
...
...
@@ -20,7 +20,7 @@ int GetCPUMaxFreq(int cpu_id) {
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
,
cpu_id
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
return
0
;
MACE_CHECK
(
fp
,
"File: "
,
path
,
" not exists"
)
;
int
freq
=
0
;
fscanf
(
fp
,
"%d"
,
&
freq
);
...
...
@@ -28,7 +28,7 @@ int GetCPUMaxFreq(int cpu_id) {
return
freq
;
}
void
SortCPUIdsByMaxFreqAsc
(
std
::
vector
<
int
>
*
cpu_ids
)
{
void
SortCPUIdsByMaxFreqAsc
(
std
::
vector
<
int
>
*
cpu_ids
,
int
*
big_core_offset
)
{
MACE_CHECK_NOTNULL
(
cpu_ids
);
int
cpu_count
=
cpu_ids
->
size
();
std
::
vector
<
int
>
cpu_max_freq
;
...
...
@@ -54,51 +54,61 @@ void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids) {
}
}
}
*
big_core_offset
=
0
;
for
(
int
i
=
1
;
i
<
cpu_count
;
++
i
)
{
if
(
cpu_max_freq
[
i
]
>
cpu_max_freq
[
i
-
1
])
{
*
big_core_offset
=
i
;
break
;
}
}
}
void
SetThreadAffinity
(
cpu_set_t
mask
)
{
int
sys_call_res
;
pid_t
pid
=
gettid
();
// TODO(chenghui): when set omp num threads to 1,
// sometiomes return EINVAL(22) error.
// https://linux.die.net/man/2/sched_setaffinity
sys_call_res
=
syscall
(
__NR_sched_setaffinity
,
pid
,
sizeof
(
mask
),
&
mask
);
if
(
sys_call_res
!=
0
)
{
LOG
(
FATAL
)
<<
"syscall setaffinity error: "
<<
sys_call_res
<<
' '
<<
errno
;
}
int
err
=
sched_setaffinity
(
pid
,
sizeof
(
mask
),
&
mask
);
MACE_CHECK
(
err
==
0
,
"set affinity error: "
,
errno
);
}
}
// namespace
void
SetCPURuntime
(
int
omp_num_threads
,
CPUPowerOption
power_option
)
{
void
SetOmpThreadsAndAffinity
(
int
omp_num_threads
,
CPUPowerOption
power_option
)
{
int
cpu_count
=
omp_get_num_procs
();
LOG
(
INFO
)
<<
"cpu_count: "
<<
cpu_count
;
std
::
vector
<
int
>
sorted_cpu_ids
;
sorted_cpu_ids
.
resize
(
cpu_count
);
SortCPUIdsByMaxFreqAsc
(
&
sorted_cpu_ids
);
int
big_core_offset
;
SortCPUIdsByMaxFreqAsc
(
&
sorted_cpu_ids
,
&
big_core_offset
);
std
::
vector
<
int
>
use_cpu_ids
;
if
(
power_option
==
CPUPowerOption
::
DEFAULT
||
omp_num_threads
>=
cpu_count
)
{
if
(
power_option
==
CPUPowerOption
::
DEFAULT
)
{
use_cpu_ids
=
sorted_cpu_ids
;
omp_num_threads
=
cpu_count
;
}
else
if
(
power_option
==
CPUPowerOption
::
HIGH_PERFORMANCE
)
{
use_cpu_ids
=
std
::
vector
<
int
>
(
sorted_cpu_ids
.
begin
()
+
cpu_count
-
omp_num_threads
,
use_cpu_ids
=
std
::
vector
<
int
>
(
sorted_cpu_ids
.
begin
()
+
big_core_offset
,
sorted_cpu_ids
.
end
());
}
else
{
if
(
big_core_offset
>
0
)
{
use_cpu_ids
=
std
::
vector
<
int
>
(
sorted_cpu_ids
.
begin
(),
sorted_cpu_ids
.
begin
()
+
omp_num_threads
);
sorted_cpu_ids
.
begin
()
+
big_core_offset
);
}
else
{
use_cpu_ids
=
sorted_cpu_ids
;
}
}
if
(
omp_num_threads
>
use_cpu_ids
.
size
())
{
LOG
(
WARNING
)
<<
"set omp num threads greater than num of cpus can use: "
<<
use_cpu_ids
.
size
();
}
omp_set_num_threads
(
omp_num_threads
);
// compute mask
cpu_set_t
mask
;
CPU_ZERO
(
&
mask
);
for
(
auto
cpu_id
:
use_cpu_ids
)
{
CPU_SET
(
cpu_id
,
&
mask
);
}
LOG
(
INFO
)
<<
"use cpus
mask: "
<<
mask
.
__bits
[
0
];
VLOG
(
3
)
<<
"Set cpu affinity with
mask: "
<<
mask
.
__bits
[
0
];
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
omp_num_threads
;
++
i
)
{
...
...
mace/core/runtime/cpu/cpu_runtime.h
浏览文件 @
fd0a439c
...
...
@@ -10,7 +10,8 @@
namespace
mace
{
void
SetCPURuntime
(
int
omp_num_threads
,
CPUPowerOption
power_option
);
void
SetOmpThreadsAndAffinity
(
int
omp_num_threads
,
CPUPowerOption
power_option
);
}
...
...
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
fd0a439c
...
...
@@ -65,20 +65,24 @@ void OpenCLProfilingTimer::ClearTiming() {
accumulated_micros_
=
0
;
}
std
::
unique_ptr
<
OpenCLRuntime
>
OpenCLRuntime
::
runtime_instance_
=
nullptr
;
OpenCLRuntime
*
OpenCLRuntime
::
Global
()
{
if
(
opencl_runtime_instance
==
nullptr
)
{
return
CreateGlobal
(
GPUType
::
ADRENO
,
GPUPerfHint
::
PERF_NORMAL
,
GPUPriorityHint
::
PRIORITY_LOW
);
// FIXME: not thread safe
if
(
runtime_instance_
==
nullptr
)
{
return
CreateGlobal
(
GPUType
::
ADRENO
,
GPUPerfHint
::
PERF_DEFAULT
,
GPUPriorityHint
::
PRIORITY_DEFAULT
);
}
return
opencl_runtime_instance
;
return
runtime_instance_
.
get
()
;
}
OpenCLRuntime
*
OpenCLRuntime
::
CreateGlobal
(
GPUType
gpu_type
,
GPUPerfHint
gpu_perf_hint
,
GPUPriorityHint
gpu_priority_hint
)
{
opencl_runtime_instance
=
new
OpenCLRuntime
(
gpu_type
,
gpu_perf_hint
,
gpu_priority_hint
);
return
opencl_runtime_instance
;
runtime_instance_
=
std
::
unique_ptr
<
OpenCLRuntime
>
(
new
OpenCLRuntime
(
gpu_type
,
gpu_perf_hint
,
gpu_priority_hint
));
return
runtime_instance_
.
get
();
}
void
ParseOpenCLRuntimeConfig
(
std
::
vector
<
cl_context_properties
>
*
properties
,
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
fd0a439c
...
...
@@ -52,10 +52,10 @@ class OpenCLRuntime {
cl
::
Kernel
BuildKernel
(
const
std
::
string
&
program_name
,
const
std
::
string
&
kernel_name
,
const
std
::
set
<
std
::
string
>
&
build_options
);
~
OpenCLRuntime
();
private:
OpenCLRuntime
(
GPUType
,
GPUPerfHint
,
GPUPriorityHint
);
~
OpenCLRuntime
();
OpenCLRuntime
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
&
operator
=
(
const
OpenCLRuntime
&
)
=
delete
;
...
...
@@ -74,9 +74,9 @@ class OpenCLRuntime {
std
::
map
<
std
::
string
,
cl
::
Program
>
built_program_map_
;
std
::
mutex
program_build_mutex_
;
std
::
string
kernel_path_
;
static
std
::
unique_ptr
<
OpenCLRuntime
>
runtime_instance_
;
};
static
OpenCLRuntime
*
opencl_runtime_instance
=
nullptr
;
}
// namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
mace/core/testing/test_benchmark_main.cc
浏览文件 @
fd0a439c
...
...
@@ -4,20 +4,42 @@
#include <iostream>
#include "gflags/gflags.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/public/mace.h"
int
main
(
int
argc
,
char
**
argv
)
{
std
::
cout
<<
"Running main() from test_main.cc
\n
"
;
mace
::
ConfigCPURuntime
(
4
,
mace
::
CPUPowerOption
::
HIGH_PERFORMANCE
);
mace
::
ConfigOpenCLRuntime
(
mace
::
GPUType
::
ADRENO
,
mace
::
GPUPerfHint
::
PERF_HIGH
,
mace
::
GPUPriorityHint
::
PRIORITY_HIGH
);
DEFINE_string
(
pattern
,
"all"
,
"op benchmark pattern, eg:.*CONV.*"
);
DEFINE_string
(
gpu_type
,
"ADRENO"
,
"ADRENO/MALI"
);
DEFINE_int32
(
gpu_perf_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
1
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_power_option
,
1
,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"
);
if
(
argc
==
2
)
{
mace
::
testing
::
Benchmark
::
Run
(
argv
[
1
]);
mace
::
GPUType
ParseGPUType
(
const
std
::
string
&
gpu_type_str
)
{
if
(
gpu_type_str
.
compare
(
"ADRENO"
)
==
0
)
{
return
mace
::
GPUType
::
ADRENO
;
}
else
if
(
gpu_type_str
.
compare
(
"MALI"
)
==
0
)
{
return
mace
::
GPUType
::
MALI
;
}
else
{
mace
::
testing
::
Benchmark
::
Run
(
"all"
)
;
return
mace
::
GPUType
::
ADRENO
;
}
}
int
main
(
int
argc
,
char
**
argv
)
{
gflags
::
SetUsageMessage
(
"some usage message"
);
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
// config runtime
mace
::
GPUType
gpu_type
=
ParseGPUType
(
FLAGS_gpu_type
);
mace
::
ConfigOpenCLRuntime
(
gpu_type
,
static_cast
<
mace
::
GPUPerfHint
>
(
FLAGS_gpu_perf_hint
),
static_cast
<
mace
::
GPUPriorityHint
>
(
FLAGS_gpu_priority_hint
));
mace
::
ConfigOmpThreadsAndAffinity
(
FLAGS_omp_num_threads
,
static_cast
<
mace
::
CPUPowerOption
>
(
FLAGS_cpu_power_option
));
mace
::
testing
::
Benchmark
::
Run
(
FLAGS_pattern
.
c_str
());
return
0
;
}
mace/examples/mace_run.cc
浏览文件 @
fd0a439c
...
...
@@ -171,8 +171,8 @@ DEFINE_int32(round, 1, "round");
DEFINE_int32
(
restart_round
,
1
,
"restart round"
);
DEFINE_int32
(
malloc_check_cycle
,
-
1
,
"malloc debug check cycle, -1 to disable"
);
DEFINE_string
(
gpu_type
,
"ADRENO"
,
"ADRENO/MALI"
);
DEFINE_int32
(
gpu_perf_hint
,
2
,
"0:
NA
/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
1
,
"0:
NA
/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_perf_hint
,
2
,
"0:
DEFAULT
/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
1
,
"0:
DEFAULT
/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
8
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_power_option
,
0
,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"
);
...
...
@@ -199,7 +199,7 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
static_cast
<
GPUPriorityHint
>
(
FLAGS_gpu_priority_hint
));
}
else
if
(
device_type
==
DeviceType
::
CPU
)
{
mace
::
Config
CPURuntime
(
mace
::
Config
OmpThreadsAndAffinity
(
FLAGS_omp_num_threads
,
static_cast
<
CPUPowerOption
>
(
FLAGS_cpu_power_option
));
}
...
...
@@ -304,7 +304,7 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names,
static_cast
<
GPUPriorityHint
>
(
FLAGS_gpu_priority_hint
));
}
else
if
(
device_type
==
DeviceType
::
CPU
)
{
mace
::
Config
CPURuntime
(
mace
::
Config
OmpThreadsAndAffinity
(
FLAGS_omp_num_threads
,
static_cast
<
CPUPowerOption
>
(
FLAGS_cpu_power_option
));
}
...
...
mace/public/mace.h
浏览文件 @
fd0a439c
...
...
@@ -62,9 +62,14 @@ enum DataType {
};
enum
GPUType
{
ADRENO
=
0
,
MALI
=
1
};
enum
GPUPerfHint
{
PERF_NA
=
0
,
PERF_LOW
=
1
,
PERF_NORMAL
=
2
,
PERF_HIGH
=
3
};
enum
GPUPerfHint
{
PERF_DEFAULT
=
0
,
PERF_LOW
=
1
,
PERF_NORMAL
=
2
,
PERF_HIGH
=
3
};
enum
GPUPriorityHint
{
PRIORITY_
NA
=
0
,
PRIORITY_
DEFAULT
=
0
,
PRIORITY_LOW
=
1
,
PRIORITY_NORMAL
=
2
,
PRIORITY_HIGH
=
3
...
...
@@ -381,7 +386,8 @@ struct MaceInputInfo {
};
void
ConfigOpenCLRuntime
(
GPUType
,
GPUPerfHint
,
GPUPriorityHint
);
void
ConfigCPURuntime
(
int
omp_num_threads
,
CPUPowerOption
power_option
);
void
ConfigOmpThreadsAndAffinity
(
int
omp_num_threads
,
CPUPowerOption
power_option
);
class
MaceEngine
{
public:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录