Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
f617e03d
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f617e03d
编写于
4月 13, 2018
作者:
吴
吴承辉
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'mace_runtime_doc' into 'master'
Update comments for mace_tuntime.h See merge request !382
上级
e581e3cf
a566195e
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
44 addition
and
32 deletion
+44
-32
mace/benchmark/benchmark_model.cc
mace/benchmark/benchmark_model.cc
+1
-1
mace/core/runtime/cpu/cpu_runtime.cc
mace/core/runtime/cpu/cpu_runtime.cc
+8
-9
mace/core/testing/test_benchmark_main.cc
mace/core/testing/test_benchmark_main.cc
+1
-1
mace/examples/example.cc
mace/examples/example.cc
+1
-1
mace/public/mace_runtime.h
mace/public/mace_runtime.h
+32
-19
mace/tools/validation/mace_run.cc
mace/tools/validation/mace_run.cc
+1
-1
未找到文件。
mace/benchmark/benchmark_model.cc
浏览文件 @
f617e03d
...
...
@@ -219,7 +219,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32
(
gpu_priority_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
-
1
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_affinity_policy
,
1
,
"0:AFFINITY_
DEFAULT
/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
"0:AFFINITY_
NONE
/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
int
Main
(
int
argc
,
char
**
argv
)
{
MACE_CHECK
(
FLAGS_device
!=
"HEXAGON"
,
...
...
mace/core/runtime/cpu/cpu_runtime.cc
浏览文件 @
f617e03d
...
...
@@ -121,10 +121,8 @@ MaceStatus GetCPUBigLittleCoreIDs(std::vector<int> *big_core_ids,
void
SetOpenMPThreadsAndAffinityCPUs
(
int
omp_num_threads
,
const
std
::
vector
<
int
>
&
cpu_ids
)
{
std
::
ostringstream
oss
;
for
(
auto
cpu_id
:
cpu_ids
)
oss
<<
cpu_id
<<
' '
;
VLOG
(
1
)
<<
"Set CPU openmp num_threads: "
<<
omp_num_threads
<<
", cpu_ids: "
<<
oss
.
str
();
VLOG
(
1
)
<<
"Set OpenMP threads number: "
<<
omp_num_threads
<<
", CPU core IDs: "
<<
MakeString
(
cpu_ids
);
omp_set_num_threads
(
omp_num_threads
);
...
...
@@ -134,7 +132,6 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
for
(
auto
cpu_id
:
cpu_ids
)
{
CPU_SET
(
cpu_id
,
&
mask
);
}
VLOG
(
3
)
<<
"Set cpu affinity with mask: "
<<
mask
.
__bits
[
0
];
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
omp_num_threads
;
++
i
)
{
...
...
@@ -144,9 +141,10 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
MaceStatus
SetOpenMPThreadsAndAffinityPolicy
(
int
omp_num_threads_hint
,
CPUAffinityPolicy
policy
)
{
// There is no need to set affinity in default mode
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_DEFAULT
)
{
if
(
omp_num_threads_hint
>
0
)
omp_set_num_threads
(
omp_num_threads_hint
);
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_NONE
)
{
if
(
omp_num_threads_hint
>
0
)
{
omp_set_num_threads
(
std
::
min
(
omp_num_threads_hint
,
omp_get_num_procs
()));
}
return
MACE_SUCCESS
;
}
...
...
@@ -164,7 +162,8 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
use_cpu_ids
=
std
::
move
(
little_core_ids
);
}
if
(
omp_num_threads_hint
<
0
)
{
if
(
omp_num_threads_hint
<=
0
||
omp_num_threads_hint
>
use_cpu_ids
.
size
())
{
omp_num_threads_hint
=
use_cpu_ids
.
size
();
}
SetOpenMPThreadsAndAffinityCPUs
(
omp_num_threads_hint
,
use_cpu_ids
);
...
...
mace/core/testing/test_benchmark_main.cc
浏览文件 @
f617e03d
...
...
@@ -14,7 +14,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32
(
gpu_priority_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
-
1
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_affinity_policy
,
1
,
"0:AFFINITY_
DEFAULT
/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
"0:AFFINITY_
NONE
/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
int
main
(
int
argc
,
char
**
argv
)
{
gflags
::
SetUsageMessage
(
"some usage message"
);
...
...
mace/examples/example.cc
浏览文件 @
f617e03d
...
...
@@ -130,7 +130,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32
(
gpu_priority_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
-
1
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_affinity_policy
,
1
,
"0:AFFINITY_
DEFAULT
/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
"0:AFFINITY_
NONE
/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
bool
RunModel
(
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>
&
input_shapes
,
...
...
mace/public/mace_runtime.h
浏览文件 @
f617e03d
...
...
@@ -32,7 +32,7 @@ enum GPUPriorityHint {
};
enum
CPUAffinityPolicy
{
AFFINITY_
DEFAULT
=
0
,
AFFINITY_
NONE
=
0
,
AFFINITY_BIG_ONLY
=
1
,
AFFINITY_LITTLE_ONLY
=
2
,
};
...
...
@@ -66,39 +66,52 @@ class FileStorageFactory : public KVStorageFactory {
std
::
unique_ptr
<
Impl
>
impl_
;
};
// Set KV store factory used as OpenCL cache
// Set KV store factory used as OpenCL cache
.
void
SetKVStorageFactory
(
std
::
shared_ptr
<
KVStorageFactory
>
storage_factory
);
// Set GPU hints, currently only supports Adreno GPU
// Set GPU hints, currently only supports Adreno GPU.
//
// Caution: this function may hurt performance if improper parameters provided.
void
SetGPUHints
(
GPUPerfHint
perf_hint
,
GPUPriorityHint
priority_hint
);
// Set OpenMP threads number and affinity policy.
//
// num_threads_hint is only a hint, the function can change it when it's larger
// than 0. When num_threads_hint is not positive, the function will set the
// threads number equaling to the number of big + little, big or little cores
// according to the policy.
// Caution: this function may hurt performance if improper parameters provided.
//
// num_threads_hint is only a hint. When num_threads_hint is zero or negative,
// the function will set the threads number equaling to the number of
// big (AFFINITY_BIG_ONLY), little (AFFINITY_LITTLE_ONLY) or all
// (AFFINITY_NONE) cores according to the policy. The threads number will
// also be truncated to the corresponding cores number when num_threads_hint
// is larger than it.
//
// The OpenMP threads will be bound to (via sched_setaffinity) big cores
// (AFFINITY_BIG_ONLY) and little cores (AFFINITY_LITTLE_ONLY).
//
//
This function may not work well on some ships (e.g. MTK), and in such
//
cases (when it returns error MACE_INVALID_ARGS) you may try to use
//
SetOpenMPThreadAffinity to set affinity manually, or just set default policy
.
//
If successful, it returns MACE_SUCCESS, or an error if it can't reliably
//
detect big-LITTLE cores (see GetBigLittleCoreIDs). In such cases, it's
//
suggested to use AFFINITY_NONE to use all cores
.
MaceStatus
SetOpenMPThreadPolicy
(
int
num_threads_hint
,
CPUAffinityPolicy
policy
);
// Set OpenMP threads number and processor affinity
// This function may not work well on some chips (e.g. MTK). Set thread affinity
// to offline cores may fail or run unexpectedly. In such cases, please use
// SetOpenMPThreadPolicy with default policy instead.
// Set OpenMP threads number and processor affinity.
//
// Caution: this function may hurt performance if improper parameters provided.
//
// This function may not work well on some chips (e.g. MTK). Setting thread
// affinity to offline cores may run very slowly or behave unexpectedly. In such cases,
// please use SetOpenMPThreadPolicy with default policy instead.
void
SetOpenMPThreadAffinity
(
int
num_threads
,
const
std
::
vector
<
int
>
&
cpu_ids
);
// Get ARM big.LITTLE configuration.
//
// This function may not work well on some chips (e.g. MTK) and miss the
// offline cores, and the user should detect the configurations manually
// in such case(when it returns error MACE_INVALID_ARGS).
// This function will detect the max frequencies of all CPU cores, and assume
// the cores with largest max frequencies as big cores, and all the remaining
// cores as little. If all cpu core's max frequencies equals, big_core_ids and
// little_core_ids will both be filled with all cpu core ids.
//
// If
all cpu's frequencies are equal(i.e. all cores are the same),
//
big_core_ids and little_core_ids will be set to all cpu ids
.
// If
successful, it returns MACE_SUCCESS, or an error if it can't reliably
//
detect the frequency of big-LITTLE cores (e.g. MTK)
.
MaceStatus
GetBigLittleCoreIDs
(
std
::
vector
<
int
>
*
big_core_ids
,
std
::
vector
<
int
>
*
little_core_ids
);
...
...
mace/tools/validation/mace_run.cc
浏览文件 @
f617e03d
...
...
@@ -193,7 +193,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32
(
gpu_priority_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
-
1
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_affinity_policy
,
1
,
"0:AFFINITY_
DEFAULT
/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
"0:AFFINITY_
NONE
/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
bool
RunModel
(
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>
&
input_shapes
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录