PaddlePaddle / PaddleDetection
Commit ce46ef22
Authored Jun 20, 2019 by hong19860320

ARM cpu_info refine
test=develop

Parent: 251255bc
Showing 3 changed files with 773 additions and 724 deletions (+773 -724):

  paddle/fluid/lite/core/context.h    +4   -5
  paddle/fluid/lite/core/cpu_info.cc  +726 -658
  paddle/fluid/lite/core/cpu_info.h   +43  -61
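For orientation, a minimal usage sketch of the refined DeviceInfo interface this commit introduces (the call site below is illustrative and not part of the diff; DeviceInfo::Init, Global, SetRunMode, l1_cache_size, arch and the LITE_POWER_HIGH mode all come from the changed cpu_info.h):

#include "paddle/fluid/lite/core/cpu_info.h"

using namespace paddle::lite;

void ConfigureArmRuntime() {
  // One-time hardware probing; Setup() runs at most once because Init()
  // initializes a function-local static with Global().Setup().
  DeviceInfo::Init();
  auto& dev = DeviceInfo::Global();
  // Ask for two worker threads bound to the big cluster; the implementation
  // falls back to little cores when no big cluster exists.
  dev.SetRunMode(LITE_POWER_HIGH, 2);
  // Cache sizes and the active architecture are now read through accessors
  // instead of public members.
  int l1 = dev.l1_cache_size();
  ARMArch arch = dev.arch();
  (void)l1;
  (void)arch;
}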
paddle/fluid/lite/core/context.h

@@ -67,7 +67,7 @@ class Context<TargetType::kARM> {
  ARMContext& operator=(const ARMContext& ctx) {}

  // NOTE: InitOnce should only be used by ContextScheduler
  void InitOnce() {}
  void InitOnce() { DeviceInfo::Init(); }

  void CopyShared(const ARMContext* ctx) {}

@@ -78,20 +78,19 @@ class Context<TargetType::kARM> {
    return DeviceInfo::Global().SetCache(l1size, l2size, l3size);
  }
  void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); }
  void BindDev() { return DeviceInfo::Global().BindDev(); }

  PowerMode mode() const { return DeviceInfo::Global().mode(); }
  int threads() const { return DeviceInfo::Global().threads(); }
  ARMArch arch() const { return DeviceInfo::Global().arch(); }
  int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
  int l2_cache_size() const { return DeviceInfo::Global().l2_cache_size(); }
  int l3_cache_size() const { return DeviceInfo::Global().l3_cache_size(); }

  template <typename T>
  T* workspace_data() {
    return DeviceInfo::Global().workspace_data<T>();
  }
  int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
  int l2_cache_size() const { return DeviceInfo::Global().l2_cache_size(); }
  int l3_cache_size() const { return DeviceInfo::Global().l3_cache_size(); }

  bool ExtendWorkspace(DDimLite dims) {
    return DeviceInfo::Global().ExtendWorkspace(dims);
  }
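The InitOnce() change above works because the new DeviceInfo::Init() is safe to call repeatedly. A standalone sketch of the same call-once idiom, independent of the Paddle headers (FakeDeviceInfo is a made-up stand-in):

#include <iostream>

struct FakeDeviceInfo {
  static FakeDeviceInfo& Global() {
    static auto* x = new FakeDeviceInfo;
    return *x;
  }
  // The initializer of a function-local static runs only on the first call
  // (thread-safe since C++11), so Setup() executes at most once.
  static int Init() {
    static int ret = Global().Setup();
    return ret;
  }
  int Setup() {
    std::cout << "probing hardware once\n";
    return 0;
  }
};

int main() {
  FakeDeviceInfo::Init();
  FakeDeviceInfo::Init();  // prints nothing the second time
  return 0;
}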
paddle/fluid/lite/core/cpu_info.cc

@@ -29,7 +29,8 @@
#include <omp.h>
#endif
#include <cstdarg>
#include <algorithm>
#include <limits>
#include "paddle/fluid/lite/core/cpu_info.h"

namespace paddle {
@@ -37,549 +38,55 @@ namespace lite {
#ifdef LITE_WITH_ARM

void DeviceInfo::InitInternal(DeviceInfo* dev) {
  set_default_cache(dev);
  dev->compute_core_num_ = arm_get_cpucount();
  dev->max_memory_ = arm_get_meminfo();
  // get max freq
#ifdef LITE_WITH_LINUX
  std::vector<int> max_freq(dev->compute_core_num_);
  for (int i = 0; i < dev->compute_core_num_; ++i) {
    max_freq[i] = get_max_freq_khz(i) / 1000;
  }
  std::string cpu_name = arm_get_cpu_name();
  if (get_cpu_info_from_name(dev, cpu_name) != true) {
    arm_sort_cpuid_by_max_frequency(dev->compute_core_num_, &dev->core_ids_,
                                    max_freq, &dev->cluster_ids_);
    dev->big_core_ids_.clear();
    dev->little_core_ids_.clear();
    for (int i = 0; i < dev->cluster_ids_.size(); ++i) {
      if (dev->cluster_ids_[i] == 0) {
        dev->big_core_ids_.push_back(dev->core_ids_[i]);
      } else {
        dev->little_core_ids_.push_back(dev->core_ids_[i]);
      }
    }
    arm_get_cpu_arch(&dev->archs_);
  }
  LOG(INFO) << "ARM multiprocessors number: " << dev->compute_core_num_;
  for (int i = 0; i < dev->compute_core_num_; ++i) {
    LOG(INFO) << "ARM multiprocessors ID: " << dev->core_ids_[i]
              << ", frequence: " << max_freq[i]
              << ", cluster ID: " << dev->cluster_ids_[dev->core_ids_[i]]
              << ", CPU ARCH: A" << dev->archs_[i];
  }
  VLOG(1) << "L1 DataCache size is: ";
  for (int i = 0; i < dev->compute_core_num_; ++i) {
    VLOG(1) << dev->L1_cache_[i] / 1024 << " KB";
  }
  VLOG(1) << "L2 Cache size is: ";
  for (int i = 0; i < dev->compute_core_num_; ++i) {
    VLOG(1) << dev->L2_cache_[i] / 1024 << " KB";
  }
  VLOG(1) << "Total memory: " << dev->max_memory_ << "KB";
  dev->max_freq_ = max_freq[0];
  for (int j = 1; j < dev->compute_core_num_; ++j) {
    if (dev->max_freq_ < max_freq[j]) {
      dev->max_freq_ = max_freq[j];
    }
  }
#elif defined(TARGET_IOS)
  arm_get_cpu_arch(&dev->archs_);
#endif
  dev->active_ids_ = {0};
  dev->mode_ = LITE_POWER_HIGH;
  dev->workspace_.Resize(
      {static_cast<int64_t>(dev->L2_cache_[dev->active_ids_[0]] / sizeof(float))});
#ifdef TARGET_IOS
  dev->arch_ = APPLE;  // use 6x8
  const int DEFAULT_L1_CACHE_SIZE = 64 * 1024;
  const int DEFAULT_L2_CACHE_SIZE = 2048 * 1024;
  const int DEFAULT_L3_CACHE_SIZE = 0;
#else
  if (dev->big_core_ids_.size() > 0) {
    dev->arch_ = dev->archs_[dev->big_core_ids_[0]];
  }
  const int DEFAULT_L1_CACHE_SIZE = 32 * 1024;
  const int DEFAULT_L2_CACHE_SIZE = 512 * 1024;
  const int DEFAULT_L3_CACHE_SIZE = 0;
#endif
}

void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
  int cpu_count = arm_get_cpucount();
  L1_cache_.resize(cpu_count);
  L2_cache_.resize(cpu_count);
  L3_cache_.resize(cpu_count);
  for (int i = 0; i < cpu_count; ++i) {
    L1_cache_[i] = l1size;
    L2_cache_[i] = l2size;
    L3_cache_[i] = l3size;
  }
  workspace_.Resize({2 * (l1size + l2size)});
}
void DeviceInfo::BindDev() {
#ifdef ARM_WITH_OMP
  int num_threads = active_ids_.size();
  omp_set_num_threads(num_threads);
int get_cpu_num() {
#ifdef LITE_WITH_LINUX
  std::vector<int> ssarets;
  for (int j = 0; j < num_threads; ++j) {
    ssarets.push_back(0);
  }
#pragma omp parallel for
  for (int i = 0; i < num_threads; i++) {
    ssarets[i] = set_sched_affinity(active_ids_);
  }
  for (int i = 0; i < num_threads; i++) {
    if (ssarets[i] != 0) {
      LOG(ERROR) << "set cpu affinity failed, cpuID: " << active_ids_[i];
      return;
  // get cpu count from /sys/devices/system/cpu/cpunum/uevent
  int max_cpu_num = 20;
  int cpu_num = 0;
  for (int i = 0; i < max_cpu_num; ++i) {
    char path[256];
    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/uevent", i);
    FILE* fp = fopen(path, "rb");
    if (!fp) {
      break;
    }
    cpu_num++;
    fclose(fp);
  }
#endif  // LITE_WITH_LINUX
#else   // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
  std::vector<int> cpuid1;
  cpuid1.push_back(active_ids_[0]);
  int ssaret = set_sched_affinity(cpuid1);
  if (ssaret != 0) {
    printf("set cpu affinity failed, cpuID: %d\n", active_ids_[0]);
    return;
  }
#endif  // LITE_WITH_LINUX
#endif  // ARM_WITH_OMP
}

void DeviceInfo::SetRunMode(PowerMode mode, int threads) {
  int big_core_size = big_core_ids_.size();
  int small_core_size = little_core_ids_.size();
  if (threads > big_core_size + small_core_size) {
    threads = big_core_size + small_core_size;
  if (cpu_num < 1) {
    cpu_num = 1;
  }
#ifdef ARM_WITH_OMP
  count_++;
  int shift_num = (count_ / 10) % big_core_size;
  switch (mode) {
    case LITE_POWER_FULL:
      mode_ = mode;
      active_ids_.clear();
      for (int i = 0; i < threads; ++i) {
        if (i < big_core_size) {
          active_ids_.push_back(big_core_ids_[i]);
        } else {
          active_ids_.push_back(little_core_ids_[i - big_core_size]);
        }
      }
      if (active_ids_.size() == 0) {
        active_ids_.push_back(0);
      }
      break;
    case LITE_POWER_HIGH:
      active_ids_.clear();
      if (big_core_size > 0) {
        mode_ = LITE_POWER_HIGH;
        if (threads > big_core_size) {
          LOG(ERROR) << "threads: " << threads
                     << ", exceed the big cores size: " << big_core_size;
          active_ids_ = big_core_ids_;
        } else {
          for (int i = 0; i < threads; ++i) {
            active_ids_.push_back(big_core_ids_[i]);
          }
        }
      } else {
        mode_ = LITE_POWER_LOW;
        LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores.";
        if (threads > small_core_size) {
          active_ids_ = little_core_ids_;
        } else {
          for (int i = 0; i < threads; ++i) {
            active_ids_.push_back(little_core_ids_[i]);
          }
        }
      }
      if (active_ids_.size() == 0) {
        active_ids_.push_back(0);
      }
      break;
    case LITE_POWER_LOW:
      active_ids_.clear();
      if (small_core_size > 0) {
        mode_ = LITE_POWER_LOW;
        if (threads > small_core_size) {
          LOG(WARNING) << "threads: " << threads
                       << ", exceed the little cores size: " << small_core_size;
          active_ids_ = little_core_ids_;
        } else {
          for (int i = 0; i < threads; ++i) {
            active_ids_.push_back(little_core_ids_[i]);
          }
        }
      } else {
        mode_ = LITE_POWER_HIGH;
        LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
        if (threads > big_core_size) {
          active_ids_ = big_core_ids_;
        } else {
          for (int i = 0; i < threads; ++i) {
            active_ids_.push_back(big_core_ids_[i]);
          }
        }
      }
      if (active_ids_.size() == 0) {
        active_ids_.push_back(0);
      }
      break;
    case LITE_POWER_NO_BIND:
      mode_ = LITE_POWER_NO_BIND;
      active_ids_.clear();
      if (threads > core_ids_.size()) {
        active_ids_.resize(core_ids_.size());
      } else {
        active_ids_.resize(threads);
      }
      break;
    case LITE_POWER_RAND_HIGH:
      active_ids_.clear();
      if (big_core_size > 0) {
        mode_ = LITE_POWER_RAND_HIGH;
        if (threads > big_core_size) {
          LOG(WARNING) << "threads: " << threads
                       << ", exceed the big cores size: " << big_core_size;
          active_ids_ = big_core_ids_;
        } else {
          for (int i = 0; i < threads; ++i) {
            active_ids_.push_back(big_core_ids_[(i + shift_num) % big_core_size]);
          }
        }
      } else {
        mode_ = LITE_POWER_LOW;
        LOG(WARNING) << "HIGH POWER MODE is not support, switch to little cores.";
        if (threads > small_core_size) {
          active_ids_ = little_core_ids_;
        } else {
          for (int i = 0; i < threads; ++i) {
            active_ids_.push_back(little_core_ids_[i]);
          }
        }
      }
      if (active_ids_.size() == 0) {
        active_ids_.push_back(0);
      }
      break;
    case LITE_POWER_RAND_LOW:
      active_ids_.clear();
      if (small_core_size > 0) {
        mode_ = LITE_POWER_RAND_LOW;
        if (threads > small_core_size) {
          LOG(WARNING) << "threads: " << threads
                       << ", exceed the little cores size: " << small_core_size;
          active_ids_ = little_core_ids_;
        } else {
          for (int i = 0; i < threads; ++i) {
            active_ids_.push_back(
                little_core_ids_[(i + shift_num) % small_core_size]);
          }
        }
      } else {
        mode_ = LITE_POWER_HIGH;
        LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores.";
        if (threads > big_core_size) {
          active_ids_ = big_core_ids_;
        } else {
          for (int i = 0; i < threads; ++i) {
            active_ids_.push_back(big_core_ids_[i]);
          }
        }
      }
      if (active_ids_.size() == 0) {
        active_ids_.push_back(0);
      }
      break;
  }
  //! fix multi-threads LITE_POWER_HIGH mode
  if (mode_ == LITE_POWER_NO_BIND || threads > 1) {
    int threads = active_ids_.size();
    omp_set_num_threads(threads);
  } else {
    if (check_online(active_ids_)) {
      BindDev();
    } else {
      LOG(WARNING) << "core id " << active_ids_[0]
                   << " is offline, switch to NO BIND MODE";
      int threads = active_ids_.size();
      omp_set_num_threads(threads);
    }
  return cpu_num;
#elif defined(TARGET_IOS)
  int cpu_num = 0;
  size_t len = sizeof(cpu_num);
  sysctlbyname("hw.ncpu", &cpu_num, &len, NULL, 0);
  if (cpu_num < 1) {
    cpu_num = 1;
  }
  return cpu_num;
#else
  if (big_core_size > 0) {
    active_ids_ = {big_core_ids_[0]};
  } else {
    active_ids_ = {0};
  }
  return 1;
#endif
  //! alloc memory for sgemm in this context
  int temp_mem_size = L2_cache_[active_ids_[0]] / sizeof(float);
  workspace_.Resize({temp_mem_size});
  arch_ = archs_[active_ids_[0]];
}

bool DeviceInfo::ExtendWorkspace(DDimLite dims) {
  auto count = dims.product();
  auto old = workspace_.dims();
  if (count == old.product()) {
    return false;
  }
  workspace_.Resize(
      {static_cast<int64_t>(count + L2_cache_[active_ids_[0]] / sizeof(float))});
  return true;
}
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
void set_cache_info(DeviceInfo* cpu_info, int cache_id, int argc, ...) {
  va_list arg_ptr;
  va_start(arg_ptr, argc);
  std::vector<int>* cache;
  switch (cache_id) {
    case 0:
      cache = &cpu_info->L1_cache_;
      break;
    case 1:
      cache = &cpu_info->L2_cache_;
      break;
    case 2:
      cache = &cpu_info->L3_cache_;
      break;
    default:
      break;
  }
  int core_num = cpu_info->compute_core_num_;
  cache->resize(core_num);
  if (argc == 1) {
    int cache_size = va_arg(arg_ptr, int);
    for (int i = 0; i < core_num; ++i) {
      (*cache)[i] = cache_size;
    }
  } else {
    int big_core_num = cpu_info->big_core_ids_.size();
    int little_core_num = cpu_info->little_core_ids_.size();
    int big_core_cache_size = va_arg(arg_ptr, int);
    int little_core_cache_size = va_arg(arg_ptr, int);
    for (int i = 0; i < big_core_num; ++i) {
      (*cache)[cpu_info->big_core_ids_[i]] = big_core_cache_size;
    }
    for (int i = 0; i < little_core_num; ++i) {
      (*cache)[cpu_info->little_core_ids_[i]] = little_core_cache_size;
    }
  }
  va_end(arg_ptr);
}

void set_arch_info(DeviceInfo* cpu_info, int argc, ...) {
  va_list arg_ptr;
  va_start(arg_ptr, argc);
  int core_num = cpu_info->compute_core_num_;
  cpu_info->archs_.resize(core_num);
  if (argc == 1) {
    ARMArch arch = (ARMArch)va_arg(arg_ptr, int);
    for (int i = 0; i < core_num; ++i) {
      cpu_info->archs_[i] = arch;
    }
  } else {
    ARMArch big_core_arch = (ARMArch)va_arg(arg_ptr, int);
    ARMArch little_core_arch = (ARMArch)va_arg(arg_ptr, int);
    int big_core_num = cpu_info->big_core_ids_.size();
    int little_core_num = cpu_info->little_core_ids_.size();
    for (int i = 0; i < big_core_num; ++i) {
      cpu_info->archs_[cpu_info->big_core_ids_[i]] = big_core_arch;
    }
    for (int i = 0; i < little_core_num; ++i) {
      cpu_info->archs_[cpu_info->little_core_ids_[i]] = little_core_arch;
    }
  }
  va_end(arg_ptr);
}
bool get_cpu_info_from_name(DeviceInfo* cpu_info, std::string hardware_name) {
  /* Snapdragon */
  if (hardware_name.find("SDM845") != std::string::npos) {  // 845
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {4, 5, 6, 7};
    cpu_info->little_core_ids_ = {0, 1, 2, 3};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    set_arch_info(cpu_info, 2, kA75, kA55);
    set_cache_info(cpu_info, 0, 1, 32 * 1024);
    set_cache_info(cpu_info, 1, 2, 256 * 1024, 128 * 1024);
    set_cache_info(cpu_info, 2, 1, 2048 * 1024);
    return true;
  } else if (hardware_name.find("SDM710") != std::string::npos) {  // 710
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {6, 7};
    cpu_info->little_core_ids_ = {0, 1, 2, 3, 4, 5};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 1, 1, 0, 0};
    set_arch_info(cpu_info, 2, kA75, kA55);
    return true;
  } else if (hardware_name.find("MSM8998") != std::string::npos) {  // 835
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {4, 5, 6, 7};
    cpu_info->little_core_ids_ = {0, 1, 2, 3};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    set_arch_info(cpu_info, 2, kA73, kA53);
    set_cache_info(cpu_info, 0, 2, 64 * 1024);
    set_cache_info(cpu_info, 1, 2, 1024 * 1024,
                   /*real cache size is 2M, while that will get bad performace
                      on conv3x3s1 or gemm, set to 1M or 512K*/
                   1024 * 1024);
    return true;
  } else if (hardware_name.find("MSM8996") != std::string::npos) {  // 820
    cpu_info->compute_core_num_ = 4;
    cpu_info->core_ids_ = {0, 1, 2, 3};
    cpu_info->big_core_ids_ = {2, 3};
    cpu_info->little_core_ids_ = {0, 1};
    cpu_info->cluster_ids_ = {1, 1, 0, 0};
    set_arch_info(cpu_info, 1, kA72);
    set_cache_info(cpu_info, 0, 1, 24 * 1024);
    set_cache_info(cpu_info, 1, 2, 1024 * 1024, 512 * 1024);
    return true;
  } else if (hardware_name.find("SDM660") != std::string::npos ||
             hardware_name.find("SDM636") != std::string::npos) {  // 660, 636
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {4, 5, 6, 7};
    cpu_info->little_core_ids_ = {0, 1, 2, 3};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    set_arch_info(cpu_info, 1, kA73);
    set_cache_info(cpu_info, 0, 2, 64 * 1024, 32 * 1024);
    set_cache_info(cpu_info, 1, 1, 1024 * 1024);
    return true;
  } else if (hardware_name.find("MSM8976") != std::string::npos) {  // 652,653
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {4, 5, 6, 7};
    cpu_info->little_core_ids_ = {0, 1, 2, 3};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    set_arch_info(cpu_info, 2, kA72, kA53);
    set_cache_info(cpu_info, 0, 1, 32 * 1024);
    set_cache_info(cpu_info, 1, 2, 1024 * 1024, 512 * 1024);
    return true;
  } else if (hardware_name.find("MSM8953") != std::string::npos) {  // 625
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->little_core_ids_ = {};
    cpu_info->cluster_ids_ = {0, 0, 0, 0, 0, 0, 0, 0};
    set_arch_info(cpu_info, 1, kA53);
    set_cache_info(cpu_info, 0, 1, 32 * 1024);
    set_cache_info(cpu_info, 1, 1, 1024 * 1024);
    return true;
  } else if (hardware_name.find("MSM8939") != std::string::npos) {  // 615
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {0, 1, 2, 3};
    cpu_info->little_core_ids_ = {4, 5, 6, 7};
    cpu_info->cluster_ids_ = {0, 0, 0, 0, 1, 1, 1, 1};
    set_arch_info(cpu_info, 1, kA53);
    set_cache_info(cpu_info, 0, 1, 32 * 1024);
    set_cache_info(cpu_info, 1, 2, 512 * 1024, 256 * 1024);
    return true;
    /* MediaTek */
  } else if (hardware_name.find("MT6797") != std::string::npos) {
    // X20/X23/X25/X27
    cpu_info->compute_core_num_ = 10;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    cpu_info->big_core_ids_ = {8, 9};
    cpu_info->little_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
    set_arch_info(cpu_info, 2, kA72, kA53);
    set_cache_info(cpu_info, 0, 1, 32 * 1024);
    set_cache_info(cpu_info, 1, 2, 1024 * 1024, 512 * 1024);
    return true;
  } else if (hardware_name.find("MT6799") != std::string::npos) {  // X30
    cpu_info->compute_core_num_ = 10;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    cpu_info->big_core_ids_ = {8, 9};
    cpu_info->little_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
    set_arch_info(cpu_info, 2, kA73, kA53);
    return true;
  } else if (hardware_name.find("MT6795") != std::string::npos ||
             hardware_name.find("MT6762") != std::string::npos ||
             hardware_name.find("MT6755T") != std::string::npos ||
             hardware_name.find("MT6755S") != std::string::npos ||
             hardware_name.find("MT6753") != std::string::npos ||
             hardware_name.find("MT6752") != std::string::npos ||
             hardware_name.find("MT6750") != std::string::npos) {
    // X10, P22, P15/P18, MT6753, MT6752/MT6752M, MT6750
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->little_core_ids_ = {};
    cpu_info->cluster_ids_ = {0, 0, 0, 0, 0, 0, 0, 0};
    set_arch_info(cpu_info, 1, kA53);
    return true;
  } else if (hardware_name.find("MT6758") != std::string::npos ||
             hardware_name.find("MT6757") != std::string::npos ||
             hardware_name.find("MT6763") != std::string::npos ||
             hardware_name.find("MT6755M") != std::string::npos ||
             hardware_name.find("MT6755") != std::string::npos) {
    // P30, P20/P25, P23, P10
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {4, 5, 6, 7};
    cpu_info->little_core_ids_ = {0, 1, 2, 3};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    set_arch_info(cpu_info, 1, kA53);
    return true;
  } else if (hardware_name.find("MT6771") != std::string::npos) {  // P60
    cpu_info->compute_core_num_ = 8;
    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cpu_info->big_core_ids_ = {4, 5, 6, 7};
    cpu_info->little_core_ids_ = {0, 1, 2, 3};
    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    set_arch_info(cpu_info, 2, kA73, kA53);
    return true;
  } else if (hardware_name.find("MT6765") != std::string::npos ||
             hardware_name.find("MT6739") != std::string::npos ||
             hardware_name.find("MT6738") != std::string::npos ||
             hardware_name.find("MT6737") != std::string::npos) {
    // A22, MT6739, MT6738, MT6767
    cpu_info->compute_core_num_ = 4;
    cpu_info->core_ids_ = {0, 1, 2, 3};
    cpu_info->big_core_ids_ = {0, 0, 0, 0};
    cpu_info->little_core_ids_ = {};
    cpu_info->cluster_ids_ = {0, 0, 0, 0};
    set_arch_info(cpu_info, 1, kA53);
    return true;
  }
  return false;
}
size_t arm_get_meminfo() {
size_t get_mem_size() {
#ifdef LITE_WITH_LINUX
  // get cpu count from /proc/cpuinfo
  FILE* fp = fopen("/proc/meminfo", "rb");
  if (!fp) {
    return 1;
  }
  size_t memsize = 0;
  char line[1024];
  while (!feof(fp)) {

@@ -589,52 +96,18 @@ size_t arm_get_meminfo() {
    }
    sscanf(s, "MemTotal: %d kB", &memsize);
  }
  fclose(fp);
  return memsize;
#elif defined(TARGET_IOS)
  // to be implemented
  printf("not implemented\n");
  return 0;
#endif
}

int arm_get_cpucount() {
#ifdef LITE_WITH_LINUX
  // get cpu count from /sys/devices/system/cpu/cpunum/uevent
  int max_cpu_count = 20;
  int count = 0;
  for (int i = 0; i < max_cpu_count; ++i) {
    char path[256];
    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/uevent", i);
    FILE* fp = fopen(path, "rb");
    if (!fp) {
      break;
    }
    count++;
    fclose(fp);
  }
  if (count < 1) {
    count = 1;
  }
  return count;
#elif defined(TARGET_IOS)
  int count = 0;
  size_t len = sizeof(count);
  sysctlbyname("hw.ncpu", &count, &len, NULL, 0);
  if (count < 1) {
    count = 1;
  }
  return count;
#else
  return 1;
#endif
  return 0;
}
void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
#ifdef LITE_WITH_LINUX
void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
  archs->clear();
#ifdef LITE_WITH_LINUX
  //! get CPU ARCH
  FILE* fp = fopen("/proc/cpuinfo", "rb");
  if (!fp) {

@@ -668,6 +141,29 @@ void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
      case 0xd0a:
        archs->push_back(kA75);
        break;
      case 0xd40:
        archs->push_back(kA76);
        break;
      case 0x804:  // 855
        archs->push_back(kA76);
        break;
      case 0x805:  // 855
        archs->push_back(kA55);
        break;
      case 0x802:  // 845
        archs->push_back(kA75);
        break;
      case 0x803:  // 845
        archs->push_back(kA55);
        break;
      case 0x801:  // 835
        archs->push_back(kA73);
        break;
      case 0x800:  // 835
        archs->push_back(kA73);

@@ -677,49 +173,31 @@ void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
        archs->push_back(kA72);
        break;
      default:
        LOG(ERROR) << " unknow type";
        LOG(ERROR) << " Unknow cpu arch: " << arch_id;
        archs->push_back(kARMArch_UNKOWN);
    }
  }
}
  fclose(fp);
  int cpu_count = arm_get_cpucount();
  if (archs->size() < cpu_count) {
    for (int i = archs->size(); i < cpu_count; ++i) {
  if (archs->size() < cpu_num) {
    for (int i = archs->size(); i < cpu_num; ++i) {
      archs->push_back(archs->at(i - 1));
    }
  }
#endif
#ifdef TARGET_IOS
  int cpu_count = arm_get_cpucount();
  for (int i = 0; i < cpu_count; ++i) {
#elif defined(TARGET_IOS)
  for (int i = 0; i < cpu_num; ++i) {
    archs->push_back(APPLE);
  }
#else
  for (int i = 0; i < cpu_num; ++i) {
    archs->push_back(kARMArch_UNKOWN);
  }
#endif
}

#ifdef LITE_WITH_LINUX
void set_default_cache(DeviceInfo* dev) {
  int cpu_count = arm_get_cpucount();
  dev->L1_cache_.resize(cpu_count);
  dev->L2_cache_.resize(cpu_count);
  dev->L3_cache_.resize(cpu_count);
#ifdef TARGET_IOS
  for (int i = 0; i < cpu_count; ++i) {
    dev->L1_cache_[i] = 64 * 1024;
    dev->L2_cache_[i] = 2048 * 1024;
    dev->L3_cache_[i] = 0;
  }
#else
  for (int i = 0; i < cpu_count; ++i) {
    dev->L1_cache_[i] = 32 * 1024;
    dev->L2_cache_[i] = 512 * 1024;
    dev->L3_cache_[i] = 0;
  }
#endif
}
std::string arm_get_cpu_name() {
std::string get_cpu_name() {
  FILE* fp = fopen("/proc/cpuinfo", "rb");
  if (!fp) {
    return "";

@@ -739,122 +217,163 @@ std::string arm_get_cpu_name() {
  return "";
}

int get_max_freq_khz(int cpuid) {
void get_cpu_max_min_freq(int cpu_id, int* max_freq, int* min_freq) {
  *max_freq = 0;
  *min_freq = 0;
  // first try, for all possible cpu
  char path[256];
  snprintf(path, sizeof(path),
           "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuid);
           "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpu_id);
  FILE* fp = fopen(path, "rb");
  if (!fp) {
    // second try, for online cpu
    snprintf(path, sizeof(path),
             "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuid);
             cpu_id);
    fp = fopen(path, "rb");
    if (!fp) {
      // third try, for online cpu
      // get max_freq
      snprintf(path, sizeof(path),
               "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuid);
               "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu_id);
      fp = fopen(path, "rb");
      if (!fp) {
        return -1;
        return;
      }
      int max_freq_khz = -1;
      fscanf(fp, "%d", &max_freq_khz);
      fscanf(fp, "%d", max_freq);
      fclose(fp);
      return max_freq_khz;
      // get min_freq
      snprintf(path, sizeof(path),
               "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq", cpu_id);
      fp = fopen(path, "rb");
      if (!fp) {
        return;
      }
      fscanf(fp, "%d", min_freq);
      fclose(fp);
      return;
    }
  }
  int max_freq_khz = 0;
  *min_freq = std::numeric_limits<int>::max();
  while (!feof(fp)) {
    int freq_khz = 0;
    int nscan = fscanf(fp, "%d %*d", &freq_khz);
    int freq = 0;
    int nscan = fscanf(fp, "%d %*d", &freq);
    if (nscan != 1) {
      break;
    }
    if (freq_khz > max_freq_khz) {
      max_freq_khz = freq_khz;
    if (freq > *max_freq) {
      *max_freq = freq;
    }
    if (freq < *min_freq) {
      *min_freq = freq;
    }
  }
  fclose(fp);
  return max_freq_khz;
}
int arm_sort_cpuid_by_max_frequency(int cpu_count, std::vector<int>* cpuids,
                                    const std::vector<int>& cpu_freq,
                                    std::vector<int>* cluster_ids) {
  if (cpu_count == 0) {
    return 0;
void sort_cpuid_by_max_freq(const std::vector<int>& max_freqs,
                            std::vector<int>* cpu_ids,
                            std::vector<int>* cluster_ids) {
  int cpu_num = max_freqs.size();
  if (cpu_num == 0) {
    return;
  }
  cpuids->resize(cpu_count);
  cluster_ids->resize(cpu_count);
  for (int i = 0; i < cpu_count; i++) {
    cpuids->at(i) = i;
  cpu_ids->resize(cpu_num);
  cluster_ids->resize(cpu_num);
  for (int i = 0; i < cpu_num; i++) {
    cpu_ids->at(i) = i;
  }
  // sort cpuid as big core first
  // simple bubble sort
  for (int i = 0; i < cpu_count; i++) {
    for (int j = i + 1; j < cpu_count; j++) {
      if (cpu_freq[i] < cpu_freq[j]) {
  for (int i = 0; i < cpu_num; i++) {
    for (int j = i + 1; j < cpu_num; j++) {
      if (max_freqs[i] < max_freqs[j]) {
        // swap
        int tmp = cpuids->at(i);
        cpuids->at(i) = cpuids->at(j);
        cpuids->at(j) = tmp;
        int tmp = cpu_ids->at(i);
        cpu_ids->at(i) = cpu_ids->at(j);
        cpu_ids->at(j) = tmp;
      }
    }
  }
  // SMP
  int mid_max_freq_khz =
      (cpu_freq[cpuids->at(0)] + cpu_freq[cpuids->at(cpu_count - 1)]) / 2;
  int mid_max_freq =
      (max_freqs[cpu_ids->at(0)] + max_freqs[cpu_ids->at(cpu_num - 1)]) / 2;
  for (int i = 0; i < cpu_count; i++) {
    cpuids->at(i) = i;
    if (cpu_freq[i] >= mid_max_freq_khz) {
  for (int i = 0; i < cpu_num; i++) {
    cpu_ids->at(i) = i;
    if (max_freqs[i] >= mid_max_freq) {
      cluster_ids->at(i) = 0;
    } else {
      cluster_ids->at(i) = 1;
    }
  }
  return 0;
}
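As a worked example of the split performed by sort_cpuid_by_max_freq() above (the frequencies are hypothetical, not from this diff): with per-core max frequencies {1766, 1766, 1766, 1766, 2304, 2304, 2304, 2304} MHz the midpoint is (2304 + 1766) / 2 = 2035, so cores 4-7 land in cluster 0 (big) and cores 0-3 in cluster 1 (little). A standalone sketch of that classification:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical per-core max frequencies in MHz.
  std::vector<int> max_freqs = {1766, 1766, 1766, 1766, 2304, 2304, 2304, 2304};
  int hi = *std::max_element(max_freqs.begin(), max_freqs.end());
  int lo = *std::min_element(max_freqs.begin(), max_freqs.end());
  int mid = (hi + lo) / 2;
  for (size_t i = 0; i < max_freqs.size(); ++i) {
    // Cores at or above the midpoint are treated as the big cluster (id 0).
    int cluster = (max_freqs[i] >= mid) ? 0 : 1;
    std::cout << "cpu" << i << " -> cluster " << cluster << "\n";
  }
  return 0;
}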
int check_online(const std::vector<int>& core_ids) {
  if (core_ids.size() == 0) {
    return 0;
void get_cpu_cache_size(int cpu_id, int* l1_cache_size, int* l2_cache_size,
                        int* l3_cache_size) {
  int max_cache_idx_num = 10;
  *l1_cache_size = DEFAULT_L1_CACHE_SIZE;
  *l2_cache_size = DEFAULT_L2_CACHE_SIZE;
  *l3_cache_size = DEFAULT_L3_CACHE_SIZE;
  for (int i = 0; i < max_cache_idx_num; i++) {
    char path[256];
    snprintf(path, sizeof(path),
             "/sys/devices/system/cpu/cpu%d/cache/index%d/level", cpu_id, i);
    FILE* fp = fopen(path, "rb");
    if (fp) {
      int level = -1;
      fscanf(fp, "%d", &level);
      fclose(fp);
      snprintf(path, sizeof(path),
               "/sys/devices/system/cpu/cpu%d/cache/index%d/size", cpu_id, i);
      fp = fopen(path, "rb");
      if (fp) {
        int size = -1;
        fscanf(fp, "%d", &size);
        fclose(fp);
        if (size >= 0) {
          if (level == 1) {
            *l1_cache_size = size * 1024;
          } else if (level == 2) {
            *l2_cache_size = size * 1024;
          } else if (level == 3) {
            *l3_cache_size = size * 1024;
          }
        }
      }
    }
  }
}

bool check_cpu_online(const std::vector<int>& cpu_ids) {
  if (cpu_ids.size() == 0) {
    return false;
  }
  char path[256];
  int online = 1;
  for (int i = 0; i < core_ids.size(); ++i) {
  bool all_online = true;
  for (int i = 0; i < cpu_ids.size(); ++i) {
    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/online",
             core_ids[i]);
             cpu_ids[i]);
    FILE* fp = fopen(path, "rb");
    if (!fp) {
      return 0;
    int is_online = 0;
    if (fp) {
      fscanf(fp, "%d", &is_online);
      fclose(fp);
    } else {
      LOG(ERROR) << "Failed to query the online statue of CPU id:" << cpu_ids[i];
    }
    if (is_online == 0) {
      all_online = false;
      LOG(ERROR) << "CPU id:" << cpu_ids[i] << " is offine";
    }
    int cur_online = 0;
    fscanf(fp, "%d", &cur_online);
    online &= cur_online;
    fclose(fp);
  }
  return online;
  return all_online;
}
int set_sched_affinity(const std::vector<int>& cpuids) {
int set_sched_affinity(const std::vector<int>& cpu_ids) {
  // #define CPU_SETSIZE 1024
  // #define __NCPUBITS (8 * sizeof (unsigned long))
  // typedef struct

@@ -870,20 +389,569 @@ int set_sched_affinity(const std::vector<int>& cpuids) {
#endif
  cpu_set_t mask;
  CPU_ZERO(&mask);
  for (int i = 0; i < cpuids.size(); i++) {
    CPU_SET(cpuids[i], &mask);
  for (int i = 0; i < cpu_ids.size(); ++i) {
    CPU_SET(cpu_ids[i], &mask);
  }
  int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
  if (syscallret) {
    LOG(ERROR) << "syscall error " << syscallret;
    return -1;
  }
  return 0;
}

bool bind_threads(const std::vector<int> cpu_ids) {
#ifdef ARM_WITH_OMP
  int thread_num = cpu_ids.size();
  omp_set_num_threads(thread_num);
  std::vector<int> ssarets;
  for (int i = 0; i < thread_num; ++i) {
    ssarets.push_back(0);
  }
#pragma omp parallel for
  for (int i = 0; i < thread_num; i++) {
    ssarets[i] = set_sched_affinity(cpu_ids);
  }
  for (int i = 0; i < thread_num; i++) {
    if (ssarets[i] != 0) {
      LOG(ERROR) << "Set cpu affinity failed, core id: " << cpu_ids[i];
      return false;
    }
  }
#else   // ARM_WITH_OMP
  std::vector<int> first_cpu_id;
  first_cpu_id.push_back(cpu_ids[0]);
  int ssaret = set_sched_affinity(first_cpu_id);
  if (ssaret != 0) {
    LOG(ERROR) << "Set cpu affinity failed, core id: " << cpu_ids[0];
    return false;
  }
#endif  // ARM_WITH_OMP
}
#endif  // LITE_WITH_LINUX
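For readers unfamiliar with the affinity pattern used by set_sched_affinity() and bind_threads() above, a small Linux-only sketch of the same mechanism (pin_to_core is a made-up helper; pid 0 means "the calling thread" for this syscall):

#include <sched.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstdio>

int pin_to_core(int core_id) {
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(core_id, &mask);
  // Raw syscall, mirroring the code above; returns 0 on success.
  long ret = syscall(__NR_sched_setaffinity, 0, sizeof(mask), &mask);
  return ret == 0 ? 0 : -1;
}

int main() {
  if (pin_to_core(0) != 0) {
    std::printf("failed to set affinity\n");
  }
  return 0;
}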
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
void DeviceInfo::SetCacheInfo(int cache_id, int argc, ...) {
  va_list arg_ptr;
  va_start(arg_ptr, argc);
  std::vector<int>* cache;
  switch (cache_id) {
    case 0:
      cache = &L1_cache_;
      break;
    case 1:
      cache = &L2_cache_;
      break;
    case 2:
      cache = &L3_cache_;
      break;
    default:
      break;
  }
  cache->resize(core_num_);
  if (argc == 1) {
    int cache_size = va_arg(arg_ptr, int);
    for (int i = 0; i < core_num_; ++i) {
      (*cache)[i] = cache_size;
    }
  } else {
    int big_core_num = big_core_ids_.size();
    int little_core_num = little_core_ids_.size();
    int big_core_cache_size = va_arg(arg_ptr, int);
    int little_core_cache_size = va_arg(arg_ptr, int);
    for (int i = 0; i < big_core_num; ++i) {
      (*cache)[big_core_ids_[i]] = big_core_cache_size;
    }
    for (int i = 0; i < little_core_num; ++i) {
      (*cache)[little_core_ids_[i]] = little_core_cache_size;
    }
  }
  va_end(arg_ptr);
}

void DeviceInfo::SetArchInfo(int argc, ...) {
  va_list arg_ptr;
  va_start(arg_ptr, argc);
  archs_.resize(core_num_);
  if (argc == 1) {
    ARMArch arch = (ARMArch)va_arg(arg_ptr, int);
    for (int i = 0; i < core_num_; ++i) {
      archs_[i] = arch;
    }
  } else {
    ARMArch big_core_arch = (ARMArch)va_arg(arg_ptr, int);
    ARMArch little_core_arch = (ARMArch)va_arg(arg_ptr, int);
    int big_core_num = big_core_ids_.size();
    int little_core_num = little_core_ids_.size();
    for (int i = 0; i < big_core_num; ++i) {
      archs_[big_core_ids_[i]] = big_core_arch;
    }
    for (int i = 0; i < little_core_num; ++i) {
      archs_[little_core_ids_[i]] = little_core_arch;
    }
  }
  va_end(arg_ptr);
}
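The varargs convention of SetCacheInfo()/SetArchInfo() above, as used by the call sites later in this file: argc == 1 supplies one value for every core, argc == 2 supplies a big-core value followed by a little-core value. A standalone sketch of that convention (fill_per_core is made up; the sizes reuse the SDM845 entries from this diff but are otherwise illustrative):

#include <cstdarg>
#include <cstdio>
#include <vector>

void fill_per_core(std::vector<int>* out, const std::vector<int>& big_ids,
                   const std::vector<int>& little_ids, int argc, ...) {
  va_list args;
  va_start(args, argc);
  if (argc == 1) {
    int value = va_arg(args, int);
    for (size_t i = 0; i < out->size(); ++i) (*out)[i] = value;
  } else {
    int big_value = va_arg(args, int);
    int little_value = va_arg(args, int);
    for (int id : big_ids) (*out)[id] = big_value;
    for (int id : little_ids) (*out)[id] = little_value;
  }
  va_end(args);
}

int main() {
  std::vector<int> l1(8, 0);
  // SDM845-style split: cores 4-7 big, cores 0-3 little.
  fill_per_core(&l1, {4, 5, 6, 7}, {0, 1, 2, 3}, 2, 64 * 1024, 32 * 1024);
  std::printf("core0 L1 = %d, core4 L1 = %d\n", l1[0], l1[4]);
  return 0;
}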
bool DeviceInfo::SetCPUInfoByName() {
  /* Snapdragon */
  if (dev_name_.find("SM8150") != std::string::npos) {  // 855
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {4, 5, 6, 7};
    little_core_ids_ = {0, 1, 2, 3};
    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    SetArchInfo(2, kA76, kA55);
    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
    SetCacheInfo(1, 2, 256 * 1024, 128 * 1024);
    SetCacheInfo(2, 1, 2048 * 1024);
    return true;
  } else if (dev_name_.find("SDM845") != std::string::npos) {  // 845
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {4, 5, 6, 7};
    little_core_ids_ = {0, 1, 2, 3};
    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    SetArchInfo(2, kA75, kA55);
    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
    SetCacheInfo(1, 2, 256 * 1024, 128 * 1024);
    SetCacheInfo(2, 1, 2048 * 1024);
    return true;
  } else if (dev_name_.find("SDM710") != std::string::npos) {  // 710
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {6, 7};
    little_core_ids_ = {0, 1, 2, 3, 4, 5};
    cluster_ids_ = {1, 1, 1, 1, 1, 1, 0, 0};
    SetArchInfo(2, kA75, kA55);
    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
    SetCacheInfo(1, 2, 256 * 1024, 128 * 1024);
    SetCacheInfo(2, 1, 1024 * 1024);
    return true;
  } else if (dev_name_.find("MSM8998") != std::string::npos) {  // 835
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {4, 5, 6, 7};
    little_core_ids_ = {0, 1, 2, 3};
    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    SetArchInfo(2, kA73, kA53);
    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
    SetCacheInfo(1, 2, 1024 * 1024,
                 /*real cache size is 2M, while that will get bad performace
                    on conv3x3s1 or gemm, set to 1M or 512K*/
                 1024 * 1024);
    return true;
  } else if (dev_name_.find("MSM8996") != std::string::npos) {  // 820
    core_num_ = 4;
    core_ids_ = {0, 1, 2, 3};
    big_core_ids_ = {2, 3};
    little_core_ids_ = {0, 1};
    cluster_ids_ = {1, 1, 0, 0};
    SetArchInfo(1, kA72);
    SetCacheInfo(0, 1, 24 * 1024);
    SetCacheInfo(1, 2, 1024 * 1024, 512 * 1024);
    return true;
  } else if (dev_name_.find("SDM660") != std::string::npos ||
             dev_name_.find("SDM636") != std::string::npos) {  // 660, 636
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {4, 5, 6, 7};
    little_core_ids_ = {0, 1, 2, 3};
    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    SetArchInfo(1, kA73);
    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
    SetCacheInfo(1, 1, 1024 * 1024);
    return true;
  } else if (dev_name_.find("MSM8976") != std::string::npos) {  // 652,653
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {4, 5, 6, 7};
    little_core_ids_ = {0, 1, 2, 3};
    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    SetArchInfo(2, kA72, kA53);
    SetCacheInfo(0, 1, 32 * 1024);
    SetCacheInfo(1, 2, 1024 * 1024, 512 * 1024);
    return true;
  } else if (dev_name_.find("MSM8953") != std::string::npos) {  // 625
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    little_core_ids_ = {};
    cluster_ids_ = {0, 0, 0, 0, 0, 0, 0, 0};
    SetArchInfo(1, kA53);
    SetCacheInfo(0, 1, 32 * 1024);
    SetCacheInfo(1, 1, 1024 * 1024);
    return true;
  } else if (dev_name_.find("MSM8939") != std::string::npos) {  // 615
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {0, 1, 2, 3};
    little_core_ids_ = {4, 5, 6, 7};
    cluster_ids_ = {0, 0, 0, 0, 1, 1, 1, 1};
    SetArchInfo(1, kA53);
    SetCacheInfo(0, 1, 32 * 1024);
    SetCacheInfo(1, 2, 512 * 1024, 256 * 1024);
    return true;
    /* MediaTek */
  } else if (dev_name_.find("MT6797") != std::string::npos) {
    // X20/X23/X25/X27
    core_num_ = 10;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    big_core_ids_ = {8, 9};
    little_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cluster_ids_ = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
    SetArchInfo(2, kA72, kA53);
    SetCacheInfo(0, 1, 32 * 1024);
    SetCacheInfo(1, 2, 1024 * 1024, 512 * 1024);
    return true;
  } else if (dev_name_.find("MT6799") != std::string::npos) {  // X30
    core_num_ = 10;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    big_core_ids_ = {8, 9};
    little_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    cluster_ids_ = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
    SetArchInfo(2, kA73, kA53);
    return true;
  } else if (dev_name_.find("MT6795") != std::string::npos ||
             dev_name_.find("MT6762") != std::string::npos ||
             dev_name_.find("MT6755T") != std::string::npos ||
             dev_name_.find("MT6755S") != std::string::npos ||
             dev_name_.find("MT6753") != std::string::npos ||
             dev_name_.find("MT6752") != std::string::npos ||
             dev_name_.find("MT6750") != std::string::npos) {
    // X10, P22, P15/P18, MT6753, MT6752/MT6752M, MT6750
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    little_core_ids_ = {};
    cluster_ids_ = {0, 0, 0, 0, 0, 0, 0, 0};
    SetArchInfo(1, kA53);
    return true;
  } else if (dev_name_.find("MT6758") != std::string::npos ||
             dev_name_.find("MT6757") != std::string::npos ||
             dev_name_.find("MT6763") != std::string::npos ||
             dev_name_.find("MT6755M") != std::string::npos ||
             dev_name_.find("MT6755") != std::string::npos) {
    // P30, P20/P25, P23, P10
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {4, 5, 6, 7};
    little_core_ids_ = {0, 1, 2, 3};
    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    SetArchInfo(1, kA53);
    return true;
  } else if (dev_name_.find("MT6771") != std::string::npos) {  // P60
    core_num_ = 8;
    core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
    big_core_ids_ = {4, 5, 6, 7};
    little_core_ids_ = {0, 1, 2, 3};
    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
    SetArchInfo(2, kA73, kA53);
    return true;
  } else if (dev_name_.find("MT6765") != std::string::npos ||
             dev_name_.find("MT6739") != std::string::npos ||
             dev_name_.find("MT6738") != std::string::npos ||
             dev_name_.find("MT6737") != std::string::npos) {
    // A22, MT6739, MT6738, MT6767
    core_num_ = 4;
    core_ids_ = {0, 1, 2, 3};
    big_core_ids_ = {0, 1, 2, 3};
    little_core_ids_ = {};
    cluster_ids_ = {0, 0, 0, 0};
    SetArchInfo(1, kA53);
    return true;
  }
  return false;
}
void DeviceInfo::SetCPUInfoByProb() {
#ifdef LITE_WITH_LINUX
  // get big.LITTLE cores by sorting CPU frequency
  sort_cpuid_by_max_freq(max_freqs_, &core_ids_, &cluster_ids_);
  big_core_ids_.clear();
  little_core_ids_.clear();
  for (int i = 0; i < cluster_ids_.size(); ++i) {
    if (cluster_ids_[i] == 0) {
      big_core_ids_.push_back(core_ids_[i]);
    } else {
      little_core_ids_.push_back(core_ids_[i]);
    }
  }
  // get l1, l2, l3 cache size for each core
  for (int i = 0; i < core_num_; i++) {
    get_cpu_cache_size(i, &(L1_cache_[i]), &(L2_cache_[i]), &(L3_cache_[i]));
  }
#endif  // LITE_WITH_LINUX
}
void DeviceInfo::RequestPowerFullMode(const int thread_num) {
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  active_ids_.clear();
  for (int i = 0; i < thread_num; ++i) {
    if (i < big_core_size) {
      active_ids_.push_back(big_core_ids_[i]);
    } else if (i < big_core_size + little_core_size) {
      active_ids_.push_back(little_core_ids_[i - big_core_size]);
    }
  }
  mode_ = LITE_POWER_FULL;
}

void DeviceInfo::RequestPowerHighMode(const int thread_num) {
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  active_ids_.clear();
  if (big_core_size > 0) {
    mode_ = LITE_POWER_HIGH;
    if (thread_num > big_core_size) {
      LOG(ERROR) << "Request thread num: " << thread_num
                 << ", exceed the big cores size: " << big_core_size
                 << ", truncate thread num to " << big_core_size;
      active_ids_ = big_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(big_core_ids_[i]);
      }
    }
  } else {
    mode_ = LITE_POWER_LOW;
    LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores.";
    if (thread_num > little_core_size) {
      active_ids_ = little_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(little_core_ids_[i]);
      }
    }
  }
}

void DeviceInfo::RequestPowerLowMode(const int thread_num) {
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  active_ids_.clear();
  if (little_core_size > 0) {
    mode_ = LITE_POWER_LOW;
    if (thread_num > little_core_size) {
      LOG(WARNING) << "Request thread num: " << thread_num
                   << ", exceed the little cores size: " << little_core_size
                   << ", truncate thread num to " << little_core_size;
      active_ids_ = little_core_ids_;
    } else {
      for (int i = 0; i < thread_num; i++) {
        active_ids_.push_back(little_core_ids_[i]);
      }
    }
  } else {
    mode_ = LITE_POWER_HIGH;
    LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
    if (thread_num > big_core_size) {
      active_ids_ = big_core_ids_;
    } else {
      for (int i = 0; i < thread_num; i++) {
        active_ids_.push_back(big_core_ids_[i]);
      }
    }
  }
}

void DeviceInfo::RequestPowerNoBindMode(const int thread_num) {
  active_ids_.clear();
  for (int i = 0; i < thread_num; i++) {
    active_ids_.push_back(0);
  }
  mode_ = LITE_POWER_NO_BIND;
}

void DeviceInfo::RequestPowerRandHighMode(const int shift_num,
                                          const int thread_num) {
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  if (big_core_size > 0) {
    mode_ = LITE_POWER_RAND_HIGH;
    if (thread_num > big_core_size) {
      LOG(WARNING) << "Request thread num: " << thread_num
                   << ", exceed the big cores size: " << big_core_size
                   << ", truncate thread num to " << big_core_size;
      active_ids_ = big_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(big_core_ids_[(i + shift_num) % big_core_size]);
      }
    }
  } else {
    mode_ = LITE_POWER_LOW;
    LOG(WARNING) << "HIGH POWER MODE is not support, switch to little cores.";
    if (thread_num > little_core_size) {
      active_ids_ = little_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(little_core_ids_[i]);
      }
    }
  }
}

void DeviceInfo::RequestPowerRandLowMode(const int shift_num,
                                         const int thread_num) {
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  active_ids_.clear();
  if (little_core_size > 0) {
    mode_ = LITE_POWER_RAND_LOW;
    if (thread_num > little_core_size) {
      LOG(WARNING) << "Request thread num: " << thread_num
                   << ", exceed the little cores size: " << little_core_size
                   << ", truncate thread num to " << little_core_size;
      active_ids_ = little_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(
            little_core_ids_[(i + shift_num) % little_core_size]);
      }
    }
  } else {
    mode_ = LITE_POWER_HIGH;
    LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores.";
    if (thread_num > big_core_size) {
      active_ids_ = big_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(big_core_ids_[i]);
      }
    }
  }
}
int DeviceInfo::Setup() {
  core_num_ = get_cpu_num();
  mem_size_ = get_mem_size();
  get_cpu_arch(&archs_, core_num_);
  // set defalut CPU info
  SetCacheInfo(0, DEFAULT_L1_CACHE_SIZE);
  SetCacheInfo(1, DEFAULT_L2_CACHE_SIZE);
  SetCacheInfo(2, DEFAULT_L3_CACHE_SIZE);
#ifdef LITE_WITH_LINUX
  // get max&min freq
  max_freqs_.resize(core_num_);
  min_freqs_.resize(core_num_);
  for (int i = 0; i < core_num_; ++i) {
    int max_freq, min_freq;
    get_cpu_max_min_freq(i, &max_freq, &min_freq);
    max_freqs_[i] = max_freq / 1000;
    min_freqs_[i] = min_freq / 1000;
  }
  // get cache size and big.LITTLE core ids
  dev_name_ = get_cpu_name();
  if (!SetCPUInfoByName()) {
    SetCPUInfoByProb();
  }
  // output info
  LOG(INFO) << "ARM multiprocessors name: " << dev_name_;
  LOG(INFO) << "ARM multiprocessors number: " << core_num_;
  for (int i = 0; i < core_num_; ++i) {
    LOG(INFO) << "ARM multiprocessors ID: " << core_ids_[i]
              << ", max freq: " << max_freqs_[i]
              << ", min freq: " << min_freqs_[i]
              << ", cluster ID: " << cluster_ids_[core_ids_[i]]
              << ", CPU ARCH: A" << archs_[i];
  }
  LOG(INFO) << "L1 DataCache size is: ";
  for (int i = 0; i < core_num_; ++i) {
    LOG(INFO) << L1_cache_[i] / 1024 << " KB";
  }
  LOG(INFO) << "L2 Cache size is: ";
  for (int i = 0; i < core_num_; ++i) {
    LOG(INFO) << L2_cache_[i] / 1024 << " KB";
  }
  LOG(INFO) << "Total memory: " << mem_size_ << "KB";
#endif
  // set default run mode
  SetRunMode(LITE_POWER_NO_BIND, 1);  // use single thread by default
  return 0;
}
void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
#ifdef ARM_WITH_OMP
  thread_num = std::min(thread_num, core_num_);
#else
  thread_num = 1;  // force thread_num to 1 if OpenMP is disabled
#endif
#ifdef LITE_WITH_LINUX
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  int big_little_core_size = big_core_size + little_core_size;
  thread_num = std::min(thread_num, big_little_core_size);
  count_++;
  int shift_num = (count_ / 10) % big_core_size;
  switch (mode) {
    case LITE_POWER_FULL:
      RequestPowerFullMode(thread_num);
      break;
    case LITE_POWER_HIGH:
      RequestPowerHighMode(thread_num);
      break;
    case LITE_POWER_LOW:
      RequestPowerLowMode(thread_num);
      break;
    case LITE_POWER_NO_BIND:
      RequestPowerNoBindMode(thread_num);
      break;
    case LITE_POWER_RAND_HIGH:
      RequestPowerRandHighMode(shift_num, thread_num);
      break;
    case LITE_POWER_RAND_LOW:
      RequestPowerRandLowMode(shift_num, thread_num);
      break;
    default:
      LOG(FATAL) << "Unsupported power mode: " << mode;
      break;
  }
  if (active_ids_.size() == 0) {
    active_ids_.push_back(0);
  }
#ifdef ARM_WITH_OMP
  omp_set_num_threads(active_ids_.size());
#endif
  if (mode_ != LITE_POWER_NO_BIND) {
    if (check_cpu_online(active_ids_)) {
      bind_threads(active_ids_);
    } else {
      LOG(WARNING) << "Some cores are offline, switch to NO BIND MODE";
      mode_ = LITE_POWER_NO_BIND;
    }
  }
#else   // LITE_WITH_LINUX
  // only LITE_POWER_NO_BIND is supported in other OS
  RequestPowerNoBindMode(thread_num);
#ifdef ARM_WITH_OMP
  omp_set_num_threads(active_ids_.size());
#endif
#endif  // LITE_WITH_LINUX
  //! alloc memory for sgemm in this context
  workspace_.Resize(
      {static_cast<int64_t>(L2_cache_[active_ids_[0]] / sizeof(float))});
  arch_ = archs_[active_ids_[0]];
}
void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
  SetCacheInfo(0, l1size);
  SetCacheInfo(1, l2size);
  SetCacheInfo(2, l3size);
  workspace_.Resize({2 * (l1size + l2size)});
}

bool DeviceInfo::ExtendWorkspace(DDimLite dims) {
  auto count = dims.product();
  auto old = workspace_.dims();
  if (count == old.product()) {
    return false;
  }
  workspace_.Resize(
      {static_cast<int64_t>(count + L2_cache_[active_ids_[0]] / sizeof(float))});
  return true;
}

#endif  // LITE_WITH_ARM
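A hedged sketch of how a kernel might consume the workspace that SetRunMode()/ExtendWorkspace() manage (RunSgemmLikeKernel is a made-up caller; only ExtendWorkspace() and workspace_data<T>() come from this diff):

#include "paddle/fluid/lite/core/cpu_info.h"

// Illustrative kernel helper: grow the shared scratch buffer to at least the
// size described by `packed_dims`, then fetch a typed pointer into it.
void RunSgemmLikeKernel(const paddle::lite::DDimLite& packed_dims) {
  auto& dev = paddle::lite::DeviceInfo::Global();
  // ExtendWorkspace() adds an L2-cache-sized slack on top of the requested
  // element count and returns false when no resize was necessary.
  dev.ExtendWorkspace(packed_dims);
  float* scratch = dev.workspace_data<float>();
  // ... pack panels into `scratch` and run the micro-kernel ...
  (void)scratch;
}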
paddle/fluid/lite/core/cpu_info.h

@@ -14,6 +14,7 @@
#pragma once

#include <cstdarg>
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/lite_tensor.h"

@@ -47,92 +48,73 @@ typedef enum {
class DeviceInfo {
 public:
  int idx_;
  int max_freq_;
  int min_freq_;
  int generate_arch_;
  int compute_core_num_;
  int max_memory_;
  int sharemem_size_;
  std::string device_name_;
  std::string compute_ability_;
  std::vector<int> L1_cache_;
  std::vector<int> L2_cache_;
  std::vector<int> L3_cache_;
  std::vector<int> core_ids_;
  std::vector<int> big_core_ids_;
  std::vector<int> little_core_ids_;
  std::vector<int> cluster_ids_;
  std::vector<ARMArch> archs_;
  ARMArch arch_;
  // LITE_POWER_HIGH stands for using big cores,
  // LITE_POWER_LOW stands for using small core,
  // LITE_POWER_FULL stands for using all cores
  PowerMode mode_;
  std::vector<int> active_ids_;
  TensorLite workspace_;
  int64_t count_{0};

  static DeviceInfo& Global() {
    static auto* x = new DeviceInfo;
    return *x;
  }

  static void Init() {
    auto& info = Global();
    InitInternal(&info);
  static int Init() {
    static int ret = Global().Setup();
    return ret;
  }

  void SetRunMode(PowerMode mode, int threads);
  int Setup();

  void SetRunMode(PowerMode mode, int thread_num);
  void SetCache(int l1size, int l2size, int l3size);
  void SetArch(ARMArch arch) { arch_ = arch; }
  void BindDev();

  PowerMode mode() const { return mode_; }
  int threads() const { return active_ids_.size(); }
  ARMArch arch() const { return arch_; }
  int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
  int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
  int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }

  template <typename T>
  T* workspace_data() {
    return workspace_.mutable_data<T>();
  }
  int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
  int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
  int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }

  bool ExtendWorkspace(DDimLite dims);

 private:
  DeviceInfo() = default;
  static void InitInternal(DeviceInfo* dev);
};

size_t arm_get_meminfo();
int arm_get_cpucount();
void arm_get_cpu_arch(std::vector<ARMArch>* archs);
bool get_cpu_info_from_name(DeviceInfo* cpu_info, std::string hardware_name);
  int core_num_;
  std::vector<int> max_freqs_;
  std::vector<int> min_freqs_;
  int mem_size_;
  std::string dev_name_;
#ifdef LITE_WITH_LINUX
void set_default_cache(DeviceInfo* dev);
std::string arm_get_cpu_name();
  std::vector<int> L1_cache_;
  std::vector<int> L2_cache_;
  std::vector<int> L3_cache_;
  std::vector<int> core_ids_;
  std::vector<int> big_core_ids_;
  std::vector<int> little_core_ids_;
  std::vector<int> cluster_ids_;
  std::vector<ARMArch> archs_;
int get_max_freq_khz(int cpuid);
  ARMArch arch_;
  // LITE_POWER_HIGH stands for using big cores,
  // LITE_POWER_LOW stands for using small core,
  // LITE_POWER_FULL stands for using all cores
  PowerMode mode_;
  std::vector<int> active_ids_;
  TensorLite workspace_;
  int64_t count_{0};
int arm_sort_cpuid_by_max_frequency(int cpu_count, std::vector<int>* cpuids,
                                    const std::vector<int>& cpu_freq,
                                    std::vector<int>* cluster_ids);
int check_online(const std::vector<int>& core_ids);
int set_sched_affinity(const std::vector<int>& cpuids);
  void SetCacheInfo(int cache_id, int argc, ...);
  void SetArchInfo(int argc, ...);
  bool SetCPUInfoByName();
  void SetCPUInfoByProb();
  void RequestPowerFullMode(const int thread_num);
  void RequestPowerHighMode(const int thread_num);
  void RequestPowerLowMode(const int thread_num);
  void RequestPowerNoBindMode(const int thread_num);
  void RequestPowerRandHighMode(const int shift_num, const int thread_num);
  void RequestPowerRandLowMode(const int shift_num, const int thread_num);
#endif  // LITE_WITH_LINUX

  DeviceInfo() = default;
};

#endif  // LITE_WITH_ARM