Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle, in sync with the upstream project)
Commit ce46ef22
Authored Jun 20, 2019 by hong19860320
Parent: 251255bc

ARM cpu_info refine
test=develop
Showing 3 changed files with 773 additions and 724 deletions (+773, -724).
Changed files:
  paddle/fluid/lite/core/context.h    +4    -5
  paddle/fluid/lite/core/cpu_info.cc  +726  -658
  paddle/fluid/lite/core/cpu_info.h   +43   -61
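The commit folds the old free-function CPU probing into a DeviceInfo singleton that is set up once and then only queried. A minimal sketch of how the refactored API introduced below is meant to be driven (the calling function is hypothetical and only for illustration; the DeviceInfo methods are the ones added in this diff):

    #include "paddle/fluid/lite/core/cpu_info.h"

    // Hypothetical caller, not part of the commit.
    void ConfigureArmRuntime() {
      using paddle::lite::DeviceInfo;
      DeviceInfo::Init();  // lazily runs Setup(): core count, freqs, caches, big.LITTLE split
      DeviceInfo::Global().SetRunMode(paddle::lite::LITE_POWER_HIGH, 2);  // 2 threads on big cores
      int l1 = DeviceInfo::Global().l1_cache_size();   // cache sizes of the active core
      int l2 = DeviceInfo::Global().l2_cache_size();
      float* scratch = DeviceInfo::Global().workspace_data<float>();  // shared sgemm workspace
      (void)l1; (void)l2; (void)scratch;
    }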
paddle/fluid/lite/core/context.h
...
...
@@ -67,7 +67,7 @@ class Context<TargetType::kARM> {
   ARMContext& operator=(const ARMContext& ctx) {}

   // NOTE: InitOnce should only be used by ContextScheduler
-  void InitOnce() {}
+  void InitOnce() { DeviceInfo::Init(); }

   void CopyShared(const ARMContext* ctx) {}
...
...
@@ -78,20 +78,19 @@ class Context<TargetType::kARM> {
     return DeviceInfo::Global().SetCache(l1size, l2size, l3size);
   }
   void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); }
-  void BindDev() { return DeviceInfo::Global().BindDev(); }

   PowerMode mode() const { return DeviceInfo::Global().mode(); }
   int threads() const { return DeviceInfo::Global().threads(); }
   ARMArch arch() const { return DeviceInfo::Global().arch(); }
+  int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
+  int l2_cache_size() const { return DeviceInfo::Global().l2_cache_size(); }
+  int l3_cache_size() const { return DeviceInfo::Global().l3_cache_size(); }

   template <typename T>
   T* workspace_data() {
     return DeviceInfo::Global().workspace_data<T>();
   }

-  int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
-  int l2_cache_size() const { return DeviceInfo::Global().l2_cache_size(); }
-  int l3_cache_size() const { return DeviceInfo::Global().l3_cache_size(); }
   bool ExtendWorkspace(DDimLite dims) {
     return DeviceInfo::Global().ExtendWorkspace(dims);
   }
...
...
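With this hunk the ARM context keeps no device state of its own: every accessor forwards to DeviceInfo::Global(), and InitOnce() now triggers DeviceInfo::Init(). A small illustrative sketch built only on the wrappers visible above (the function and variable names are made up for the example; ARMContext is the Context<TargetType::kARM> alias used in this header):

    // Illustration only, not part of the commit.
    void PrepareScratch(paddle::lite::ARMContext* ctx) {
      ctx->InitOnce();                            // now calls DeviceInfo::Init() exactly once
      int l2 = ctx->l2_cache_size();              // forwarded to DeviceInfo::Global()
      float* tmp = ctx->workspace_data<float>();  // shared workspace owned by DeviceInfo
      (void)l2; (void)tmp;
    }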
paddle/fluid/lite/core/cpu_info.cc
...
...
@@ -29,7 +29,8 @@
 #include <omp.h>
 #endif
-#include <cstdarg>
+#include <algorithm>
+#include <limits>
 #include "paddle/fluid/lite/core/cpu_info.h"

 namespace paddle {
@@ -37,549 +38,55 @@ namespace lite {
 #ifdef LITE_WITH_ARM

-void DeviceInfo::InitInternal(DeviceInfo* dev) {
-  set_default_cache(dev);
-  dev->compute_core_num_ = arm_get_cpucount();
-  dev->max_memory_ = arm_get_meminfo();
-// get max freq
-#ifdef LITE_WITH_LINUX
-  std::vector<int> max_freq(dev->compute_core_num_);
-  for (int i = 0; i < dev->compute_core_num_; ++i) {
-    max_freq[i] = get_max_freq_khz(i) / 1000;
-  }
-  std::string cpu_name = arm_get_cpu_name();
-  if (get_cpu_info_from_name(dev, cpu_name) != true) {
-    arm_sort_cpuid_by_max_frequency(dev->compute_core_num_, &dev->core_ids_,
-                                    max_freq, &dev->cluster_ids_);
-    dev->big_core_ids_.clear();
-    dev->little_core_ids_.clear();
-    for (int i = 0; i < dev->cluster_ids_.size(); ++i) {
-      if (dev->cluster_ids_[i] == 0) {
-        dev->big_core_ids_.push_back(dev->core_ids_[i]);
-      } else {
-        dev->little_core_ids_.push_back(dev->core_ids_[i]);
-      }
-    }
-    arm_get_cpu_arch(&dev->archs_);
-  }
-  LOG(INFO) << "ARM multiprocessors number: " << dev->compute_core_num_;
-  for (int i = 0; i < dev->compute_core_num_; ++i) {
-    LOG(INFO) << "ARM multiprocessors ID: " << dev->core_ids_[i]
-              << ", frequence: " << max_freq[i]
-              << ", cluster ID: " << dev->cluster_ids_[dev->core_ids_[i]]
-              << ", CPU ARCH: A" << dev->archs_[i];
-  }
-  VLOG(1) << "L1 DataCache size is: ";
-  for (int i = 0; i < dev->compute_core_num_; ++i) {
-    VLOG(1) << dev->L1_cache_[i] / 1024 << " KB";
-  }
-  VLOG(1) << "L2 Cache size is: ";
-  for (int i = 0; i < dev->compute_core_num_; ++i) {
-    VLOG(1) << dev->L2_cache_[i] / 1024 << " KB";
-  }
-  VLOG(1) << "Total memory: " << dev->max_memory_ << "KB";
-  dev->max_freq_ = max_freq[0];
-  for (int j = 1; j < dev->compute_core_num_; ++j) {
-    if (dev->max_freq_ < max_freq[j]) {
-      dev->max_freq_ = max_freq[j];
-    }
-  }
-#elif defined(TARGET_IOS)
-  arm_get_cpu_arch(&dev->archs_);
-#endif
-  dev->active_ids_ = {0};
-  dev->mode_ = LITE_POWER_HIGH;
-  dev->workspace_.Resize({static_cast<int64_t>(
-      dev->L2_cache_[dev->active_ids_[0]] / sizeof(float))});
-#ifdef TARGET_IOS
-  dev->arch_ = APPLE;  // use 6x8
-#else
-  if (dev->big_core_ids_.size() > 0) {
-    dev->arch_ = dev->archs_[dev->big_core_ids_[0]];
-  }
-#endif
-}
+#ifdef TARGET_IOS
+const int DEFAULT_L1_CACHE_SIZE = 64 * 1024;
+const int DEFAULT_L2_CACHE_SIZE = 2048 * 1024;
+const int DEFAULT_L3_CACHE_SIZE = 0;
+#else
+const int DEFAULT_L1_CACHE_SIZE = 32 * 1024;
+const int DEFAULT_L2_CACHE_SIZE = 512 * 1024;
+const int DEFAULT_L3_CACHE_SIZE = 0;
+#endif

-void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
-  int cpu_count = arm_get_cpucount();
-  L1_cache_.resize(cpu_count);
-  L2_cache_.resize(cpu_count);
-  L3_cache_.resize(cpu_count);
-  for (int i = 0; i < cpu_count; ++i) {
-    L1_cache_[i] = l1size;
-    L2_cache_[i] = l2size;
-    L3_cache_[i] = l3size;
-  }
-  workspace_.Resize({2 * (l1size + l2size)});
-}
-
-void DeviceInfo::BindDev() {
-#ifdef ARM_WITH_OMP
-  int num_threads = active_ids_.size();
-  omp_set_num_threads(num_threads);
-#ifdef LITE_WITH_LINUX
-  std::vector<int> ssarets;
-  for (int j = 0; j < num_threads; ++j) {
-    ssarets.push_back(0);
-  }
-#pragma omp parallel for
-  for (int i = 0; i < num_threads; i++) {
-    ssarets[i] = set_sched_affinity(active_ids_);
-  }
-  for (int i = 0; i < num_threads; i++) {
-    if (ssarets[i] != 0) {
-      LOG(ERROR) << "set cpu affinity failed, cpuID: " << active_ids_[i];
-      return;
-    }
-  }
-#endif  // LITE_WITH_LINUX
-#else   // ARM_WITH_OMP
-#ifdef LITE_WITH_LINUX
-  std::vector<int> cpuid1;
-  cpuid1.push_back(active_ids_[0]);
-  int ssaret = set_sched_affinity(cpuid1);
-  if (ssaret != 0) {
-    printf("set cpu affinity failed, cpuID: %d\n", active_ids_[0]);
-    return;
-  }
-#endif  // LITE_WITH_LINUX
-#endif  // ARM_WITH_OMP
-}
+int get_cpu_num() {
+#ifdef LITE_WITH_LINUX
+  // get cpu count from /sys/devices/system/cpu/cpunum/uevent
+  int max_cpu_num = 20;
+  int cpu_num = 0;
+  for (int i = 0; i < max_cpu_num; ++i) {
+    char path[256];
+    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/uevent", i);
+    FILE* fp = fopen(path, "rb");
+    if (!fp) {
+      break;
+    }
+    cpu_num++;
+    fclose(fp);
+  }
+  if (cpu_num < 1) {
+    cpu_num = 1;
+  }
+  return cpu_num;
+#elif defined(TARGET_IOS)
+  int cpu_num = 0;
+  size_t len = sizeof(cpu_num);
+  sysctlbyname("hw.ncpu", &cpu_num, &len, NULL, 0);
+  if (cpu_num < 1) {
+    cpu_num = 1;
+  }
+  return cpu_num;
+#else
+  return 1;
+#endif
+}

-void DeviceInfo::SetRunMode(PowerMode mode, int threads) {
-  int big_core_size = big_core_ids_.size();
-  int small_core_size = little_core_ids_.size();
-  if (threads > big_core_size + small_core_size) {
-    threads = big_core_size + small_core_size;
-  }
-#ifdef ARM_WITH_OMP
-  count_++;
-  int shift_num = (count_ / 10) % big_core_size;
-  switch (mode) {
-    case LITE_POWER_FULL:
-      mode_ = mode;
-      active_ids_.clear();
-      for (int i = 0; i < threads; ++i) {
-        if (i < big_core_size) {
-          active_ids_.push_back(big_core_ids_[i]);
-        } else {
-          active_ids_.push_back(little_core_ids_[i - big_core_size]);
-        }
-      }
-      if (active_ids_.size() == 0) { active_ids_.push_back(0); }
-      break;
-    case LITE_POWER_HIGH:
-      active_ids_.clear();
-      if (big_core_size > 0) {
-        mode_ = LITE_POWER_HIGH;
-        if (threads > big_core_size) {
-          LOG(ERROR) << "threads: " << threads
-                     << ", exceed the big cores size: " << big_core_size;
-          active_ids_ = big_core_ids_;
-        } else {
-          for (int i = 0; i < threads; ++i) active_ids_.push_back(big_core_ids_[i]);
-        }
-      } else {
-        mode_ = LITE_POWER_LOW;
-        LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores.";
-        if (threads > small_core_size) {
-          active_ids_ = little_core_ids_;
-        } else {
-          for (int i = 0; i < threads; ++i) active_ids_.push_back(little_core_ids_[i]);
-        }
-      }
-      if (active_ids_.size() == 0) { active_ids_.push_back(0); }
-      break;
-    case LITE_POWER_LOW:
-      active_ids_.clear();
-      if (small_core_size > 0) {
-        mode_ = LITE_POWER_LOW;
-        if (threads > small_core_size) {
-          LOG(WARNING) << "threads: " << threads
-                       << ", exceed the little cores size: " << small_core_size;
-          active_ids_ = little_core_ids_;
-        } else {
-          for (int i = 0; i < threads; ++i) active_ids_.push_back(little_core_ids_[i]);
-        }
-      } else {
-        mode_ = LITE_POWER_HIGH;
-        LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
-        if (threads > big_core_size) {
-          active_ids_ = big_core_ids_;
-        } else {
-          for (int i = 0; i < threads; ++i) active_ids_.push_back(big_core_ids_[i]);
-        }
-      }
-      if (active_ids_.size() == 0) { active_ids_.push_back(0); }
-      break;
-    case LITE_POWER_NO_BIND:
-      mode_ = LITE_POWER_NO_BIND;
-      active_ids_.clear();
-      if (threads > core_ids_.size()) {
-        active_ids_.resize(core_ids_.size());
-      } else {
-        active_ids_.resize(threads);
-      }
-      break;
-    case LITE_POWER_RAND_HIGH:
-      active_ids_.clear();
-      if (big_core_size > 0) {
-        mode_ = LITE_POWER_RAND_HIGH;
-        if (threads > big_core_size) {
-          LOG(WARNING) << "threads: " << threads
-                       << ", exceed the big cores size: " << big_core_size;
-          active_ids_ = big_core_ids_;
-        } else {
-          for (int i = 0; i < threads; ++i)
-            active_ids_.push_back(big_core_ids_[(i + shift_num) % big_core_size]);
-        }
-      } else {
-        mode_ = LITE_POWER_LOW;
-        LOG(WARNING) << "HIGH POWER MODE is not support, switch to little cores.";
-        if (threads > small_core_size) {
-          active_ids_ = little_core_ids_;
-        } else {
-          for (int i = 0; i < threads; ++i) active_ids_.push_back(little_core_ids_[i]);
-        }
-      }
-      if (active_ids_.size() == 0) { active_ids_.push_back(0); }
-      break;
-    case LITE_POWER_RAND_LOW:
-      active_ids_.clear();
-      if (small_core_size > 0) {
-        mode_ = LITE_POWER_RAND_LOW;
-        if (threads > small_core_size) {
-          LOG(WARNING) << "threads: " << threads
-                       << ", exceed the little cores size: " << small_core_size;
-          active_ids_ = little_core_ids_;
-        } else {
-          for (int i = 0; i < threads; ++i)
-            active_ids_.push_back(little_core_ids_[(i + shift_num) % small_core_size]);
-        }
-      } else {
-        mode_ = LITE_POWER_HIGH;
-        LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores.";
-        if (threads > big_core_size) {
-          active_ids_ = big_core_ids_;
-        } else {
-          for (int i = 0; i < threads; ++i) active_ids_.push_back(big_core_ids_[i]);
-        }
-      }
-      if (active_ids_.size() == 0) { active_ids_.push_back(0); }
-      break;
-  }
-  //! fix multi-threads LITE_POWER_HIGH mode
-  if (mode_ == LITE_POWER_NO_BIND || threads > 1) {
-    int threads = active_ids_.size();
-    omp_set_num_threads(threads);
-  } else {
-    if (check_online(active_ids_)) {
-      BindDev();
-    } else {
-      LOG(WARNING) << "core id " << active_ids_[0]
-                   << " is offline, switch to NO BIND MODE";
-      int threads = active_ids_.size();
-      omp_set_num_threads(threads);
-    }
-  }
-#else
-  if (big_core_size > 0) {
-    active_ids_ = {big_core_ids_[0]};
-  } else {
-    active_ids_ = {0};
-  }
-#endif
-  //! alloc memory for sgemm in this context
-  int temp_mem_size = L2_cache_[active_ids_[0]] / sizeof(float);
-  workspace_.Resize({temp_mem_size});
-  arch_ = archs_[active_ids_[0]];
-}
-
-bool DeviceInfo::ExtendWorkspace(DDimLite dims) {
-  auto count = dims.product();
-  auto old = workspace_.dims();
-  if (count == old.product()) {
-    return false;
-  }
-  workspace_.Resize({static_cast<int64_t>(
-      count + L2_cache_[active_ids_[0]] / sizeof(float))});
-  return true;
-}
-
-// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
-void set_cache_info(DeviceInfo* cpu_info, int cache_id, int argc, ...) {
-  va_list arg_ptr;
-  va_start(arg_ptr, argc);
-  std::vector<int>* cache;
-  switch (cache_id) {
-    case 0:
-      cache = &cpu_info->L1_cache_;
-      break;
-    case 1:
-      cache = &cpu_info->L2_cache_;
-      break;
-    case 2:
-      cache = &cpu_info->L3_cache_;
-      break;
-    default:
-      break;
-  }
-  int core_num = cpu_info->compute_core_num_;
-  cache->resize(core_num);
-  if (argc == 1) {
-    int cache_size = va_arg(arg_ptr, int);
-    for (int i = 0; i < core_num; ++i) {
-      (*cache)[i] = cache_size;
-    }
-  } else {
-    int big_core_num = cpu_info->big_core_ids_.size();
-    int little_core_num = cpu_info->little_core_ids_.size();
-    int big_core_cache_size = va_arg(arg_ptr, int);
-    int little_core_cache_size = va_arg(arg_ptr, int);
-    for (int i = 0; i < big_core_num; ++i) {
-      (*cache)[cpu_info->big_core_ids_[i]] = big_core_cache_size;
-    }
-    for (int i = 0; i < little_core_num; ++i) {
-      (*cache)[cpu_info->little_core_ids_[i]] = little_core_cache_size;
-    }
-  }
-  va_end(arg_ptr);
-}
-
-void set_arch_info(DeviceInfo* cpu_info, int argc, ...) {
-  va_list arg_ptr;
-  va_start(arg_ptr, argc);
-  int core_num = cpu_info->compute_core_num_;
-  cpu_info->archs_.resize(core_num);
-  if (argc == 1) {
-    ARMArch arch = (ARMArch)va_arg(arg_ptr, int);
-    for (int i = 0; i < core_num; ++i) {
-      cpu_info->archs_[i] = arch;
-    }
-  } else {
-    ARMArch big_core_arch = (ARMArch)va_arg(arg_ptr, int);
-    ARMArch little_core_arch = (ARMArch)va_arg(arg_ptr, int);
-    int big_core_num = cpu_info->big_core_ids_.size();
-    int little_core_num = cpu_info->little_core_ids_.size();
-    for (int i = 0; i < big_core_num; ++i) {
-      cpu_info->archs_[cpu_info->big_core_ids_[i]] = big_core_arch;
-    }
-    for (int i = 0; i < little_core_num; ++i) {
-      cpu_info->archs_[cpu_info->little_core_ids_[i]] = little_core_arch;
-    }
-  }
-  va_end(arg_ptr);
-}
-
-bool get_cpu_info_from_name(DeviceInfo* cpu_info, std::string hardware_name) {
-  /* Snapdragon */
-  if (hardware_name.find("SDM845") != std::string::npos) {  // 845
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {4, 5, 6, 7}; cpu_info->little_core_ids_ = {0, 1, 2, 3};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
-    set_arch_info(cpu_info, 2, kA75, kA55);
-    set_cache_info(cpu_info, 0, 1, 32 * 1024);
-    set_cache_info(cpu_info, 1, 2, 256 * 1024, 128 * 1024);
-    set_cache_info(cpu_info, 2, 1, 2048 * 1024);
-    return true;
-  } else if (hardware_name.find("SDM710") != std::string::npos) {  // 710
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {6, 7}; cpu_info->little_core_ids_ = {0, 1, 2, 3, 4, 5};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 1, 1, 0, 0};
-    set_arch_info(cpu_info, 2, kA75, kA55);
-    return true;
-  } else if (hardware_name.find("MSM8998") != std::string::npos) {  // 835
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {4, 5, 6, 7}; cpu_info->little_core_ids_ = {0, 1, 2, 3};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
-    set_arch_info(cpu_info, 2, kA73, kA53);
-    set_cache_info(cpu_info, 0, 2, 64 * 1024);
-    set_cache_info(cpu_info, 1, 2, 1024 * 1024,
-                   /*real cache size is 2M, while that will get bad performace
-                      on conv3x3s1 or gemm, set to 1M or 512K*/
-                   1024 * 1024);
-    return true;
-  } else if (hardware_name.find("MSM8996") != std::string::npos) {  // 820
-    cpu_info->compute_core_num_ = 4; cpu_info->core_ids_ = {0, 1, 2, 3};
-    cpu_info->big_core_ids_ = {2, 3}; cpu_info->little_core_ids_ = {0, 1};
-    cpu_info->cluster_ids_ = {1, 1, 0, 0};
-    set_arch_info(cpu_info, 1, kA72);
-    set_cache_info(cpu_info, 0, 1, 24 * 1024);
-    set_cache_info(cpu_info, 1, 2, 1024 * 1024, 512 * 1024);
-    return true;
-  } else if (hardware_name.find("SDM660") != std::string::npos ||
-             hardware_name.find("SDM636") != std::string::npos) {  // 660, 636
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {4, 5, 6, 7}; cpu_info->little_core_ids_ = {0, 1, 2, 3};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
-    set_arch_info(cpu_info, 1, kA73);
-    set_cache_info(cpu_info, 0, 2, 64 * 1024, 32 * 1024);
-    set_cache_info(cpu_info, 1, 1, 1024 * 1024);
-    return true;
-  } else if (hardware_name.find("MSM8976") != std::string::npos) {  // 652,653
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {4, 5, 6, 7}; cpu_info->little_core_ids_ = {0, 1, 2, 3};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
-    set_arch_info(cpu_info, 2, kA72, kA53);
-    set_cache_info(cpu_info, 0, 1, 32 * 1024);
-    set_cache_info(cpu_info, 1, 2, 1024 * 1024, 512 * 1024);
-    return true;
-  } else if (hardware_name.find("MSM8953") != std::string::npos) {  // 625
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7}; cpu_info->little_core_ids_ = {};
-    cpu_info->cluster_ids_ = {0, 0, 0, 0, 0, 0, 0, 0};
-    set_arch_info(cpu_info, 1, kA53);
-    set_cache_info(cpu_info, 0, 1, 32 * 1024);
-    set_cache_info(cpu_info, 1, 1, 1024 * 1024);
-    return true;
-  } else if (hardware_name.find("MSM8939") != std::string::npos) {  // 615
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {0, 1, 2, 3}; cpu_info->little_core_ids_ = {4, 5, 6, 7};
-    cpu_info->cluster_ids_ = {0, 0, 0, 0, 1, 1, 1, 1};
-    set_arch_info(cpu_info, 1, kA53);
-    set_cache_info(cpu_info, 0, 1, 32 * 1024);
-    set_cache_info(cpu_info, 1, 2, 512 * 1024, 256 * 1024);
-    return true;
-    /* MediaTek */
-  } else if (hardware_name.find("MT6797") != std::string::npos) {  // X20/X23/X25/X27
-    cpu_info->compute_core_num_ = 10;
-    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-    cpu_info->big_core_ids_ = {8, 9}; cpu_info->little_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
-    set_arch_info(cpu_info, 2, kA72, kA53);
-    set_cache_info(cpu_info, 0, 1, 32 * 1024);
-    set_cache_info(cpu_info, 1, 2, 1024 * 1024, 512 * 1024);
-    return true;
-  } else if (hardware_name.find("MT6799") != std::string::npos) {  // X30
-    cpu_info->compute_core_num_ = 10;
-    cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-    cpu_info->big_core_ids_ = {8, 9}; cpu_info->little_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
-    set_arch_info(cpu_info, 2, kA73, kA53);
-    return true;
-  } else if (hardware_name.find("MT6795") != std::string::npos ||
-             hardware_name.find("MT6762") != std::string::npos ||
-             hardware_name.find("MT6755T") != std::string::npos ||
-             hardware_name.find("MT6755S") != std::string::npos ||
-             hardware_name.find("MT6753") != std::string::npos ||
-             hardware_name.find("MT6752") != std::string::npos ||
-             hardware_name.find("MT6750") != std::string::npos) {
-    // X10, P22, P15/P18, MT6753, MT6752/MT6752M, MT6750
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7}; cpu_info->little_core_ids_ = {};
-    cpu_info->cluster_ids_ = {0, 0, 0, 0, 0, 0, 0, 0};
-    set_arch_info(cpu_info, 1, kA53);
-    return true;
-  } else if (hardware_name.find("MT6758") != std::string::npos ||
-             hardware_name.find("MT6757") != std::string::npos ||
-             hardware_name.find("MT6763") != std::string::npos ||
-             hardware_name.find("MT6755M") != std::string::npos ||
-             hardware_name.find("MT6755") != std::string::npos) {
-    // P30, P20/P25, P23, P10
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {4, 5, 6, 7}; cpu_info->little_core_ids_ = {0, 1, 2, 3};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
-    set_arch_info(cpu_info, 1, kA53);
-    return true;
-  } else if (hardware_name.find("MT6771") != std::string::npos) {  // P60
-    cpu_info->compute_core_num_ = 8; cpu_info->core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
-    cpu_info->big_core_ids_ = {4, 5, 6, 7}; cpu_info->little_core_ids_ = {0, 1, 2, 3};
-    cpu_info->cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
-    set_arch_info(cpu_info, 2, kA73, kA53);
-    return true;
-  } else if (hardware_name.find("MT6765") != std::string::npos ||
-             hardware_name.find("MT6739") != std::string::npos ||
-             hardware_name.find("MT6738") != std::string::npos ||
-             hardware_name.find("MT6737") != std::string::npos) {
-    // A22, MT6739, MT6738, MT6767
-    cpu_info->compute_core_num_ = 4; cpu_info->core_ids_ = {0, 1, 2, 3};
-    cpu_info->big_core_ids_ = {0, 0, 0, 0}; cpu_info->little_core_ids_ = {};
-    cpu_info->cluster_ids_ = {0, 0, 0, 0};
-    set_arch_info(cpu_info, 1, kA53);
-    return true;
-  }
-  return false;
-}
-
-size_t arm_get_meminfo() {
+size_t get_mem_size() {
 #ifdef LITE_WITH_LINUX
   // get cpu count from /proc/cpuinfo
   FILE* fp = fopen("/proc/meminfo", "rb");
   if (!fp) {
     return 1;
   }
   size_t memsize = 0;
   char line[1024];
   while (!feof(fp)) {
...
...
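Both the removed arm_get_cpucount() and the new get_cpu_num() shown above count cores the same way: probe /sys/devices/system/cpu/cpu<N>/uevent until fopen() fails. The same idea, pulled out as a standalone helper for clarity (the helper name is illustrative and not part of the patch):

    #include <cstdio>

    // Count CPUs by probing sysfs nodes, mirroring the loop in the diff.
    int CountCpusViaSysfs(int max_cpu_num = 20) {
      int cpu_num = 0;
      for (int i = 0; i < max_cpu_num; ++i) {
        char path[256];
        snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/uevent", i);
        FILE* fp = fopen(path, "rb");
        if (!fp) break;  // first missing node ends the scan
        ++cpu_num;
        fclose(fp);
      }
      return cpu_num < 1 ? 1 : cpu_num;
    }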
@@ -589,52 +96,18 @@ size_t arm_get_meminfo() {
     }
     sscanf(s, "MemTotal: %d kB", &memsize);
   }
   fclose(fp);
   return memsize;
 #elif defined(TARGET_IOS)
   // to be implemented
   printf("not implemented\n");
-  return 0;
-#endif
-}
+#endif
+  return 0;
+}

-int arm_get_cpucount() {
-#ifdef LITE_WITH_LINUX
-  // get cpu count from /sys/devices/system/cpu/cpunum/uevent
-  int max_cpu_count = 20;
-  int count = 0;
-  for (int i = 0; i < max_cpu_count; ++i) {
-    char path[256];
-    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/uevent", i);
-    FILE* fp = fopen(path, "rb");
-    if (!fp) {
-      break;
-    }
-    count++;
-    fclose(fp);
-  }
-  if (count < 1) {
-    count = 1;
-  }
-  return count;
-#elif defined(TARGET_IOS)
-  int count = 0;
-  size_t len = sizeof(count);
-  sysctlbyname("hw.ncpu", &count, &len, NULL, 0);
-  if (count < 1) {
-    count = 1;
-  }
-  return count;
-#else
-  return 1;
-#endif
-}
-
-void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
-#ifdef LITE_WITH_LINUX
+void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
+  archs->clear();
+#ifdef LITE_WITH_LINUX
   //! get CPU ARCH
   FILE* fp = fopen("/proc/cpuinfo", "rb");
   if (!fp) {
...
...
@@ -668,6 +141,29 @@ void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
       case 0xd0a:
         archs->push_back(kA75);
         break;
+      case 0xd40:
+        archs->push_back(kA76);
+        break;
+      case 0x804:
+        // 855
+        archs->push_back(kA76);
+        break;
+      case 0x805:
+        // 855
+        archs->push_back(kA55);
+        break;
+      case 0x802:
+        // 845
+        archs->push_back(kA75);
+        break;
+      case 0x803:
+        // 845
+        archs->push_back(kA55);
+        break;
+      case 0x801:
+        // 835
+        archs->push_back(kA73);
+        break;
+      case 0x800:
+        // 835
+        archs->push_back(kA73);
...
...
@@ -677,49 +173,31 @@ void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
         archs->push_back(kA72);
         break;
       default:
-        LOG(ERROR) << "unknow type";
+        LOG(ERROR) << "Unknow cpu arch: " << arch_id;
         archs->push_back(kARMArch_UNKOWN);
     }
   }
 }
   fclose(fp);
-  int cpu_count = arm_get_cpucount();
-  if (archs->size() < cpu_count) {
-    for (int i = archs->size(); i < cpu_count; ++i) {
+  if (archs->size() < cpu_num) {
+    for (int i = archs->size(); i < cpu_num; ++i) {
       archs->push_back(archs->at(i - 1));
     }
   }
-#endif
-#ifdef TARGET_IOS
-  int cpu_count = arm_get_cpucount();
-  for (int i = 0; i < cpu_count; ++i) {
+#elif defined(TARGET_IOS)
+  for (int i = 0; i < cpu_num; ++i) {
     archs->push_back(APPLE);
   }
+#else
+  for (int i = 0; i < cpu_num; ++i) {
+    archs->push_back(kARMArch_UNKOWN);
+  }
 #endif
 }

 #ifdef LITE_WITH_LINUX
-void set_default_cache(DeviceInfo* dev) {
-  int cpu_count = arm_get_cpucount();
-  dev->L1_cache_.resize(cpu_count);
-  dev->L2_cache_.resize(cpu_count);
-  dev->L3_cache_.resize(cpu_count);
-#ifdef TARGET_IOS
-  for (int i = 0; i < cpu_count; ++i) {
-    dev->L1_cache_[i] = 64 * 1024;
-    dev->L2_cache_[i] = 2048 * 1024;
-    dev->L3_cache_[i] = 0;
-  }
-#else
-  for (int i = 0; i < cpu_count; ++i) {
-    dev->L1_cache_[i] = 32 * 1024;
-    dev->L2_cache_[i] = 512 * 1024;
-    dev->L3_cache_[i] = 0;
-  }
-#endif
-}
-
-std::string arm_get_cpu_name() {
+std::string get_cpu_name() {
   FILE* fp = fopen("/proc/cpuinfo", "rb");
   if (!fp) {
     return "";
...
...
@@ -739,122 +217,163 @@ std::string arm_get_cpu_name() {
   return "";
 }

-int get_max_freq_khz(int cpuid) {
-  // first try, for all possible cpu
-  char path[256];
-  snprintf(path, sizeof(path),
-           "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuid);
-  FILE* fp = fopen(path, "rb");
-  if (!fp) {
-    // second try, for online cpu
-    snprintf(path, sizeof(path),
-             "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuid);
-    fp = fopen(path, "rb");
-    if (!fp) {
-      // third try, for online cpu
-      snprintf(path, sizeof(path),
-               "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuid);
-      fp = fopen(path, "rb");
-      if (!fp) {
-        return -1;
-      }
-      int max_freq_khz = -1;
-      fscanf(fp, "%d", &max_freq_khz);
-      fclose(fp);
-      return max_freq_khz;
-    }
-  }
-  int max_freq_khz = 0;
-  while (!feof(fp)) {
-    int freq_khz = 0;
-    int nscan = fscanf(fp, "%d %*d", &freq_khz);
-    if (nscan != 1) {
-      break;
-    }
-    if (freq_khz > max_freq_khz) {
-      max_freq_khz = freq_khz;
-    }
-  }
-  fclose(fp);
-  return max_freq_khz;
-}
+void get_cpu_max_min_freq(int cpu_id, int* max_freq, int* min_freq) {
+  *max_freq = 0;
+  *min_freq = 0;
+  // first try, for all possible cpu
+  char path[256];
+  snprintf(path, sizeof(path),
+           "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpu_id);
+  FILE* fp = fopen(path, "rb");
+  if (!fp) {
+    // second try, for online cpu
+    snprintf(path, sizeof(path),
+             "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpu_id);
+    fp = fopen(path, "rb");
+    if (!fp) {
+      // get max_freq
+      snprintf(path, sizeof(path),
+               "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu_id);
+      fp = fopen(path, "rb");
+      if (!fp) {
+        return;
+      }
+      fscanf(fp, "%d", max_freq);
+      fclose(fp);
+      // get min_freq
+      snprintf(path, sizeof(path),
+               "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq", cpu_id);
+      fp = fopen(path, "rb");
+      if (!fp) {
+        return;
+      }
+      fscanf(fp, "%d", min_freq);
+      fclose(fp);
+      return;
+    }
+  }
+  *min_freq = std::numeric_limits<int>::max();
+  while (!feof(fp)) {
+    int freq = 0;
+    int nscan = fscanf(fp, "%d %*d", &freq);
+    if (nscan != 1) {
+      break;
+    }
+    if (freq > *max_freq) {
+      *max_freq = freq;
+    }
+    if (freq < *min_freq) {
+      *min_freq = freq;
+    }
+  }
+  fclose(fp);
+}

-int arm_sort_cpuid_by_max_frequency(int cpu_count, std::vector<int>* cpuids,
-                                    const std::vector<int>& cpu_freq,
-                                    std::vector<int>* cluster_ids) {
-  if (cpu_count == 0) {
-    return 0;
-  }
-  cpuids->resize(cpu_count);
-  cluster_ids->resize(cpu_count);
-  for (int i = 0; i < cpu_count; i++) {
-    cpuids->at(i) = i;
-  }
-  // sort cpuid as big core first
-  // simple bubble sort
-  for (int i = 0; i < cpu_count; i++) {
-    for (int j = i + 1; j < cpu_count; j++) {
-      if (cpu_freq[i] < cpu_freq[j]) {
-        // swap
-        int tmp = cpuids->at(i);
-        cpuids->at(i) = cpuids->at(j);
-        cpuids->at(j) = tmp;
-      }
-    }
-  }
-  // SMP
-  int mid_max_freq_khz =
-      (cpu_freq[cpuids->at(0)] + cpu_freq[cpuids->at(cpu_count - 1)]) / 2;
-  for (int i = 0; i < cpu_count; i++) {
-    cpuids->at(i) = i;
-    if (cpu_freq[i] >= mid_max_freq_khz) {
-      cluster_ids->at(i) = 0;
-    } else {
-      cluster_ids->at(i) = 1;
-    }
-  }
-  return 0;
-}
+void sort_cpuid_by_max_freq(const std::vector<int>& max_freqs,
+                            std::vector<int>* cpu_ids,
+                            std::vector<int>* cluster_ids) {
+  int cpu_num = max_freqs.size();
+  if (cpu_num == 0) {
+    return;
+  }
+  cpu_ids->resize(cpu_num);
+  cluster_ids->resize(cpu_num);
+  for (int i = 0; i < cpu_num; i++) {
+    cpu_ids->at(i) = i;
+  }
+  // sort cpuid as big core first
+  // simple bubble sort
+  for (int i = 0; i < cpu_num; i++) {
+    for (int j = i + 1; j < cpu_num; j++) {
+      if (max_freqs[i] < max_freqs[j]) {
+        // swap
+        int tmp = cpu_ids->at(i);
+        cpu_ids->at(i) = cpu_ids->at(j);
+        cpu_ids->at(j) = tmp;
+      }
+    }
+  }
+  // SMP
+  int mid_max_freq =
+      (max_freqs[cpu_ids->at(0)] + max_freqs[cpu_ids->at(cpu_num - 1)]) / 2;
+  for (int i = 0; i < cpu_num; i++) {
+    cpu_ids->at(i) = i;
+    if (max_freqs[i] >= mid_max_freq) {
+      cluster_ids->at(i) = 0;
+    } else {
+      cluster_ids->at(i) = 1;
+    }
+  }
+}

-int check_online(const std::vector<int>& core_ids) {
-  if (core_ids.size() == 0) {
-    return 0;
-  }
-  char path[256];
-  int online = 1;
-  for (int i = 0; i < core_ids.size(); ++i) {
-    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/online",
-             core_ids[i]);
-    FILE* fp = fopen(path, "rb");
-    if (!fp) {
-      return 0;
-    }
-    int cur_online = 0;
-    fscanf(fp, "%d", &cur_online);
-    online &= cur_online;
-    fclose(fp);
-  }
-  return online;
-}
+void get_cpu_cache_size(int cpu_id, int* l1_cache_size, int* l2_cache_size,
+                        int* l3_cache_size) {
+  int max_cache_idx_num = 10;
+  *l1_cache_size = DEFAULT_L1_CACHE_SIZE;
+  *l2_cache_size = DEFAULT_L2_CACHE_SIZE;
+  *l3_cache_size = DEFAULT_L3_CACHE_SIZE;
+  for (int i = 0; i < max_cache_idx_num; i++) {
+    char path[256];
+    snprintf(path, sizeof(path),
+             "/sys/devices/system/cpu/cpu%d/cache/index%d/level", cpu_id, i);
+    FILE* fp = fopen(path, "rb");
+    if (fp) {
+      int level = -1;
+      fscanf(fp, "%d", &level);
+      fclose(fp);
+      snprintf(path, sizeof(path),
+               "/sys/devices/system/cpu/cpu%d/cache/index%d/size", cpu_id, i);
+      fp = fopen(path, "rb");
+      if (fp) {
+        int size = -1;
+        fscanf(fp, "%d", &size);
+        fclose(fp);
+        if (size >= 0) {
+          if (level == 1) {
+            *l1_cache_size = size * 1024;
+          } else if (level == 2) {
+            *l2_cache_size = size * 1024;
+          } else if (level == 3) {
+            *l3_cache_size = size * 1024;
+          }
+        }
+      }
+    }
+  }
+}
+
+bool check_cpu_online(const std::vector<int>& cpu_ids) {
+  if (cpu_ids.size() == 0) {
+    return false;
+  }
+  char path[256];
+  bool all_online = true;
+  for (int i = 0; i < cpu_ids.size(); ++i) {
+    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/online",
+             cpu_ids[i]);
+    FILE* fp = fopen(path, "rb");
+    int is_online = 0;
+    if (fp) {
+      fscanf(fp, "%d", &is_online);
+      fclose(fp);
+    } else {
+      LOG(ERROR) << "Failed to query the online statue of CPU id:"
+                 << cpu_ids[i];
+    }
+    if (is_online == 0) {
+      all_online = false;
+      LOG(ERROR) << "CPU id:" << cpu_ids[i] << " is offine";
+    }
+  }
+  return all_online;
+}

-int set_sched_affinity(const std::vector<int>& cpuids) {
+int set_sched_affinity(const std::vector<int>& cpu_ids) {
   // #define CPU_SETSIZE 1024
   // #define __NCPUBITS (8 * sizeof (unsigned long))
   // typedef struct
...
...
@@ -870,20 +389,569 @@ int set_sched_affinity(const std::vector<int>& cpuids) {
 #endif
   cpu_set_t mask;
   CPU_ZERO(&mask);
-  for (int i = 0; i < cpuids.size(); i++) {
-    CPU_SET(cpuids[i], &mask);
+  for (int i = 0; i < cpu_ids.size(); ++i) {
+    CPU_SET(cpu_ids[i], &mask);
   }
   int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
   if (syscallret) {
     LOG(ERROR) << "syscall error " << syscallret;
     return -1;
   }
   return 0;
 }

+bool bind_threads(const std::vector<int> cpu_ids) {
+#ifdef ARM_WITH_OMP
+  int thread_num = cpu_ids.size();
+  omp_set_num_threads(thread_num);
+  std::vector<int> ssarets;
+  for (int i = 0; i < thread_num; ++i) {
+    ssarets.push_back(0);
+  }
+#pragma omp parallel for
+  for (int i = 0; i < thread_num; i++) {
+    ssarets[i] = set_sched_affinity(cpu_ids);
+  }
+  for (int i = 0; i < thread_num; i++) {
+    if (ssarets[i] != 0) {
+      LOG(ERROR) << "Set cpu affinity failed, core id: " << cpu_ids[i];
+      return false;
+    }
+  }
+#else   // ARM_WITH_OMP
+  std::vector<int> first_cpu_id;
+  first_cpu_id.push_back(cpu_ids[0]);
+  int ssaret = set_sched_affinity(first_cpu_id);
+  if (ssaret != 0) {
+    LOG(ERROR) << "Set cpu affinity failed, core id: " << cpu_ids[0];
+    return false;
+  }
+#endif  // ARM_WITH_OMP
+}
+
+#endif  // LITE_WITH_LINUX
+
+// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
+void DeviceInfo::SetCacheInfo(int cache_id, int argc, ...) {
+  va_list arg_ptr;
+  va_start(arg_ptr, argc);
+  std::vector<int>* cache;
+  switch (cache_id) {
+    case 0:
+      cache = &L1_cache_;
+      break;
+    case 1:
+      cache = &L2_cache_;
+      break;
+    case 2:
+      cache = &L3_cache_;
+      break;
+    default:
+      break;
+  }
+  cache->resize(core_num_);
+  if (argc == 1) {
+    int cache_size = va_arg(arg_ptr, int);
+    for (int i = 0; i < core_num_; ++i) {
+      (*cache)[i] = cache_size;
+    }
+  } else {
+    int big_core_num = big_core_ids_.size();
+    int little_core_num = little_core_ids_.size();
+    int big_core_cache_size = va_arg(arg_ptr, int);
+    int little_core_cache_size = va_arg(arg_ptr, int);
+    for (int i = 0; i < big_core_num; ++i) {
+      (*cache)[big_core_ids_[i]] = big_core_cache_size;
+    }
+    for (int i = 0; i < little_core_num; ++i) {
+      (*cache)[little_core_ids_[i]] = little_core_cache_size;
+    }
+  }
+  va_end(arg_ptr);
+}
+
+void DeviceInfo::SetArchInfo(int argc, ...) {
+  va_list arg_ptr;
+  va_start(arg_ptr, argc);
+  archs_.resize(core_num_);
+  if (argc == 1) {
+    ARMArch arch = (ARMArch)va_arg(arg_ptr, int);
+    for (int i = 0; i < core_num_; ++i) {
+      archs_[i] = arch;
+    }
+  } else {
+    ARMArch big_core_arch = (ARMArch)va_arg(arg_ptr, int);
+    ARMArch little_core_arch = (ARMArch)va_arg(arg_ptr, int);
+    int big_core_num = big_core_ids_.size();
+    int little_core_num = little_core_ids_.size();
+    for (int i = 0; i < big_core_num; ++i) {
+      archs_[big_core_ids_[i]] = big_core_arch;
+    }
+    for (int i = 0; i < little_core_num; ++i) {
+      archs_[little_core_ids_[i]] = little_core_arch;
+    }
+  }
+  va_end(arg_ptr);
+}
+
+bool DeviceInfo::SetCPUInfoByName() {
+  /* Snapdragon */
+  if (dev_name_.find("SM8150") != std::string::npos) {  // 855
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {4, 5, 6, 7}; little_core_ids_ = {0, 1, 2, 3};
+    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
+    SetArchInfo(2, kA76, kA55);
+    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
+    SetCacheInfo(1, 2, 256 * 1024, 128 * 1024);
+    SetCacheInfo(2, 1, 2048 * 1024);
+    return true;
+  } else if (dev_name_.find("SDM845") != std::string::npos) {  // 845
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {4, 5, 6, 7}; little_core_ids_ = {0, 1, 2, 3};
+    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
+    SetArchInfo(2, kA75, kA55);
+    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
+    SetCacheInfo(1, 2, 256 * 1024, 128 * 1024);
+    SetCacheInfo(2, 1, 2048 * 1024);
+    return true;
+  } else if (dev_name_.find("SDM710") != std::string::npos) {  // 710
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {6, 7}; little_core_ids_ = {0, 1, 2, 3, 4, 5};
+    cluster_ids_ = {1, 1, 1, 1, 1, 1, 0, 0};
+    SetArchInfo(2, kA75, kA55);
+    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
+    SetCacheInfo(1, 2, 256 * 1024, 128 * 1024);
+    SetCacheInfo(2, 1, 1024 * 1024);
+    return true;
+  } else if (dev_name_.find("MSM8998") != std::string::npos) {  // 835
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {4, 5, 6, 7}; little_core_ids_ = {0, 1, 2, 3};
+    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
+    SetArchInfo(2, kA73, kA53);
+    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
+    SetCacheInfo(1, 2, 1024 * 1024,
+                 /*real cache size is 2M, while that will get bad performace
+                    on conv3x3s1 or gemm, set to 1M or 512K*/
+                 1024 * 1024);
+    return true;
+  } else if (dev_name_.find("MSM8996") != std::string::npos) {  // 820
+    core_num_ = 4; core_ids_ = {0, 1, 2, 3};
+    big_core_ids_ = {2, 3}; little_core_ids_ = {0, 1};
+    cluster_ids_ = {1, 1, 0, 0};
+    SetArchInfo(1, kA72);
+    SetCacheInfo(0, 1, 24 * 1024);
+    SetCacheInfo(1, 2, 1024 * 1024, 512 * 1024);
+    return true;
+  } else if (dev_name_.find("SDM660") != std::string::npos ||
+             dev_name_.find("SDM636") != std::string::npos) {  // 660, 636
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {4, 5, 6, 7}; little_core_ids_ = {0, 1, 2, 3};
+    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
+    SetArchInfo(1, kA73);
+    SetCacheInfo(0, 2, 64 * 1024, 32 * 1024);
+    SetCacheInfo(1, 1, 1024 * 1024);
+    return true;
+  } else if (dev_name_.find("MSM8976") != std::string::npos) {  // 652,653
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {4, 5, 6, 7}; little_core_ids_ = {0, 1, 2, 3};
+    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
+    SetArchInfo(2, kA72, kA53);
+    SetCacheInfo(0, 1, 32 * 1024);
+    SetCacheInfo(1, 2, 1024 * 1024, 512 * 1024);
+    return true;
+  } else if (dev_name_.find("MSM8953") != std::string::npos) {  // 625
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7}; little_core_ids_ = {};
+    cluster_ids_ = {0, 0, 0, 0, 0, 0, 0, 0};
+    SetArchInfo(1, kA53);
+    SetCacheInfo(0, 1, 32 * 1024);
+    SetCacheInfo(1, 1, 1024 * 1024);
+    return true;
+  } else if (dev_name_.find("MSM8939") != std::string::npos) {  // 615
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {0, 1, 2, 3}; little_core_ids_ = {4, 5, 6, 7};
+    cluster_ids_ = {0, 0, 0, 0, 1, 1, 1, 1};
+    SetArchInfo(1, kA53);
+    SetCacheInfo(0, 1, 32 * 1024);
+    SetCacheInfo(1, 2, 512 * 1024, 256 * 1024);
+    return true;
+    /* MediaTek */
+  } else if (dev_name_.find("MT6797") != std::string::npos) {  // X20/X23/X25/X27
+    core_num_ = 10; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    big_core_ids_ = {8, 9}; little_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    cluster_ids_ = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
+    SetArchInfo(2, kA72, kA53);
+    SetCacheInfo(0, 1, 32 * 1024);
+    SetCacheInfo(1, 2, 1024 * 1024, 512 * 1024);
+    return true;
+  } else if (dev_name_.find("MT6799") != std::string::npos) {  // X30
+    core_num_ = 10; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    big_core_ids_ = {8, 9}; little_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    cluster_ids_ = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
+    SetArchInfo(2, kA73, kA53);
+    return true;
+  } else if (dev_name_.find("MT6795") != std::string::npos ||
+             dev_name_.find("MT6762") != std::string::npos ||
+             dev_name_.find("MT6755T") != std::string::npos ||
+             dev_name_.find("MT6755S") != std::string::npos ||
+             dev_name_.find("MT6753") != std::string::npos ||
+             dev_name_.find("MT6752") != std::string::npos ||
+             dev_name_.find("MT6750") != std::string::npos) {
+    // X10, P22, P15/P18, MT6753, MT6752/MT6752M, MT6750
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7}; little_core_ids_ = {};
+    cluster_ids_ = {0, 0, 0, 0, 0, 0, 0, 0};
+    SetArchInfo(1, kA53);
+    return true;
+  } else if (dev_name_.find("MT6758") != std::string::npos ||
+             dev_name_.find("MT6757") != std::string::npos ||
+             dev_name_.find("MT6763") != std::string::npos ||
+             dev_name_.find("MT6755M") != std::string::npos ||
+             dev_name_.find("MT6755") != std::string::npos) {
+    // P30, P20/P25, P23, P10
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {4, 5, 6, 7}; little_core_ids_ = {0, 1, 2, 3};
+    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
+    SetArchInfo(1, kA53);
+    return true;
+  } else if (dev_name_.find("MT6771") != std::string::npos) {  // P60
+    core_num_ = 8; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
+    big_core_ids_ = {4, 5, 6, 7}; little_core_ids_ = {0, 1, 2, 3};
+    cluster_ids_ = {1, 1, 1, 1, 0, 0, 0, 0};
+    SetArchInfo(2, kA73, kA53);
+    return true;
+  } else if (dev_name_.find("MT6765") != std::string::npos ||
+             dev_name_.find("MT6739") != std::string::npos ||
+             dev_name_.find("MT6738") != std::string::npos ||
+             dev_name_.find("MT6737") != std::string::npos) {
+    // A22, MT6739, MT6738, MT6767
+    core_num_ = 4; core_ids_ = {0, 1, 2, 3};
+    big_core_ids_ = {0, 1, 2, 3}; little_core_ids_ = {};
+    cluster_ids_ = {0, 0, 0, 0};
+    SetArchInfo(1, kA53);
+    return true;
+  }
+  return false;
+}
+
+void DeviceInfo::SetCPUInfoByProb() {
+#ifdef LITE_WITH_LINUX
+  // get big.LITTLE cores by sorting CPU frequency
+  sort_cpuid_by_max_freq(max_freqs_, &core_ids_, &cluster_ids_);
+  big_core_ids_.clear();
+  little_core_ids_.clear();
+  for (int i = 0; i < cluster_ids_.size(); ++i) {
+    if (cluster_ids_[i] == 0) {
+      big_core_ids_.push_back(core_ids_[i]);
+    } else {
+      little_core_ids_.push_back(core_ids_[i]);
+    }
+  }
+  // get l1, l2, l3 cache size for each core
+  for (int i = 0; i < core_num_; i++) {
+    get_cpu_cache_size(i, &(L1_cache_[i]), &(L2_cache_[i]), &(L3_cache_[i]));
+  }
+#endif  // LITE_WITH_LINUX
+}
+
+void DeviceInfo::RequestPowerFullMode(const int thread_num) {
+  int big_core_size = big_core_ids_.size();
+  int little_core_size = little_core_ids_.size();
+  active_ids_.clear();
+  for (int i = 0; i < thread_num; ++i) {
+    if (i < big_core_size) {
+      active_ids_.push_back(big_core_ids_[i]);
+    } else if (i < big_core_size + little_core_size) {
+      active_ids_.push_back(little_core_ids_[i - big_core_size]);
+    }
+  }
+  mode_ = LITE_POWER_FULL;
+}
+
+void DeviceInfo::RequestPowerHighMode(const int thread_num) {
+  int big_core_size = big_core_ids_.size();
+  int little_core_size = little_core_ids_.size();
+  active_ids_.clear();
+  if (big_core_size > 0) {
+    mode_ = LITE_POWER_HIGH;
+    if (thread_num > big_core_size) {
+      LOG(ERROR) << "Request thread num: " << thread_num
+                 << ", exceed the big cores size: " << big_core_size
+                 << ", truncate thread num to " << big_core_size;
+      active_ids_ = big_core_ids_;
+    } else {
+      for (int i = 0; i < thread_num; ++i) active_ids_.push_back(big_core_ids_[i]);
+    }
+  } else {
+    mode_ = LITE_POWER_LOW;
+    LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores.";
+    if (thread_num > little_core_size) {
+      active_ids_ = little_core_ids_;
+    } else {
+      for (int i = 0; i < thread_num; ++i) active_ids_.push_back(little_core_ids_[i]);
+    }
+  }
+}
+
+void DeviceInfo::RequestPowerLowMode(const int thread_num) {
+  int big_core_size = big_core_ids_.size();
+  int little_core_size = little_core_ids_.size();
+  active_ids_.clear();
+  if (little_core_size > 0) {
+    mode_ = LITE_POWER_LOW;
+    if (thread_num > little_core_size) {
+      LOG(WARNING) << "Request thread num: " << thread_num
+                   << ", exceed the little cores size: " << little_core_size
+                   << ", truncate thread num to " << little_core_size;
+      active_ids_ = little_core_ids_;
+    } else {
+      for (int i = 0; i < thread_num; i++) active_ids_.push_back(little_core_ids_[i]);
+    }
+  } else {
+    mode_ = LITE_POWER_HIGH;
+    LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
+    if (thread_num > big_core_size) {
+      active_ids_ = big_core_ids_;
+    } else {
+      for (int i = 0; i < thread_num; i++) active_ids_.push_back(big_core_ids_[i]);
+    }
+  }
+}
+
+void DeviceInfo::RequestPowerNoBindMode(const int thread_num) {
+  active_ids_.clear();
+  for (int i = 0; i < thread_num; i++) {
+    active_ids_.push_back(0);
+  }
+  mode_ = LITE_POWER_NO_BIND;
+}
+
+void DeviceInfo::RequestPowerRandHighMode(const int shift_num,
+                                          const int thread_num) {
+  int big_core_size = big_core_ids_.size();
+  int little_core_size = little_core_ids_.size();
+  if (big_core_size > 0) {
+    mode_ = LITE_POWER_RAND_HIGH;
+    if (thread_num > big_core_size) {
+      LOG(WARNING) << "Request thread num: " << thread_num
+                   << ", exceed the big cores size: " << big_core_size
+                   << ", truncate thread num to " << big_core_size;
+      active_ids_ = big_core_ids_;
+    } else {
+      for (int i = 0; i < thread_num; ++i)
+        active_ids_.push_back(big_core_ids_[(i + shift_num) % big_core_size]);
+    }
+  } else {
+    mode_ = LITE_POWER_LOW;
+    LOG(WARNING) << "HIGH POWER MODE is not support, switch to little cores.";
+    if (thread_num > little_core_size) {
+      active_ids_ = little_core_ids_;
+    } else {
+      for (int i = 0; i < thread_num; ++i) active_ids_.push_back(little_core_ids_[i]);
+    }
+  }
+}
+
+void DeviceInfo::RequestPowerRandLowMode(const int shift_num,
+                                         const int thread_num) {
+  int big_core_size = big_core_ids_.size();
+  int little_core_size = little_core_ids_.size();
+  active_ids_.clear();
+  if (little_core_size > 0) {
+    mode_ = LITE_POWER_RAND_LOW;
+    if (thread_num > little_core_size) {
+      LOG(WARNING) << "Request thread num: " << thread_num
+                   << ", exceed the little cores size: " << little_core_size
+                   << ", truncate thread num to " << little_core_size;
+      active_ids_ = little_core_ids_;
+    } else {
+      for (int i = 0; i < thread_num; ++i)
+        active_ids_.push_back(little_core_ids_[(i + shift_num) % little_core_size]);
+    }
+  } else {
+    mode_ = LITE_POWER_HIGH;
+    LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores.";
+    if (thread_num > big_core_size) {
+      active_ids_ = big_core_ids_;
+    } else {
+      for (int i = 0; i < thread_num; ++i) active_ids_.push_back(big_core_ids_[i]);
+    }
+  }
+}
+
+int DeviceInfo::Setup() {
+  core_num_ = get_cpu_num();
+  mem_size_ = get_mem_size();
+  get_cpu_arch(&archs_, core_num_);
+  // set defalut CPU info
+  SetCacheInfo(0, DEFAULT_L1_CACHE_SIZE);
+  SetCacheInfo(1, DEFAULT_L2_CACHE_SIZE);
+  SetCacheInfo(2, DEFAULT_L3_CACHE_SIZE);
+#ifdef LITE_WITH_LINUX
+  // get max&min freq
+  max_freqs_.resize(core_num_);
+  min_freqs_.resize(core_num_);
+  for (int i = 0; i < core_num_; ++i) {
+    int max_freq, min_freq;
+    get_cpu_max_min_freq(i, &max_freq, &min_freq);
+    max_freqs_[i] = max_freq / 1000;
+    min_freqs_[i] = min_freq / 1000;
+  }
+  // get cache size and big.LITTLE core ids
+  dev_name_ = get_cpu_name();
+  if (!SetCPUInfoByName()) {
+    SetCPUInfoByProb();
+  }
+  // output info
+  LOG(INFO) << "ARM multiprocessors name: " << dev_name_;
+  LOG(INFO) << "ARM multiprocessors number: " << core_num_;
+  for (int i = 0; i < core_num_; ++i) {
+    LOG(INFO) << "ARM multiprocessors ID: " << core_ids_[i]
+              << ", max freq: " << max_freqs_[i]
+              << ", min freq: " << min_freqs_[i]
+              << ", cluster ID: " << cluster_ids_[core_ids_[i]]
+              << ", CPU ARCH: A" << archs_[i];
+  }
+  LOG(INFO) << "L1 DataCache size is: ";
+  for (int i = 0; i < core_num_; ++i) {
+    LOG(INFO) << L1_cache_[i] / 1024 << " KB";
+  }
+  LOG(INFO) << "L2 Cache size is: ";
+  for (int i = 0; i < core_num_; ++i) {
+    LOG(INFO) << L2_cache_[i] / 1024 << " KB";
+  }
+  LOG(INFO) << "Total memory: " << mem_size_ << "KB";
+#endif
+  // set default run mode
+  SetRunMode(LITE_POWER_NO_BIND, 1);  // use single thread by default
+  return 0;
+}
+
+void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
+#ifdef ARM_WITH_OMP
+  thread_num = std::min(thread_num, core_num_);
+#else
+  thread_num = 1;  // force thread_num to 1 if OpenMP is disabled
+#endif
+#ifdef LITE_WITH_LINUX
+  int big_core_size = big_core_ids_.size();
+  int little_core_size = little_core_ids_.size();
+  int big_little_core_size = big_core_size + little_core_size;
+  thread_num = std::min(thread_num, big_little_core_size);
+  count_++;
+  int shift_num = (count_ / 10) % big_core_size;
+  switch (mode) {
+    case LITE_POWER_FULL:
+      RequestPowerFullMode(thread_num);
+      break;
+    case LITE_POWER_HIGH:
+      RequestPowerHighMode(thread_num);
+      break;
+    case LITE_POWER_LOW:
+      RequestPowerLowMode(thread_num);
+      break;
+    case LITE_POWER_NO_BIND:
+      RequestPowerNoBindMode(thread_num);
+      break;
+    case LITE_POWER_RAND_HIGH:
+      RequestPowerRandHighMode(shift_num, thread_num);
+      break;
+    case LITE_POWER_RAND_LOW:
+      RequestPowerRandLowMode(shift_num, thread_num);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported power mode: " << mode;
+      break;
+  }
+  if (active_ids_.size() == 0) {
+    active_ids_.push_back(0);
+  }
+#ifdef ARM_WITH_OMP
+  omp_set_num_threads(active_ids_.size());
+#endif
+  if (mode_ != LITE_POWER_NO_BIND) {
+    if (check_cpu_online(active_ids_)) {
+      bind_threads(active_ids_);
+    } else {
+      LOG(WARNING) << "Some cores are offline, switch to NO BIND MODE";
+      mode_ = LITE_POWER_NO_BIND;
+    }
+  }
+#else   // LITE_WITH_LINUX
+  // only LITE_POWER_NO_BIND is supported in other OS
+  RequestPowerNoBindMode(thread_num);
+#ifdef ARM_WITH_OMP
+  omp_set_num_threads(active_ids_.size());
+#endif
+#endif  // LITE_WITH_LINUX
+  //! alloc memory for sgemm in this context
+  workspace_.Resize(
+      {static_cast<int64_t>(L2_cache_[active_ids_[0]] / sizeof(float))});
+  arch_ = archs_[active_ids_[0]];
+}
+
+void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
+  SetCacheInfo(0, l1size);
+  SetCacheInfo(1, l2size);
+  SetCacheInfo(2, l3size);
+  workspace_.Resize({2 * (l1size + l2size)});
+}
+
+bool DeviceInfo::ExtendWorkspace(DDimLite dims) {
+  auto count = dims.product();
+  auto old = workspace_.dims();
+  if (count == old.product()) {
+    return false;
+  }
+  workspace_.Resize(
+      {static_cast<int64_t>(count + L2_cache_[active_ids_[0]] / sizeof(float))});
+  return true;
+}
 #endif  // LITE_WITH_ARM
...
...
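The new bind_threads()/set_sched_affinity() pair pins the OpenMP workers to the selected set of cores through the raw sched_setaffinity syscall. A minimal Linux-only sketch of that mechanism outside the DeviceInfo class (the helper name is hypothetical; pid 0 in the syscall means "the calling thread"):

    #include <sched.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <vector>

    // Pin the calling thread to the given core ids via the raw syscall,
    // as set_sched_affinity() in the diff does.
    int PinCurrentThread(const std::vector<int>& cpu_ids) {
      cpu_set_t mask;
      CPU_ZERO(&mask);
      for (size_t i = 0; i < cpu_ids.size(); ++i) {
        CPU_SET(cpu_ids[i], &mask);
      }
      return syscall(__NR_sched_setaffinity, 0, sizeof(mask), &mask);
    }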
paddle/fluid/lite/core/cpu_info.h
...
...
@@ -14,6 +14,7 @@
 #pragma once
 #include <cstdarg>
+#include <string>
 #include <vector>
 #include "paddle/fluid/lite/core/lite_tensor.h"
...
...
@@ -47,92 +48,73 @@ typedef enum {
 class DeviceInfo {
  public:
-  int idx_;
-  int max_freq_;
-  int min_freq_;
-  int generate_arch_;
-  int compute_core_num_;
-  int max_memory_;
-  int sharemem_size_;
-  std::string device_name_;
-  std::string compute_ability_;
-  std::vector<int> L1_cache_;
-  std::vector<int> L2_cache_;
-  std::vector<int> L3_cache_;
-  std::vector<int> core_ids_;
-  std::vector<int> big_core_ids_;
-  std::vector<int> little_core_ids_;
-  std::vector<int> cluster_ids_;
-  std::vector<ARMArch> archs_;
-
-  ARMArch arch_;
-  // LITE_POWER_HIGH stands for using big cores,
-  // LITE_POWER_LOW stands for using small core,
-  // LITE_POWER_FULL stands for using all cores
-  PowerMode mode_;
-  std::vector<int> active_ids_;
-  TensorLite workspace_;
-  int64_t count_{0};
-
   static DeviceInfo& Global() {
     static auto* x = new DeviceInfo;
     return *x;
   }

-  static void Init() {
-    auto& info = Global();
-    InitInternal(&info);
+  static int Init() {
+    static int ret = Global().Setup();
+    return ret;
   }

-  void SetRunMode(PowerMode mode, int threads);
+  int Setup();
+
+  void SetRunMode(PowerMode mode, int thread_num);
   void SetCache(int l1size, int l2size, int l3size);
   void SetArch(ARMArch arch) { arch_ = arch; }
-  void BindDev();

   PowerMode mode() const { return mode_; }
   int threads() const { return active_ids_.size(); }
   ARMArch arch() const { return arch_; }
+  int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
+  int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
+  int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }

   template <typename T>
   T* workspace_data() {
     return workspace_.mutable_data<T>();
   }

-  int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
-  int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
-  int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }
   bool ExtendWorkspace(DDimLite dims);

  private:
-  DeviceInfo() = default;
-  static void InitInternal(DeviceInfo* dev);
-};
-
-size_t arm_get_meminfo();
-
-int arm_get_cpucount();
-
-void arm_get_cpu_arch(std::vector<ARMArch>* archs);
-
-bool get_cpu_info_from_name(DeviceInfo* cpu_info, std::string hardware_name);
-
-#ifdef LITE_WITH_LINUX
-
-void set_default_cache(DeviceInfo* dev);
-
-std::string arm_get_cpu_name();
-
-int get_max_freq_khz(int cpuid);
-
-int arm_sort_cpuid_by_max_frequency(int cpu_count, std::vector<int>* cpuids,
-                                    const std::vector<int>& cpu_freq,
-                                    std::vector<int>* cluster_ids);
-
-int check_online(const std::vector<int>& core_ids);
-
-int set_sched_affinity(const std::vector<int>& cpuids);
-
-#endif  // LITE_WITH_LINUX
+  int core_num_;
+  std::vector<int> max_freqs_;
+  std::vector<int> min_freqs_;
+  int mem_size_;
+  std::string dev_name_;
+
+  std::vector<int> L1_cache_;
+  std::vector<int> L2_cache_;
+  std::vector<int> L3_cache_;
+  std::vector<int> core_ids_;
+  std::vector<int> big_core_ids_;
+  std::vector<int> little_core_ids_;
+  std::vector<int> cluster_ids_;
+  std::vector<ARMArch> archs_;
+
+  ARMArch arch_;
+  // LITE_POWER_HIGH stands for using big cores,
+  // LITE_POWER_LOW stands for using small core,
+  // LITE_POWER_FULL stands for using all cores
+  PowerMode mode_;
+  std::vector<int> active_ids_;
+  TensorLite workspace_;
+  int64_t count_{0};
+
+  void SetCacheInfo(int cache_id, int argc, ...);
+  void SetArchInfo(int argc, ...);
+  bool SetCPUInfoByName();
+  void SetCPUInfoByProb();
+  void RequestPowerFullMode(const int thread_num);
+  void RequestPowerHighMode(const int thread_num);
+  void RequestPowerLowMode(const int thread_num);
+  void RequestPowerNoBindMode(const int thread_num);
+  void RequestPowerRandHighMode(const int shift_num, const int thread_num);
+  void RequestPowerRandLowMode(const int shift_num, const int thread_num);
+
+  DeviceInfo() = default;
+};

 #endif  // LITE_WITH_ARM
...
...
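Taken together, the new header narrows the public surface to Init()/Setup(), SetRunMode(), SetCache()/SetArch() and read-only accessors, with all probing helpers made private. A test-style sketch of the workspace-growth path built on that public API (the check itself and the DDimLite constructor used here are assumptions for illustration, not part of this commit):

    #include <vector>
    #include "paddle/fluid/lite/core/cpu_info.h"

    // Hypothetical check, not part of the commit.
    bool WorkspaceGrows() {
      using paddle::lite::DeviceInfo;
      DeviceInfo::Init();
      DeviceInfo::Global().SetRunMode(paddle::lite::LITE_POWER_NO_BIND, 1);
      paddle::lite::DDimLite dims(std::vector<int64_t>({1, 3, 224, 224}));  // assumed ctor
      bool resized = DeviceInfo::Global().ExtendWorkspace(dims);
      return resized && DeviceInfo::Global().workspace_data<float>() != nullptr;
    }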