Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
ce46ef22
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
“4d8345e3ac0ba79a17359a72e940ade284c0b1a9”上不存在“paddle/phi/kernels/cpu/shape_kernel.cc”
提交
ce46ef22
编写于
6月 20, 2019
作者:
H
hong19860320
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ARM cpu_info refine
test=develop
上级
251255bc
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
773 addition
and
724 deletion
+773
-724
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+4
-5
paddle/fluid/lite/core/cpu_info.cc
paddle/fluid/lite/core/cpu_info.cc
+726
-658
paddle/fluid/lite/core/cpu_info.h
paddle/fluid/lite/core/cpu_info.h
+43
-61
未找到文件。
paddle/fluid/lite/core/context.h
浏览文件 @
ce46ef22
...
@@ -67,7 +67,7 @@ class Context<TargetType::kARM> {
...
@@ -67,7 +67,7 @@ class Context<TargetType::kARM> {
ARMContext
&
operator
=
(
const
ARMContext
&
ctx
)
{}
ARMContext
&
operator
=
(
const
ARMContext
&
ctx
)
{}
// NOTE: InitOnce should only be used by ContextScheduler
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{}
void
InitOnce
()
{
DeviceInfo
::
Init
();
}
void
CopyShared
(
const
ARMContext
*
ctx
)
{}
void
CopyShared
(
const
ARMContext
*
ctx
)
{}
...
@@ -78,20 +78,19 @@ class Context<TargetType::kARM> {
...
@@ -78,20 +78,19 @@ class Context<TargetType::kARM> {
return
DeviceInfo
::
Global
().
SetCache
(
l1size
,
l2size
,
l3size
);
return
DeviceInfo
::
Global
().
SetCache
(
l1size
,
l2size
,
l3size
);
}
}
void
SetArch
(
ARMArch
arch
)
{
return
DeviceInfo
::
Global
().
SetArch
(
arch
);
}
void
SetArch
(
ARMArch
arch
)
{
return
DeviceInfo
::
Global
().
SetArch
(
arch
);
}
void
BindDev
()
{
return
DeviceInfo
::
Global
().
BindDev
();
}
PowerMode
mode
()
const
{
return
DeviceInfo
::
Global
().
mode
();
}
PowerMode
mode
()
const
{
return
DeviceInfo
::
Global
().
mode
();
}
int
threads
()
const
{
return
DeviceInfo
::
Global
().
threads
();
}
int
threads
()
const
{
return
DeviceInfo
::
Global
().
threads
();
}
ARMArch
arch
()
const
{
return
DeviceInfo
::
Global
().
arch
();
}
ARMArch
arch
()
const
{
return
DeviceInfo
::
Global
().
arch
();
}
int
l1_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l1_cache_size
();
}
int
l2_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l2_cache_size
();
}
int
l3_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l3_cache_size
();
}
template
<
typename
T
>
template
<
typename
T
>
T
*
workspace_data
()
{
T
*
workspace_data
()
{
return
DeviceInfo
::
Global
().
workspace_data
<
T
>
();
return
DeviceInfo
::
Global
().
workspace_data
<
T
>
();
}
}
int
l1_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l1_cache_size
();
}
int
l2_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l2_cache_size
();
}
int
l3_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l3_cache_size
();
}
bool
ExtendWorkspace
(
DDimLite
dims
)
{
bool
ExtendWorkspace
(
DDimLite
dims
)
{
return
DeviceInfo
::
Global
().
ExtendWorkspace
(
dims
);
return
DeviceInfo
::
Global
().
ExtendWorkspace
(
dims
);
}
}
...
...
paddle/fluid/lite/core/cpu_info.cc
浏览文件 @
ce46ef22
...
@@ -29,7 +29,8 @@
...
@@ -29,7 +29,8 @@
#include <omp.h>
#include <omp.h>
#endif
#endif
#include <cstdarg>
#include <algorithm>
#include <limits>
#include "paddle/fluid/lite/core/cpu_info.h"
#include "paddle/fluid/lite/core/cpu_info.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -37,853 +38,920 @@ namespace lite {
...
@@ -37,853 +38,920 @@ namespace lite {
#ifdef LITE_WITH_ARM
#ifdef LITE_WITH_ARM
void
DeviceInfo
::
InitInternal
(
DeviceInfo
*
dev
)
{
#ifdef TARGET_IOS
set_default_cache
(
dev
);
const
int
DEFAULT_L1_CACHE_SIZE
=
64
*
1024
;
dev
->
compute_core_num_
=
arm_get_cpucount
();
const
int
DEFAULT_L2_CACHE_SIZE
=
2048
*
1024
;
dev
->
max_memory_
=
arm_get_meminfo
();
const
int
DEFAULT_L3_CACHE_SIZE
=
0
;
#else
const
int
DEFAULT_L1_CACHE_SIZE
=
32
*
1024
;
const
int
DEFAULT_L2_CACHE_SIZE
=
512
*
1024
;
const
int
DEFAULT_L3_CACHE_SIZE
=
0
;
#endif
// get max freq
int
get_cpu_num
()
{
#ifdef LITE_WITH_LINUX
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
max_freq
(
dev
->
compute_core_num_
);
// get cpu count from /sys/devices/system/cpu/cpunum/uevent
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
int
max_cpu_num
=
20
;
max_freq
[
i
]
=
get_max_freq_khz
(
i
)
/
1000
;
int
cpu_num
=
0
;
}
for
(
int
i
=
0
;
i
<
max_cpu_num
;
++
i
)
{
std
::
string
cpu_name
=
arm_get_cpu_name
();
char
path
[
256
];
if
(
get_cpu_info_from_name
(
dev
,
cpu_name
)
!=
true
)
{
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/uevent"
,
i
);
arm_sort_cpuid_by_max_frequency
(
dev
->
compute_core_num_
,
&
dev
->
core_ids_
,
FILE
*
fp
=
fopen
(
path
,
"rb"
);
max_freq
,
&
dev
->
cluster_ids_
);
if
(
!
fp
)
{
dev
->
big_core_ids_
.
clear
();
break
;
dev
->
little_core_ids_
.
clear
();
}
for
(
int
i
=
0
;
i
<
dev
->
cluster_ids_
.
size
();
++
i
)
{
cpu_num
++
;
if
(
dev
->
cluster_ids_
[
i
]
==
0
)
{
fclose
(
fp
);
dev
->
big_core_ids_
.
push_back
(
dev
->
core_ids_
[
i
]);
}
else
{
dev
->
little_core_ids_
.
push_back
(
dev
->
core_ids_
[
i
]);
}
}
if
(
cpu_num
<
1
)
{
cpu_num
=
1
;
}
}
arm_get_cpu_arch
(
&
dev
->
archs_
);
return
cpu_num
;
#elif defined(TARGET_IOS)
int
cpu_num
=
0
;
size_t
len
=
sizeof
(
cpu_num
);
sysctlbyname
(
"hw.ncpu"
,
&
cpu_num
,
&
len
,
NULL
,
0
);
if
(
cpu_num
<
1
)
{
cpu_num
=
1
;
}
}
return
cpu_num
;
#else
return
1
;
#endif
}
LOG
(
INFO
)
<<
"ARM multiprocessors number: "
<<
dev
->
compute_core_num_
;
size_t
get_mem_size
()
{
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
#ifdef LITE_WITH_LINUX
LOG
(
INFO
)
<<
"ARM multiprocessors ID: "
<<
dev
->
core_ids_
[
i
]
// get cpu count from /proc/cpuinfo
<<
", frequence: "
<<
max_freq
[
i
]
FILE
*
fp
=
fopen
(
"/proc/meminfo"
,
"rb"
);
<<
", cluster ID: "
<<
dev
->
cluster_ids_
[
dev
->
core_ids_
[
i
]]
if
(
!
fp
)
{
<<
", CPU ARCH: A"
<<
dev
->
archs_
[
i
]
;
return
1
;
}
}
VLOG
(
1
)
<<
"L1 DataCache size is: "
;
size_t
memsize
=
0
;
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
char
line
[
1024
];
VLOG
(
1
)
<<
dev
->
L1_cache_
[
i
]
/
1024
<<
" KB"
;
while
(
!
feof
(
fp
))
{
char
*
s
=
fgets
(
line
,
1024
,
fp
);
if
(
!
s
)
{
break
;
}
}
VLOG
(
1
)
<<
"L2 Cache size is: "
;
sscanf
(
s
,
"MemTotal: %d kB"
,
&
memsize
);
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
VLOG
(
1
)
<<
dev
->
L2_cache_
[
i
]
/
1024
<<
" KB"
;
}
}
VLOG
(
1
)
<<
"Total memory: "
<<
dev
->
max_memory_
<<
"KB"
;
fclose
(
fp
);
return
memsize
;
#elif defined(TARGET_IOS)
// to be implemented
printf
(
"not implemented
\n
"
);
#endif
return
0
;
}
dev
->
max_freq_
=
max_freq
[
0
];
void
get_cpu_arch
(
std
::
vector
<
ARMArch
>*
archs
,
const
int
cpu_num
)
{
for
(
int
j
=
1
;
j
<
dev
->
compute_core_num_
;
++
j
)
{
archs
->
clear
();
if
(
dev
->
max_freq_
<
max_freq
[
j
])
{
#ifdef LITE_WITH_LINUX
dev
->
max_freq_
=
max_freq
[
j
];
//! get CPU ARCH
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
if
(
!
fp
)
{
return
;
}
char
line
[
1024
];
while
(
!
feof
(
fp
))
{
char
*
s
=
fgets
(
line
,
1024
,
fp
);
if
(
!
s
)
{
break
;
}
if
(
strstr
(
line
,
"part"
)
!=
NULL
)
{
int
arch_id
=
0
;
sscanf
(
s
,
"CPU part
\t
: %x"
,
&
arch_id
);
switch
(
arch_id
)
{
case
0xd03
:
archs
->
push_back
(
kA53
);
break
;
case
0xd05
:
archs
->
push_back
(
kA55
);
break
;
case
0xd07
:
archs
->
push_back
(
kA57
);
break
;
case
0xd08
:
archs
->
push_back
(
kA72
);
break
;
case
0xd09
:
archs
->
push_back
(
kA73
);
break
;
case
0xd0a
:
archs
->
push_back
(
kA75
);
break
;
case
0xd40
:
archs
->
push_back
(
kA76
);
break
;
case
0x804
:
// 855
archs
->
push_back
(
kA76
);
break
;
case
0x805
:
// 855
archs
->
push_back
(
kA55
);
break
;
case
0x802
:
// 845
archs
->
push_back
(
kA75
);
break
;
case
0x803
:
// 845
archs
->
push_back
(
kA55
);
break
;
case
0x801
:
// 835
archs
->
push_back
(
kA73
);
break
;
case
0x800
:
// 835
archs
->
push_back
(
kA73
);
break
;
case
0x205
:
// 820
archs
->
push_back
(
kA72
);
break
;
default:
LOG
(
ERROR
)
<<
"Unknow cpu arch: "
<<
arch_id
;
archs
->
push_back
(
kARMArch_UNKOWN
);
}
}
}
fclose
(
fp
);
if
(
archs
->
size
()
<
cpu_num
)
{
for
(
int
i
=
archs
->
size
();
i
<
cpu_num
;
++
i
)
{
archs
->
push_back
(
archs
->
at
(
i
-
1
));
}
}
}
}
#elif defined(TARGET_IOS)
#elif defined(TARGET_IOS)
arm_get_cpu_arch
(
&
dev
->
archs_
);
for
(
int
i
=
0
;
i
<
cpu_num
;
++
i
)
{
#endif
archs
->
push_back
(
APPLE
);
dev
->
active_ids_
=
{
0
};
}
dev
->
mode_
=
LITE_POWER_HIGH
;
dev
->
workspace_
.
Resize
({
static_cast
<
int64_t
>
(
dev
->
L2_cache_
[
dev
->
active_ids_
[
0
]]
/
sizeof
(
float
))});
#ifdef TARGET_IOS
dev
->
arch_
=
APPLE
;
// use 6x8
#else
#else
if
(
dev
->
big_core_ids_
.
size
()
>
0
)
{
for
(
int
i
=
0
;
i
<
cpu_num
;
++
i
)
{
dev
->
arch_
=
dev
->
archs_
[
dev
->
big_core_ids_
[
0
]]
;
archs
->
push_back
(
kARMArch_UNKOWN
)
;
}
}
#endif
#endif
}
}
void
DeviceInfo
::
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
int
cpu_count
=
arm_get_cpucount
();
L1_cache_
.
resize
(
cpu_count
);
L2_cache_
.
resize
(
cpu_count
);
L3_cache_
.
resize
(
cpu_count
);
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
L1_cache_
[
i
]
=
l1size
;
L2_cache_
[
i
]
=
l2size
;
L3_cache_
[
i
]
=
l3size
;
}
workspace_
.
Resize
({
2
*
(
l1size
+
l2size
)});
}
void
DeviceInfo
::
BindDev
()
{
#ifdef ARM_WITH_OMP
int
num_threads
=
active_ids_
.
size
();
omp_set_num_threads
(
num_threads
);
#ifdef LITE_WITH_LINUX
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
ssarets
;
for
(
int
j
=
0
;
j
<
num_threads
;
++
j
)
{
std
::
string
get_cpu_name
()
{
ssarets
.
push_back
(
0
);
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
}
if
(
!
fp
)
{
#pragma omp parallel for
return
""
;
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
ssarets
[
i
]
=
set_sched_affinity
(
active_ids_
);
}
}
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
char
line
[
1024
];
if
(
ssarets
[
i
]
!=
0
)
{
while
(
!
feof
(
fp
))
{
LOG
(
ERROR
)
<<
"set cpu affinity failed, cpuID: "
<<
active_ids_
[
i
];
char
*
s
=
fgets
(
line
,
1024
,
fp
);
return
;
if
(
!
s
)
{
break
;
}
}
if
(
strstr
(
line
,
"Hardware"
)
!=
NULL
)
{
fclose
(
fp
);
return
std
::
string
(
line
);
}
}
#endif // LITE_WITH_LINUX
#else // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
cpuid1
;
cpuid1
.
push_back
(
active_ids_
[
0
]);
int
ssaret
=
set_sched_affinity
(
cpuid1
);
if
(
ssaret
!=
0
)
{
printf
(
"set cpu affinity failed, cpuID: %d
\n
"
,
active_ids_
[
0
]);
return
;
}
}
#endif // LITE_WITH_LINUX
fclose
(
fp
);
#endif // ARM_WITH_OMP
return
""
;
}
}
void
DeviceInfo
::
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
void
get_cpu_max_min_freq
(
int
cpu_id
,
int
*
max_freq
,
int
*
min_freq
)
{
int
big_core_size
=
big_core_ids_
.
size
();
*
max_freq
=
0
;
int
small_core_size
=
little_core_ids_
.
size
();
*
min_freq
=
0
;
if
(
threads
>
big_core_size
+
small_core_size
)
{
// first try, for all possible cpu
threads
=
big_core_size
+
small_core_size
;
char
path
[
256
];
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state"
,
cpu_id
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
// second try, for online cpu
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state"
,
cpu_id
);
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
// third try, for online cpu
// get max_freq
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
,
cpu_id
);
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
return
;
}
}
#ifdef ARM_WITH_OMP
fscanf
(
fp
,
"%d"
,
max_freq
);
count_
++
;
fclose
(
fp
);
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
// get min_freq
switch
(
mode
)
{
snprintf
(
path
,
sizeof
(
path
),
case
LITE_POWER_FULL
:
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq"
,
mode_
=
mode
;
cpu_id
);
active_ids_
.
clear
();
fp
=
fopen
(
path
,
"rb"
);
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
if
(
!
fp
)
{
if
(
i
<
big_core_size
)
{
return
;
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
else
{
active_ids_
.
push_back
(
little_core_ids_
[
i
-
big_core_size
]);
}
}
fscanf
(
fp
,
"%d"
,
min_freq
);
fclose
(
fp
);
return
;
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
}
*
min_freq
=
std
::
numeric_limits
<
int
>::
max
();
while
(
!
feof
(
fp
))
{
int
freq
=
0
;
int
nscan
=
fscanf
(
fp
,
"%d %*d"
,
&
freq
);
if
(
nscan
!=
1
)
{
break
;
break
;
case
LITE_POWER_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
ERROR
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
if
(
freq
>
*
max_freq
)
{
*
max_freq
=
freq
;
}
}
}
else
{
if
(
freq
<
*
min_freq
)
{
mode_
=
LITE_POWER_LOW
;
*
min_freq
=
freq
;
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores."
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
}
fclose
(
fp
);
}
void
sort_cpuid_by_max_freq
(
const
std
::
vector
<
int
>&
max_freqs
,
std
::
vector
<
int
>*
cpu_ids
,
std
::
vector
<
int
>*
cluster_ids
)
{
int
cpu_num
=
max_freqs
.
size
();
if
(
cpu_num
==
0
)
{
return
;
}
}
if
(
active_ids_
.
size
()
==
0
)
{
cpu_ids
->
resize
(
cpu_num
);
active_ids_
.
push_back
(
0
);
cluster_ids
->
resize
(
cpu_num
);
for
(
int
i
=
0
;
i
<
cpu_num
;
i
++
)
{
cpu_ids
->
at
(
i
)
=
i
;
}
}
break
;
// sort cpuid as big core first
case
LITE_POWER_LOW
:
// simple bubble sort
active_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
cpu_num
;
i
++
)
{
if
(
small_core_size
>
0
)
{
for
(
int
j
=
i
+
1
;
j
<
cpu_num
;
j
++
)
{
mode_
=
LITE_POWER_LOW
;
if
(
max_freqs
[
i
]
<
max_freqs
[
j
])
{
if
(
threads
>
small_core_size
)
{
// swap
LOG
(
WARNING
)
<<
"threads: "
<<
threads
int
tmp
=
cpu_ids
->
at
(
i
);
<<
", exceed the little cores size: "
<<
small_core_size
;
cpu_ids
->
at
(
i
)
=
cpu_ids
->
at
(
j
);
active_ids_
=
little_core_ids_
;
cpu_ids
->
at
(
j
)
=
tmp
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
}
}
// SMP
int
mid_max_freq
=
(
max_freqs
[
cpu_ids
->
at
(
0
)]
+
max_freqs
[
cpu_ids
->
at
(
cpu_num
-
1
)])
/
2
;
for
(
int
i
=
0
;
i
<
cpu_num
;
i
++
)
{
cpu_ids
->
at
(
i
)
=
i
;
if
(
max_freqs
[
i
]
>=
mid_max_freq
)
{
cluster_ids
->
at
(
i
)
=
0
;
}
else
{
}
else
{
mode_
=
LITE_POWER_HIGH
;
cluster_ids
->
at
(
i
)
=
1
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
}
}
void
get_cpu_cache_size
(
int
cpu_id
,
int
*
l1_cache_size
,
int
*
l2_cache_size
,
int
*
l3_cache_size
)
{
int
max_cache_idx_num
=
10
;
*
l1_cache_size
=
DEFAULT_L1_CACHE_SIZE
;
*
l2_cache_size
=
DEFAULT_L2_CACHE_SIZE
;
*
l3_cache_size
=
DEFAULT_L3_CACHE_SIZE
;
for
(
int
i
=
0
;
i
<
max_cache_idx_num
;
i
++
)
{
char
path
[
256
];
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cache/index%d/level"
,
cpu_id
,
i
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
fp
)
{
int
level
=
-
1
;
fscanf
(
fp
,
"%d"
,
&
level
);
fclose
(
fp
);
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cache/index%d/size"
,
cpu_id
,
i
);
fp
=
fopen
(
path
,
"rb"
);
if
(
fp
)
{
int
size
=
-
1
;
fscanf
(
fp
,
"%d"
,
&
size
);
fclose
(
fp
);
if
(
size
>=
0
)
{
if
(
level
==
1
)
{
*
l1_cache_size
=
size
*
1024
;
}
else
if
(
level
==
2
)
{
*
l2_cache_size
=
size
*
1024
;
}
else
if
(
level
==
3
)
{
*
l3_cache_size
=
size
*
1024
;
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
}
break
;
case
LITE_POWER_NO_BIND
:
mode_
=
LITE_POWER_NO_BIND
;
active_ids_
.
clear
();
if
(
threads
>
core_ids_
.
size
())
{
active_ids_
.
resize
(
core_ids_
.
size
());
}
else
{
active_ids_
.
resize
(
threads
);
}
}
break
;
case
LITE_POWER_RAND_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[(
i
+
shift_num
)
%
big_core_size
]);
}
}
}
}
}
else
{
}
mode_
=
LITE_POWER_LOW
;
LOG
(
WARNING
)
bool
check_cpu_online
(
const
std
::
vector
<
int
>&
cpu_ids
)
{
<<
"HIGH POWER MODE is not support, switch to little cores."
;
if
(
cpu_ids
.
size
()
==
0
)
{
if
(
threads
>
small_core_size
)
{
return
false
;
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
char
path
[
256
];
bool
all_online
=
true
;
for
(
int
i
=
0
;
i
<
cpu_ids
.
size
();
++
i
)
{
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/online"
,
cpu_ids
[
i
]);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
int
is_online
=
0
;
if
(
fp
)
{
fscanf
(
fp
,
"%d"
,
&
is_online
);
fclose
(
fp
);
}
else
{
LOG
(
ERROR
)
<<
"Failed to query the online statue of CPU id:"
<<
cpu_ids
[
i
];
}
}
if
(
is_online
==
0
)
{
all_online
=
false
;
LOG
(
ERROR
)
<<
"CPU id:"
<<
cpu_ids
[
i
]
<<
" is offine"
;
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_RAND_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[(
i
+
shift_num
)
%
small_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores."
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
return
all_online
;
}
int
set_sched_affinity
(
const
std
::
vector
<
int
>&
cpu_ids
)
{
// #define CPU_SETSIZE 1024
// #define __NCPUBITS (8 * sizeof (unsigned long))
// typedef struct
// {
// unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
// } cpu_set_t;
// set affinity for thread
#ifdef __GLIBC__
pid_t
pid
=
syscall
(
SYS_gettid
);
#else
pid_t
pid
=
gettid
();
#endif
cpu_set_t
mask
;
CPU_ZERO
(
&
mask
);
for
(
int
i
=
0
;
i
<
cpu_ids
.
size
();
++
i
)
{
CPU_SET
(
cpu_ids
[
i
],
&
mask
);
}
}
int
syscallret
=
syscall
(
__NR_sched_setaffinity
,
pid
,
sizeof
(
mask
),
&
mask
);
if
(
syscallret
)
{
return
-
1
;
}
}
if
(
active_ids_
.
size
()
==
0
)
{
return
0
;
active_ids_
.
push_back
(
0
);
}
bool
bind_threads
(
const
std
::
vector
<
int
>
cpu_ids
)
{
#ifdef ARM_WITH_OMP
int
thread_num
=
cpu_ids
.
size
();
omp_set_num_threads
(
thread_num
);
std
::
vector
<
int
>
ssarets
;
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
ssarets
.
push_back
(
0
);
}
}
break
;
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
ssarets
[
i
]
=
set_sched_affinity
(
cpu_ids
);
}
}
//! fix multi-threads LITE_POWER_HIGH mode
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
if
(
mode_
==
LITE_POWER_NO_BIND
||
threads
>
1
)
{
if
(
ssarets
[
i
]
!=
0
)
{
int
threads
=
active_ids_
.
size
();
LOG
(
ERROR
)
<<
"Set cpu affinity failed, core id: "
<<
cpu_ids
[
i
];
omp_set_num_threads
(
threads
);
return
false
;
}
else
{
if
(
check_online
(
active_ids_
))
{
BindDev
();
}
else
{
LOG
(
WARNING
)
<<
"core id "
<<
active_ids_
[
0
]
<<
" is offline, switch to NO BIND MODE"
;
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
}
}
}
#else
#else // ARM_WITH_OMP
if
(
big_core_size
>
0
)
{
std
::
vector
<
int
>
first_cpu_id
;
active_ids_
=
{
big_core_ids_
[
0
]};
first_cpu_id
.
push_back
(
cpu_ids
[
0
]);
}
else
{
int
ssaret
=
set_sched_affinity
(
first_cpu_id
);
active_ids_
=
{
0
};
if
(
ssaret
!=
0
)
{
}
LOG
(
ERROR
)
<<
"Set cpu affinity failed, core id: "
<<
cpu_ids
[
0
];
#endif
//! alloc memory for sgemm in this context
int
temp_mem_size
=
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
);
workspace_
.
Resize
({
temp_mem_size
});
arch_
=
archs_
[
active_ids_
[
0
]];
}
bool
DeviceInfo
::
ExtendWorkspace
(
DDimLite
dims
)
{
auto
count
=
dims
.
product
();
auto
old
=
workspace_
.
dims
();
if
(
count
==
old
.
product
())
{
return
false
;
return
false
;
}
}
#endif // ARM_WITH_OMP
workspace_
.
Resize
({
static_cast
<
int64_t
>
(
count
+
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
))});
return
true
;
}
}
#endif // LITE_WITH_LINUX
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
void
set_cache_info
(
DeviceInfo
*
cpu_info
,
int
cache_id
,
int
argc
,
...)
{
void
DeviceInfo
::
SetCacheInfo
(
int
cache_id
,
int
argc
,
...)
{
va_list
arg_ptr
;
va_list
arg_ptr
;
va_start
(
arg_ptr
,
argc
);
va_start
(
arg_ptr
,
argc
);
std
::
vector
<
int
>*
cache
;
std
::
vector
<
int
>*
cache
;
switch
(
cache_id
)
{
switch
(
cache_id
)
{
case
0
:
case
0
:
cache
=
&
cpu_info
->
L1_cache_
;
cache
=
&
L1_cache_
;
break
;
break
;
case
1
:
case
1
:
cache
=
&
cpu_info
->
L2_cache_
;
cache
=
&
L2_cache_
;
break
;
break
;
case
2
:
case
2
:
cache
=
&
cpu_info
->
L3_cache_
;
cache
=
&
L3_cache_
;
break
;
break
;
default:
default:
break
;
break
;
}
}
int
core_num
=
cpu_info
->
compute_core_num_
;
cache
->
resize
(
core_num_
);
cache
->
resize
(
core_num
);
if
(
argc
==
1
)
{
if
(
argc
==
1
)
{
int
cache_size
=
va_arg
(
arg_ptr
,
int
);
int
cache_size
=
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
core_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
core_num
_
;
++
i
)
{
(
*
cache
)[
i
]
=
cache_size
;
(
*
cache
)[
i
]
=
cache_size
;
}
}
}
else
{
}
else
{
int
big_core_num
=
cpu_info
->
big_core_ids_
.
size
();
int
big_core_num
=
big_core_ids_
.
size
();
int
little_core_num
=
cpu_info
->
little_core_ids_
.
size
();
int
little_core_num
=
little_core_ids_
.
size
();
int
big_core_cache_size
=
va_arg
(
arg_ptr
,
int
);
int
big_core_cache_size
=
va_arg
(
arg_ptr
,
int
);
int
little_core_cache_size
=
va_arg
(
arg_ptr
,
int
);
int
little_core_cache_size
=
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
big_core_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
big_core_num
;
++
i
)
{
(
*
cache
)[
cpu_info
->
big_core_ids_
[
i
]]
=
big_core_cache_size
;
(
*
cache
)[
big_core_ids_
[
i
]]
=
big_core_cache_size
;
}
}
for
(
int
i
=
0
;
i
<
little_core_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
little_core_num
;
++
i
)
{
(
*
cache
)[
cpu_info
->
little_core_ids_
[
i
]]
=
little_core_cache_size
;
(
*
cache
)[
little_core_ids_
[
i
]]
=
little_core_cache_size
;
}
}
}
}
va_end
(
arg_ptr
);
va_end
(
arg_ptr
);
}
}
void
set_arch_info
(
DeviceInfo
*
cpu_info
,
int
argc
,
...)
{
void
DeviceInfo
::
SetArchInfo
(
int
argc
,
...)
{
va_list
arg_ptr
;
va_list
arg_ptr
;
va_start
(
arg_ptr
,
argc
);
va_start
(
arg_ptr
,
argc
);
int
core_num
=
cpu_info
->
compute_core_num_
;
archs_
.
resize
(
core_num_
);
cpu_info
->
archs_
.
resize
(
core_num
);
if
(
argc
==
1
)
{
if
(
argc
==
1
)
{
ARMArch
arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
ARMArch
arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
core_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
core_num
_
;
++
i
)
{
cpu_info
->
archs_
[
i
]
=
arch
;
archs_
[
i
]
=
arch
;
}
}
}
else
{
}
else
{
ARMArch
big_core_arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
ARMArch
big_core_arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
ARMArch
little_core_arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
ARMArch
little_core_arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
int
big_core_num
=
cpu_info
->
big_core_ids_
.
size
();
int
big_core_num
=
big_core_ids_
.
size
();
int
little_core_num
=
cpu_info
->
little_core_ids_
.
size
();
int
little_core_num
=
little_core_ids_
.
size
();
for
(
int
i
=
0
;
i
<
big_core_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
big_core_num
;
++
i
)
{
cpu_info
->
archs_
[
cpu_info
->
big_core_ids_
[
i
]]
=
big_core_arch
;
archs_
[
big_core_ids_
[
i
]]
=
big_core_arch
;
}
}
for
(
int
i
=
0
;
i
<
little_core_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
little_core_num
;
++
i
)
{
cpu_info
->
archs_
[
cpu_info
->
little_core_ids_
[
i
]]
=
little_core_arch
;
archs_
[
little_core_ids_
[
i
]]
=
little_core_arch
;
}
}
}
}
va_end
(
arg_ptr
);
va_end
(
arg_ptr
);
}
}
bool
get_cpu_info_from_name
(
DeviceInfo
*
cpu_info
,
std
::
string
hardware_name
)
{
bool
DeviceInfo
::
SetCPUInfoByName
(
)
{
/* Snapdragon */
/* Snapdragon */
if
(
hardware_name
.
find
(
"SDM845"
)
!=
std
::
string
::
npos
)
{
// 84
5
if
(
dev_name_
.
find
(
"SM8150"
)
!=
std
::
string
::
npos
)
{
// 85
5
c
pu_info
->
compute_c
ore_num_
=
8
;
core_num_
=
8
;
c
pu_info
->
c
ore_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
c
pu_info
->
c
luster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA75
,
kA55
);
SetArchInfo
(
2
,
kA76
,
kA55
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
256
*
1024
,
128
*
1024
);
SetCacheInfo
(
1
,
2
,
256
*
1024
,
128
*
1024
);
set_cache_info
(
cpu_info
,
2
,
1
,
2048
*
1024
);
SetCacheInfo
(
2
,
1
,
2048
*
1024
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"SDM845"
)
!=
std
::
string
::
npos
)
{
// 845
}
else
if
(
hardware_name
.
find
(
"SDM710"
)
!=
std
::
string
::
npos
)
{
// 710
core_num_
=
8
;
cpu_info
->
compute_core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
SetArchInfo
(
2
,
kA75
,
kA55
);
set_arch_info
(
cpu_info
,
2
,
kA75
,
kA55
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
256
*
1024
,
128
*
1024
);
SetCacheInfo
(
2
,
1
,
2048
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"SDM710"
)
!=
std
::
string
::
npos
)
{
// 710
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
SetArchInfo
(
2
,
kA75
,
kA55
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
256
*
1024
,
128
*
1024
);
SetCacheInfo
(
2
,
1
,
1024
*
1024
);
return
true
;
return
true
;
}
else
if
(
hardware_name
.
find
(
"MSM8998"
)
!=
std
::
string
::
npos
)
{
// 835
}
else
if
(
dev_name_
.
find
(
"MSM8998"
)
!=
std
::
string
::
npos
)
{
// 835
c
pu_info
->
compute_c
ore_num_
=
8
;
core_num_
=
8
;
c
pu_info
->
c
ore_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
c
pu_info
->
c
luster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA73
,
kA53
);
SetArchInfo
(
2
,
kA73
,
kA53
);
set_cache_info
(
cpu_info
,
0
,
2
,
64
*
1024
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
1024
*
1024
,
SetCacheInfo
(
1
,
2
,
1024
*
1024
,
/*real cache size is 2M, while that will get bad performace
/*real cache size is 2M, while that will get bad performace
on conv3x3s1 or gemm, set to 1M or 512K*/
on conv3x3s1 or gemm, set to 1M or 512K*/
1024
*
1024
);
1024
*
1024
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8996"
)
!=
std
::
string
::
npos
)
{
// 820
}
else
if
(
hardware_name
.
find
(
"MSM8996"
)
!=
std
::
string
::
npos
)
{
// 820
core_num_
=
4
;
cpu_info
->
compute_core_num_
=
4
;
core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
};
big_core_ids_
=
{
2
,
3
};
cpu_info
->
big_core_ids_
=
{
2
,
3
};
little_core_ids_
=
{
0
,
1
};
cpu_info
->
little_core_ids_
=
{
0
,
1
};
cluster_ids_
=
{
1
,
1
,
0
,
0
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
0
,
0
};
SetArchInfo
(
1
,
kA72
);
set_arch_info
(
cpu_info
,
1
,
kA72
);
SetCacheInfo
(
0
,
1
,
24
*
1024
);
set_cache_info
(
cpu_info
,
0
,
1
,
24
*
1024
);
SetCacheInfo
(
1
,
2
,
1024
*
1024
,
512
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"SDM660"
)
!=
std
::
string
::
npos
||
}
else
if
(
hardware_name
.
find
(
"SDM660"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"SDM636"
)
!=
std
::
string
::
npos
)
{
// 660, 636
hardware_name
.
find
(
"SDM636"
)
!=
std
::
string
::
npos
)
{
// 660, 636
core_num_
=
8
;
cpu_info
->
compute_core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
1
,
kA73
);
set_arch_info
(
cpu_info
,
1
,
kA73
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
set_cache_info
(
cpu_info
,
0
,
2
,
64
*
1024
,
32
*
1024
);
SetCacheInfo
(
1
,
1
,
1024
*
1024
);
set_cache_info
(
cpu_info
,
1
,
1
,
1024
*
1024
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8976"
)
!=
std
::
string
::
npos
)
{
// 652,653
}
else
if
(
hardware_name
.
find
(
"MSM8976"
)
!=
std
::
string
::
npos
)
{
// 652,653
core_num_
=
8
;
cpu_info
->
compute_core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
2
,
kA72
,
kA53
);
set_arch_info
(
cpu_info
,
2
,
kA72
,
kA53
);
SetCacheInfo
(
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
1024
*
1024
,
512
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8953"
)
!=
std
::
string
::
npos
)
{
// 625
}
else
if
(
hardware_name
.
find
(
"MSM8953"
)
!=
std
::
string
::
npos
)
{
// 625
core_num_
=
8
;
cpu_info
->
compute_core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
little_core_ids_
=
{};
cpu_info
->
little_core_ids_
=
{};
cluster_ids_
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
cpu_info
->
cluster_ids_
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
SetArchInfo
(
1
,
kA53
);
set_arch_info
(
cpu_info
,
1
,
kA53
);
SetCacheInfo
(
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
SetCacheInfo
(
1
,
1
,
1024
*
1024
);
set_cache_info
(
cpu_info
,
1
,
1
,
1024
*
1024
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8939"
)
!=
std
::
string
::
npos
)
{
// 615
}
else
if
(
hardware_name
.
find
(
"MSM8939"
)
!=
std
::
string
::
npos
)
{
// 615
core_num_
=
8
;
cpu_info
->
compute_core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
big_core_ids_
=
{
0
,
1
,
2
,
3
};
little_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
4
,
5
,
6
,
7
};
cluster_ids_
=
{
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
};
cpu_info
->
cluster_ids_
=
{
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
};
SetArchInfo
(
1
,
kA53
);
set_arch_info
(
cpu_info
,
1
,
kA53
);
SetCacheInfo
(
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
512
*
1024
,
256
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
512
*
1024
,
256
*
1024
);
return
true
;
return
true
;
/* MediaTek */
/* MediaTek */
}
else
if
(
dev_name_
.
find
(
"MT6797"
)
!=
}
else
if
(
hardware_name
.
find
(
"MT6797"
)
!=
std
::
string
::
npos
)
{
// X20/X23/X25/X27
std
::
string
::
npos
)
{
// X20/X23/X25/X27
c
pu_info
->
compute_c
ore_num_
=
10
;
core_num_
=
10
;
c
pu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
};
c
ore_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
};
cpu_info
->
big_core_ids_
=
{
8
,
9
};
big_core_ids_
=
{
8
,
9
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
c
pu_info
->
c
luster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA72
,
kA53
);
SetArchInfo
(
2
,
kA72
,
kA53
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
SetCacheInfo
(
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
1024
*
1024
,
512
*
1024
);
SetCacheInfo
(
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6799"
)
!=
std
::
string
::
npos
)
{
// X30
}
else
if
(
hardware_name
.
find
(
"MT6799"
)
!=
std
::
string
::
npos
)
{
// X30
core_num_
=
10
;
cpu_info
->
compute_core_num_
=
10
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
};
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
};
big_core_ids_
=
{
8
,
9
};
cpu_info
->
big_core_ids_
=
{
8
,
9
};
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
SetArchInfo
(
2
,
kA73
,
kA53
);
set_arch_info
(
cpu_info
,
2
,
kA73
,
kA53
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6795"
)
!=
std
::
string
::
npos
||
}
else
if
(
hardware_name
.
find
(
"MT6795"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6762"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6762"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6755T"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6755T"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6755S"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6755S"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6753"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6753"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6752"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6752"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6750"
)
!=
std
::
string
::
npos
)
{
hardware_name
.
find
(
"MT6750"
)
!=
std
::
string
::
npos
)
{
// X10, P22, P15/P18, MT6753, MT6752/MT6752M, MT6750
// X10, P22, P15/P18, MT6753, MT6752/MT6752M, MT6750
c
pu_info
->
compute_c
ore_num_
=
8
;
core_num_
=
8
;
c
pu_info
->
c
ore_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{};
little_core_ids_
=
{};
c
pu_info
->
c
luster_ids_
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
cluster_ids_
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA53
);
SetArchInfo
(
1
,
kA53
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6758"
)
!=
std
::
string
::
npos
||
}
else
if
(
hardware_name
.
find
(
"MT6758"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6757"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6757"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6763"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6763"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6755M"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6755M"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6755"
)
!=
hardware_name
.
find
(
"MT6755"
)
!=
std
::
string
::
npos
)
{
// P30, P20/P25, P23, P10
std
::
string
::
npos
)
{
// P30, P20/P25, P23, P10
c
pu_info
->
compute_c
ore_num_
=
8
;
core_num_
=
8
;
c
pu_info
->
c
ore_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
c
pu_info
->
c
luster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA53
);
SetArchInfo
(
1
,
kA53
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6771"
)
!=
std
::
string
::
npos
)
{
// P60
}
else
if
(
hardware_name
.
find
(
"MT6771"
)
!=
std
::
string
::
npos
)
{
// P60
core_num_
=
8
;
cpu_info
->
compute_core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
2
,
kA73
,
kA53
);
set_arch_info
(
cpu_info
,
2
,
kA73
,
kA53
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6765"
)
!=
std
::
string
::
npos
||
}
else
if
(
hardware_name
.
find
(
"MT6765"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6739"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6739"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6738"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6738"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6737"
)
!=
hardware_name
.
find
(
"MT6737"
)
!=
std
::
string
::
npos
)
{
// A22, MT6739, MT6738, MT6767
std
::
string
::
npos
)
{
// A22, MT6739, MT6738, MT6767
c
pu_info
->
compute_c
ore_num_
=
4
;
core_num_
=
4
;
c
pu_info
->
c
ore_ids_
=
{
0
,
1
,
2
,
3
};
core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
big_core_ids_
=
{
0
,
0
,
0
,
0
};
big_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
little_core_ids_
=
{};
little_core_ids_
=
{};
c
pu_info
->
c
luster_ids_
=
{
0
,
0
,
0
,
0
};
cluster_ids_
=
{
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA53
);
SetArchInfo
(
1
,
kA53
);
return
true
;
return
true
;
}
}
return
false
;
return
false
;
}
}
size_t
arm_get_meminfo
()
{
void
DeviceInfo
::
SetCPUInfoByProb
()
{
#ifdef LITE_WITH_LINUX
#ifdef LITE_WITH_LINUX
// get cpu count from /proc/cpuinfo
// get big.LITTLE cores by sorting CPU frequency
FILE
*
fp
=
fopen
(
"/proc/meminfo"
,
"rb"
);
sort_cpuid_by_max_freq
(
max_freqs_
,
&
core_ids_
,
&
cluster_ids_
);
if
(
!
fp
)
{
big_core_ids_
.
clear
();
return
1
;
little_core_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
cluster_ids_
.
size
();
++
i
)
{
if
(
cluster_ids_
[
i
]
==
0
)
{
big_core_ids_
.
push_back
(
core_ids_
[
i
]);
}
else
{
little_core_ids_
.
push_back
(
core_ids_
[
i
]);
}
}
size_t
memsize
=
0
;
char
line
[
1024
];
while
(
!
feof
(
fp
))
{
char
*
s
=
fgets
(
line
,
1024
,
fp
);
if
(
!
s
)
{
break
;
}
}
sscanf
(
s
,
"MemTotal: %d kB"
,
&
memsize
);
// get l1, l2, l3 cache size for each core
for
(
int
i
=
0
;
i
<
core_num_
;
i
++
)
{
get_cpu_cache_size
(
i
,
&
(
L1_cache_
[
i
]),
&
(
L2_cache_
[
i
]),
&
(
L3_cache_
[
i
]));
}
}
#endif // LITE_WITH_LINUX
fclose
(
fp
);
return
memsize
;
#elif defined(TARGET_IOS)
// to be implemented
printf
(
"not implemented
\n
"
);
return
0
;
#endif
}
}
int
arm_get_cpucount
()
{
void
DeviceInfo
::
RequestPowerFullMode
(
const
int
thread_num
)
{
#ifdef LITE_WITH_LINUX
int
big_core_size
=
big_core_ids_
.
size
();
// get cpu count from /sys/devices/system/cpu/cpunum/uevent
int
little_core_size
=
little_core_ids_
.
size
();
int
max_cpu_count
=
20
;
active_ids_
.
clear
();
int
count
=
0
;
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
max_cpu_count
;
++
i
)
{
if
(
i
<
big_core_size
)
{
char
path
[
256
];
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/uevent"
,
i
);
}
else
if
(
i
<
big_core_size
+
little_core_size
)
{
FILE
*
fp
=
fopen
(
path
,
"rb"
);
active_ids_
.
push_back
(
little_core_ids_
[
i
-
big_core_size
]);
if
(
!
fp
)
{
break
;
}
count
++
;
fclose
(
fp
);
}
if
(
count
<
1
)
{
count
=
1
;
}
}
return
count
;
#elif defined(TARGET_IOS)
int
count
=
0
;
size_t
len
=
sizeof
(
count
);
sysctlbyname
(
"hw.ncpu"
,
&
count
,
&
len
,
NULL
,
0
);
if
(
count
<
1
)
{
count
=
1
;
}
}
return
count
;
mode_
=
LITE_POWER_FULL
;
#else
return
1
;
#endif
}
}
void
arm_get_cpu_arch
(
std
::
vector
<
ARMArch
>*
archs
)
{
void
DeviceInfo
::
RequestPowerHighMode
(
const
int
thread_num
)
{
#ifdef LITE_WITH_LINUX
int
big_core_size
=
big_core_ids_
.
size
();
archs
->
clear
();
int
little_core_size
=
little_core_ids_
.
size
();
//! get CPU ARCH
active_ids_
.
clear
();
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
if
(
big_core_size
>
0
)
{
if
(
!
fp
)
{
mode_
=
LITE_POWER_HIGH
;
return
;
if
(
thread_num
>
big_core_size
)
{
}
LOG
(
ERROR
)
<<
"Request thread num: "
<<
thread_num
char
line
[
1024
];
<<
", exceed the big cores size: "
<<
big_core_size
while
(
!
feof
(
fp
))
{
<<
", truncate thread num to "
<<
big_core_size
;
char
*
s
=
fgets
(
line
,
1024
,
fp
);
active_ids_
=
big_core_ids_
;
if
(
!
s
)
{
}
else
{
break
;
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
}
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
if
(
strstr
(
line
,
"part"
)
!=
NULL
)
{
int
arch_id
=
0
;
sscanf
(
s
,
"CPU part
\t
: %x"
,
&
arch_id
);
switch
(
arch_id
)
{
case
0xd03
:
archs
->
push_back
(
kA53
);
break
;
case
0xd05
:
archs
->
push_back
(
kA55
);
break
;
case
0xd07
:
archs
->
push_back
(
kA57
);
break
;
case
0xd08
:
archs
->
push_back
(
kA72
);
break
;
case
0xd09
:
archs
->
push_back
(
kA73
);
break
;
case
0xd0a
:
archs
->
push_back
(
kA75
);
break
;
case
0x800
:
// 835
archs
->
push_back
(
kA73
);
break
;
case
0x205
:
// 820
archs
->
push_back
(
kA72
);
break
;
default:
LOG
(
ERROR
)
<<
"unknow type"
;
archs
->
push_back
(
kARMArch_UNKOWN
);
}
}
}
}
}
fclose
(
fp
);
}
else
{
int
cpu_count
=
arm_get_cpucount
();
mode_
=
LITE_POWER_LOW
;
if
(
archs
->
size
()
<
cpu_count
)
{
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores."
;
for
(
int
i
=
archs
->
size
();
i
<
cpu_count
;
++
i
)
{
if
(
thread_num
>
little_core_size
)
{
archs
->
push_back
(
archs
->
at
(
i
-
1
));
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
}
#endif
#ifdef TARGET_IOS
int
cpu_count
=
arm_get_cpucount
();
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
archs
->
push_back
(
APPLE
);
}
}
#endif
}
}
#ifdef LITE_WITH_LINUX
void
DeviceInfo
::
RequestPowerLowMode
(
const
int
thread_num
)
{
int
big_core_size
=
big_core_ids_
.
size
();
void
set_default_cache
(
DeviceInfo
*
dev
)
{
int
little_core_size
=
little_core_ids_
.
size
();
int
cpu_count
=
arm_get_cpucount
();
active_ids_
.
clear
();
dev
->
L1_cache_
.
resize
(
cpu_count
);
if
(
little_core_size
>
0
)
{
dev
->
L2_cache_
.
resize
(
cpu_count
);
mode_
=
LITE_POWER_LOW
;
dev
->
L3_cache_
.
resize
(
cpu_count
);
if
(
thread_num
>
little_core_size
)
{
#ifdef TARGET_IOS
LOG
(
WARNING
)
<<
"Request thread num: "
<<
thread_num
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
<<
", exceed the little cores size: "
<<
little_core_size
dev
->
L1_cache_
[
i
]
=
64
*
1024
;
<<
", truncate thread num to "
<<
little_core_size
;
dev
->
L2_cache_
[
i
]
=
2048
*
1024
;
active_ids_
=
little_core_ids_
;
dev
->
L3_cache_
[
i
]
=
0
;
}
else
{
}
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
#else
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
dev
->
L1_cache_
[
i
]
=
32
*
1024
;
dev
->
L2_cache_
[
i
]
=
512
*
1024
;
dev
->
L3_cache_
[
i
]
=
0
;
}
}
#endif
}
std
::
string
arm_get_cpu_name
()
{
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
if
(
!
fp
)
{
return
""
;
}
}
char
line
[
1024
];
}
else
{
while
(
!
feof
(
fp
))
{
mode_
=
LITE_POWER_HIGH
;
char
*
s
=
fgets
(
line
,
1024
,
fp
);
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
!
s
)
{
if
(
thread_num
>
big_core_size
)
{
break
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
if
(
strstr
(
line
,
"Hardware"
)
!=
NULL
)
{
fclose
(
fp
);
return
std
::
string
(
line
);
}
}
}
}
fclose
(
fp
);
return
""
;
}
}
int
get_max_freq_khz
(
int
cpuid
)
{
void
DeviceInfo
::
RequestPowerNoBindMode
(
const
int
thread_num
)
{
// first try, for all possible cpu
active_ids_
.
clear
();
char
path
[
256
];
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
snprintf
(
path
,
sizeof
(
path
),
active_ids_
.
push_back
(
0
);
"/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state"
,
cpuid
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
// second try, for online cpu
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state"
,
cpuid
);
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
// third try, for online cpu
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
,
cpuid
);
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
return
-
1
;
}
}
mode_
=
LITE_POWER_NO_BIND
;
}
int
max_freq_khz
=
-
1
;
void
DeviceInfo
::
RequestPowerRandHighMode
(
const
int
shift_num
,
fscanf
(
fp
,
"%d"
,
&
max_freq_khz
);
const
int
thread_num
)
{
int
big_core_size
=
big_core_ids_
.
size
();
fclose
(
fp
);
int
little_core_size
=
little_core_ids_
.
size
();
if
(
big_core_size
>
0
)
{
return
max_freq_khz
;
mode_
=
LITE_POWER_RAND_HIGH
;
if
(
thread_num
>
big_core_size
)
{
LOG
(
WARNING
)
<<
"Request thread num: "
<<
thread_num
<<
", exceed the big cores size: "
<<
big_core_size
<<
", truncate thread num to "
<<
big_core_size
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[(
i
+
shift_num
)
%
big_core_size
]);
}
}
}
}
}
else
{
int
max_freq_khz
=
0
;
mode_
=
LITE_POWER_LOW
;
while
(
!
feof
(
fp
))
{
LOG
(
WARNING
)
<<
"HIGH POWER MODE is not support, switch to little cores."
;
int
freq_khz
=
0
;
if
(
thread_num
>
little_core_size
)
{
int
nscan
=
fscanf
(
fp
,
"%d %*d"
,
&
freq_khz
);
active_ids_
=
little_core_ids_
;
if
(
nscan
!=
1
)
{
}
else
{
break
;
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
if
(
freq_khz
>
max_freq_khz
)
{
max_freq_khz
=
freq_khz
;
}
}
}
}
fclose
(
fp
);
return
max_freq_khz
;
}
}
int
arm_sort_cpuid_by_max_frequency
(
int
cpu_count
,
std
::
vector
<
int
>*
cpuids
,
void
DeviceInfo
::
RequestPowerRandLowMode
(
const
int
shift_num
,
const
std
::
vector
<
int
>&
cpu_freq
,
const
int
thread_num
)
{
std
::
vector
<
int
>*
cluster_ids
)
{
int
big_core_size
=
big_core_ids_
.
size
();
if
(
cpu_count
==
0
)
{
int
little_core_size
=
little_core_ids_
.
size
();
return
0
;
active_ids_
.
clear
();
}
if
(
little_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_LOW
;
cpuids
->
resize
(
cpu_count
);
if
(
thread_num
>
little_core_size
)
{
cluster_ids
->
resize
(
cpu_count
);
LOG
(
WARNING
)
<<
"Request thread num: "
<<
thread_num
<<
", exceed the little cores size: "
<<
little_core_size
for
(
int
i
=
0
;
i
<
cpu_count
;
i
++
)
{
<<
", truncate thread num to "
<<
little_core_size
;
cpuids
->
at
(
i
)
=
i
;
active_ids_
=
little_core_ids_
;
}
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
// sort cpuid as big core first
active_ids_
.
push_back
(
// simple bubble sort
little_core_ids_
[(
i
+
shift_num
)
%
little_core_size
]);
for
(
int
i
=
0
;
i
<
cpu_count
;
i
++
)
{
for
(
int
j
=
i
+
1
;
j
<
cpu_count
;
j
++
)
{
if
(
cpu_freq
[
i
]
<
cpu_freq
[
j
])
{
// swap
int
tmp
=
cpuids
->
at
(
i
);
cpuids
->
at
(
i
)
=
cpuids
->
at
(
j
);
cpuids
->
at
(
j
)
=
tmp
;
}
}
}
}
}
// SMP
int
mid_max_freq_khz
=
(
cpu_freq
[
cpuids
->
at
(
0
)]
+
cpu_freq
[
cpuids
->
at
(
cpu_count
-
1
)])
/
2
;
for
(
int
i
=
0
;
i
<
cpu_count
;
i
++
)
{
cpuids
->
at
(
i
)
=
i
;
if
(
cpu_freq
[
i
]
>=
mid_max_freq_khz
)
{
cluster_ids
->
at
(
i
)
=
0
;
}
else
{
}
else
{
cluster_ids
->
at
(
i
)
=
1
;
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores."
;
if
(
thread_num
>
big_core_size
)
{
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
}
}
return
0
;
}
}
int
check_online
(
const
std
::
vector
<
int
>&
core_ids
)
{
int
DeviceInfo
::
Setup
()
{
if
(
core_ids
.
size
()
==
0
)
{
core_num_
=
get_cpu_num
();
return
0
;
mem_size_
=
get_mem_size
();
}
get_cpu_arch
(
&
archs_
,
core_num_
);
char
path
[
256
];
// set defalut CPU info
int
online
=
1
;
SetCacheInfo
(
0
,
DEFAULT_L1_CACHE_SIZE
);
for
(
int
i
=
0
;
i
<
core_ids
.
size
();
++
i
)
{
SetCacheInfo
(
1
,
DEFAULT_L2_CACHE_SIZE
);
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/online"
,
SetCacheInfo
(
2
,
DEFAULT_L3_CACHE_SIZE
);
core_ids
[
i
]);
#ifdef LITE_WITH_LINUX
FILE
*
fp
=
fopen
(
path
,
"rb"
);
// get max&min freq
if
(
!
fp
)
{
max_freqs_
.
resize
(
core_num_
);
min_freqs_
.
resize
(
core_num_
);
for
(
int
i
=
0
;
i
<
core_num_
;
++
i
)
{
int
max_freq
,
min_freq
;
get_cpu_max_min_freq
(
i
,
&
max_freq
,
&
min_freq
);
max_freqs_
[
i
]
=
max_freq
/
1000
;
min_freqs_
[
i
]
=
min_freq
/
1000
;
}
// get cache size and big.LITTLE core ids
dev_name_
=
get_cpu_name
();
if
(
!
SetCPUInfoByName
())
{
SetCPUInfoByProb
();
}
// output info
LOG
(
INFO
)
<<
"ARM multiprocessors name: "
<<
dev_name_
;
LOG
(
INFO
)
<<
"ARM multiprocessors number: "
<<
core_num_
;
for
(
int
i
=
0
;
i
<
core_num_
;
++
i
)
{
LOG
(
INFO
)
<<
"ARM multiprocessors ID: "
<<
core_ids_
[
i
]
<<
", max freq: "
<<
max_freqs_
[
i
]
<<
", min freq: "
<<
min_freqs_
[
i
]
<<
", cluster ID: "
<<
cluster_ids_
[
core_ids_
[
i
]]
<<
", CPU ARCH: A"
<<
archs_
[
i
];
}
LOG
(
INFO
)
<<
"L1 DataCache size is: "
;
for
(
int
i
=
0
;
i
<
core_num_
;
++
i
)
{
LOG
(
INFO
)
<<
L1_cache_
[
i
]
/
1024
<<
" KB"
;
}
LOG
(
INFO
)
<<
"L2 Cache size is: "
;
for
(
int
i
=
0
;
i
<
core_num_
;
++
i
)
{
LOG
(
INFO
)
<<
L2_cache_
[
i
]
/
1024
<<
" KB"
;
}
LOG
(
INFO
)
<<
"Total memory: "
<<
mem_size_
<<
"KB"
;
#endif
// set default run mode
SetRunMode
(
LITE_POWER_NO_BIND
,
1
);
// use single thread by default
return
0
;
return
0
;
}
int
cur_online
=
0
;
fscanf
(
fp
,
"%d"
,
&
cur_online
);
online
&=
cur_online
;
fclose
(
fp
);
}
return
online
;
}
}
int
set_sched_affinity
(
const
std
::
vector
<
int
>&
cpuids
)
{
void
DeviceInfo
::
SetRunMode
(
PowerMode
mode
,
int
thread_num
)
{
// #define CPU_SETSIZE 1024
#ifdef ARM_WITH_OMP
// #define __NCPUBITS (8 * sizeof (unsigned long))
thread_num
=
std
::
min
(
thread_num
,
core_num_
);
// typedef struct
// {
// unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
// } cpu_set_t;
// set affinity for thread
#ifdef __GLIBC__
pid_t
pid
=
syscall
(
SYS_gettid
);
#else
#else
pid_t
pid
=
gettid
();
thread_num
=
1
;
// force thread_num to 1 if OpenMP is disabled
#endif
#endif
cpu_set_t
mask
;
#ifdef LITE_WITH_LINUX
CPU_ZERO
(
&
mask
);
int
big_core_size
=
big_core_ids_
.
size
();
for
(
int
i
=
0
;
i
<
cpuids
.
size
();
i
++
)
{
int
little_core_size
=
little_core_ids_
.
size
();
CPU_SET
(
cpuids
[
i
],
&
mask
);
int
big_little_core_size
=
big_core_size
+
little_core_size
;
thread_num
=
std
::
min
(
thread_num
,
big_little_core_size
);
count_
++
;
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
switch
(
mode
)
{
case
LITE_POWER_FULL
:
RequestPowerFullMode
(
thread_num
);
break
;
case
LITE_POWER_HIGH
:
RequestPowerHighMode
(
thread_num
);
break
;
case
LITE_POWER_LOW
:
RequestPowerLowMode
(
thread_num
);
break
;
case
LITE_POWER_NO_BIND
:
RequestPowerNoBindMode
(
thread_num
);
break
;
case
LITE_POWER_RAND_HIGH
:
RequestPowerRandHighMode
(
shift_num
,
thread_num
);
break
;
case
LITE_POWER_RAND_LOW
:
RequestPowerRandLowMode
(
shift_num
,
thread_num
);
break
;
default:
LOG
(
FATAL
)
<<
"Unsupported power mode: "
<<
mode
;
break
;
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
#ifdef ARM_WITH_OMP
omp_set_num_threads
(
active_ids_
.
size
());
#endif
if
(
mode_
!=
LITE_POWER_NO_BIND
)
{
if
(
check_cpu_online
(
active_ids_
))
{
bind_threads
(
active_ids_
);
}
else
{
LOG
(
WARNING
)
<<
"Some cores are offline, switch to NO BIND MODE"
;
mode_
=
LITE_POWER_NO_BIND
;
}
}
int
syscallret
=
syscall
(
__NR_sched_setaffinity
,
pid
,
sizeof
(
mask
),
&
mask
);
if
(
syscallret
)
{
LOG
(
ERROR
)
<<
"syscall error "
<<
syscallret
;
return
-
1
;
}
}
#else // LITE_WITH_LINUX
// only LITE_POWER_NO_BIND is supported in other OS
RequestPowerNoBindMode
(
thread_num
);
#ifdef ARM_WITH_OMP
omp_set_num_threads
(
active_ids_
.
size
());
#endif
#endif // LITE_WITH_LINUX
//! alloc memory for sgemm in this context
workspace_
.
Resize
(
{
static_cast
<
int64_t
>
(
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
))});
arch_
=
archs_
[
active_ids_
[
0
]];
}
return
0
;
void
DeviceInfo
::
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
SetCacheInfo
(
0
,
l1size
);
SetCacheInfo
(
1
,
l2size
);
SetCacheInfo
(
2
,
l3size
);
workspace_
.
Resize
({
2
*
(
l1size
+
l2size
)});
}
}
#endif // LITE_WITH_LINUX
bool
DeviceInfo
::
ExtendWorkspace
(
DDimLite
dims
)
{
auto
count
=
dims
.
product
();
auto
old
=
workspace_
.
dims
();
if
(
count
==
old
.
product
())
{
return
false
;
}
workspace_
.
Resize
({
static_cast
<
int64_t
>
(
count
+
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
))});
return
true
;
}
#endif // LITE_WITH_ARM
#endif // LITE_WITH_ARM
...
...
paddle/fluid/lite/core/cpu_info.h
浏览文件 @
ce46ef22
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#pragma once
#pragma once
#include <cstdarg>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/core/lite_tensor.h"
#include "paddle/fluid/lite/core/lite_tensor.h"
...
@@ -47,92 +48,73 @@ typedef enum {
...
@@ -47,92 +48,73 @@ typedef enum {
class
DeviceInfo
{
class
DeviceInfo
{
public:
public:
int
idx_
;
int
max_freq_
;
int
min_freq_
;
int
generate_arch_
;
int
compute_core_num_
;
int
max_memory_
;
int
sharemem_size_
;
std
::
string
device_name_
;
std
::
string
compute_ability_
;
std
::
vector
<
int
>
L1_cache_
;
std
::
vector
<
int
>
L2_cache_
;
std
::
vector
<
int
>
L3_cache_
;
std
::
vector
<
int
>
core_ids_
;
std
::
vector
<
int
>
big_core_ids_
;
std
::
vector
<
int
>
little_core_ids_
;
std
::
vector
<
int
>
cluster_ids_
;
std
::
vector
<
ARMArch
>
archs_
;
ARMArch
arch_
;
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
PowerMode
mode_
;
std
::
vector
<
int
>
active_ids_
;
TensorLite
workspace_
;
int64_t
count_
{
0
};
static
DeviceInfo
&
Global
()
{
static
DeviceInfo
&
Global
()
{
static
auto
*
x
=
new
DeviceInfo
;
static
auto
*
x
=
new
DeviceInfo
;
return
*
x
;
return
*
x
;
}
}
static
void
Init
()
{
static
int
Init
()
{
auto
&
info
=
Global
();
static
int
ret
=
Global
().
Setup
();
InitInternal
(
&
info
)
;
return
ret
;
}
}
void
SetRunMode
(
PowerMode
mode
,
int
threads
);
int
Setup
();
void
SetRunMode
(
PowerMode
mode
,
int
thread_num
);
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
);
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
);
void
SetArch
(
ARMArch
arch
)
{
arch_
=
arch
;
}
void
SetArch
(
ARMArch
arch
)
{
arch_
=
arch
;
}
void
BindDev
();
PowerMode
mode
()
const
{
return
mode_
;
}
PowerMode
mode
()
const
{
return
mode_
;
}
int
threads
()
const
{
return
active_ids_
.
size
();
}
int
threads
()
const
{
return
active_ids_
.
size
();
}
ARMArch
arch
()
const
{
return
arch_
;
}
ARMArch
arch
()
const
{
return
arch_
;
}
int
l1_cache_size
()
const
{
return
L1_cache_
[
active_ids_
[
0
]];
}
int
l2_cache_size
()
const
{
return
L2_cache_
[
active_ids_
[
0
]];
}
int
l3_cache_size
()
const
{
return
L3_cache_
[
active_ids_
[
0
]];
}
template
<
typename
T
>
template
<
typename
T
>
T
*
workspace_data
()
{
T
*
workspace_data
()
{
return
workspace_
.
mutable_data
<
T
>
();
return
workspace_
.
mutable_data
<
T
>
();
}
}
int
l1_cache_size
()
const
{
return
L1_cache_
[
active_ids_
[
0
]];
}
int
l2_cache_size
()
const
{
return
L2_cache_
[
active_ids_
[
0
]];
}
int
l3_cache_size
()
const
{
return
L3_cache_
[
active_ids_
[
0
]];
}
bool
ExtendWorkspace
(
DDimLite
dims
);
bool
ExtendWorkspace
(
DDimLite
dims
);
private:
private:
DeviceInfo
()
=
default
;
int
core_num_
;
static
void
InitInternal
(
DeviceInfo
*
dev
);
std
::
vector
<
int
>
max_freqs_
;
};
std
::
vector
<
int
>
min_freqs_
;
int
mem_size_
;
size_t
arm_get_meminfo
();
std
::
string
dev_name_
;
int
arm_get_cpucount
();
void
arm_get_cpu_arch
(
std
::
vector
<
ARMArch
>*
archs
);
bool
get_cpu_info_from_name
(
DeviceInfo
*
cpu_info
,
std
::
string
hardware_name
);
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
L1_cache_
;
std
::
vector
<
int
>
L2_cache_
;
void
set_default_cache
(
DeviceInfo
*
dev
);
std
::
vector
<
int
>
L3_cache_
;
std
::
vector
<
int
>
core_ids_
;
std
::
string
arm_get_cpu_name
();
std
::
vector
<
int
>
big_core_ids_
;
std
::
vector
<
int
>
little_core_ids_
;
std
::
vector
<
int
>
cluster_ids_
;
std
::
vector
<
ARMArch
>
archs_
;
int
get_max_freq_khz
(
int
cpuid
);
ARMArch
arch_
;
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
PowerMode
mode_
;
std
::
vector
<
int
>
active_ids_
;
TensorLite
workspace_
;
int64_t
count_
{
0
};
int
arm_sort_cpuid_by_max_frequency
(
int
cpu_count
,
std
::
vector
<
int
>*
cpuids
,
void
SetCacheInfo
(
int
cache_id
,
int
argc
,
...);
const
std
::
vector
<
int
>&
cpu_freq
,
void
SetArchInfo
(
int
argc
,
...);
std
::
vector
<
int
>*
cluster_ids
);
bool
SetCPUInfoByName
();
int
check_online
(
const
std
::
vector
<
int
>&
core_ids
);
void
SetCPUInfoByProb
();
int
set_sched_affinity
(
const
std
::
vector
<
int
>&
cpuids
);
void
RequestPowerFullMode
(
const
int
thread_num
);
void
RequestPowerHighMode
(
const
int
thread_num
);
void
RequestPowerLowMode
(
const
int
thread_num
);
void
RequestPowerNoBindMode
(
const
int
thread_num
);
void
RequestPowerRandHighMode
(
const
int
shift_num
,
const
int
thread_num
);
void
RequestPowerRandLowMode
(
const
int
shift_num
,
const
int
thread_num
);
#endif // LITE_WITH_LINUX
DeviceInfo
()
=
default
;
};
#endif // LITE_WITH_ARM
#endif // LITE_WITH_ARM
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录