Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
58bf3c48
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
58bf3c48
编写于
6月 21, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'incubate/lite' of
http://10.87.145.36/inference/paddlelite
into xzl/incubate/lite
上级
7e2ecbd6
758db8df
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
1314 addition
and
869 deletion
+1314
-869
.gitlab-ci.yml
.gitlab-ci.yml
+76
-28
paddle/fluid/lite/api/cxx_api_bin.cc
paddle/fluid/lite/api/cxx_api_bin.cc
+4
-3
paddle/fluid/lite/core/context.cc
paddle/fluid/lite/core/context.cc
+1
-316
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+19
-25
paddle/fluid/lite/core/cpu_info.cc
paddle/fluid/lite/core/cpu_info.cc
+747
-417
paddle/fluid/lite/core/cpu_info.h
paddle/fluid/lite/core/cpu_info.h
+55
-58
paddle/fluid/lite/kernels/arm/conv_compute.cc
paddle/fluid/lite/kernels/arm/conv_compute.cc
+0
-2
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+0
-1
paddle/fluid/lite/kernels/arm/mul_compute.cc
paddle/fluid/lite/kernels/arm/mul_compute.cc
+0
-1
paddle/fluid/lite/kernels/arm/pool_compute.cc
paddle/fluid/lite/kernels/arm/pool_compute.cc
+0
-1
paddle/fluid/lite/kernels/x86/CMakeLists.txt
paddle/fluid/lite/kernels/x86/CMakeLists.txt
+3
-0
paddle/fluid/lite/kernels/x86/batch_norm_compute.cc
paddle/fluid/lite/kernels/x86/batch_norm_compute.cc
+30
-0
paddle/fluid/lite/kernels/x86/batch_norm_compute.h
paddle/fluid/lite/kernels/x86/batch_norm_compute.h
+158
-0
paddle/fluid/lite/kernels/x86/batch_norm_compute_test.cc
paddle/fluid/lite/kernels/x86/batch_norm_compute_test.cc
+139
-0
paddle/fluid/lite/tools/build.sh
paddle/fluid/lite/tools/build.sh
+82
-17
未找到文件。
.gitlab-ci.yml
浏览文件 @
58bf3c48
...
@@ -2,6 +2,20 @@ before_script:
...
@@ -2,6 +2,20 @@ before_script:
-
env
-
env
-
export CI_USER_DIR=$(pwd)
-
export CI_USER_DIR=$(pwd)
# prepare ccache
-
apt install ccache
# for proxy
-
export http_proxy=$CI_PROXY
-
export https_proxy=$CI_PROXY
# merge the latest code
-
git config --global user.email "you@example.com"
-
git config --global user.name "Your Name"
-
git fetch origin incubate/lite
-
git merge --no-ff origin/incubate/lite
image
:
$SERVER_LITE_DOCKER_IMAGE
image
:
$SERVER_LITE_DOCKER_IMAGE
stages
:
stages
:
...
@@ -14,19 +28,13 @@ check:prebuilt:
...
@@ -14,19 +28,13 @@ check:prebuilt:
-
lite
-
lite
stage
:
ci
stage
:
ci
script
:
script
:
# prepare for pre-commit
-
rm -rf ~/.pip
-
rm -rf ~/.pip
-
export http_proxy=$CI_PROXY
-
export https_proxy=$CI_PROXY
-
pip install pre-commit
-
pip install pre-commit
-
pre-commit install
-
pre-commit install
# merge the latest code
-
git config --global user.email "you@example.com"
-
git config --global user.name "Your Name"
-
git fetch origin incubate/lite
-
git merge --no-ff origin/incubate/lite
-
./paddle/fluid/lite/tools/build.sh check_style
-
./paddle/fluid/lite/tools/build.sh check_style
cache
:
cache
:
key
:
check_style
key
:
check_style
paths
:
paths
:
...
@@ -42,17 +50,11 @@ build:server:
...
@@ -42,17 +50,11 @@ build:server:
paths
:
paths
:
-
build/third_party
-
build/third_party
-
~/.ccache
-
~/.ccache
-
$CI_PROJECT_DIR/_build_server_ccache
script
:
script
:
-
apt install ccache
# customize ccache path for specifying runner cache
-
export http_proxy=$CI_PROXY
-
export CCACHE_DIR=$CI_PROJECT_DIR/_build_server_ccache
-
export https_proxy=$CI_PROXY
# run build and test
# merge the latest code
-
git config --global user.email "you@example.com"
-
git config --global user.name "Your Name"
-
git fetch origin incubate/lite
-
git merge --no-ff origin/incubate/lite
-
mkdir -p build
-
mkdir -p build
-
cd build
-
cd build
-
../paddle/fluid/lite/tools/build.sh cmake_x86
-
../paddle/fluid/lite/tools/build.sh cmake_x86
...
@@ -66,7 +68,27 @@ build:server:
...
@@ -66,7 +68,27 @@ build:server:
dependencies
:
dependencies
:
-
check:prebuilt
-
check:prebuilt
build:mobile:
build:mobile_android:
tags
:
-
lite
stage
:
build_mobile
image
:
$MOBILE_LITE_DOCKER_IMAGE
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
~/.ccache
-
$CI_PROJECT_DIR/build_mobile_ccache
script
:
-
export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_ccache
-
./paddle/fluid/lite/tools/build.sh build_test_arm_subtask_android
dependencies
:
-
build:server
build:mobile_armlinux:
tags
:
tags
:
-
lite
-
lite
stage
:
build_mobile
stage
:
build_mobile
...
@@ -77,17 +99,43 @@ build:mobile:
...
@@ -77,17 +99,43 @@ build:mobile:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
$MOBILE_LITE_CACHE1
-
~/.ccache
-
~/.ccache
-
$CI_PROJECT_DIR/build_mobile_ccache2
script
:
script
:
-
apt install ccache
-
export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_ccache2
-
export http_proxy=$CI_PROXY
-
./paddle/fluid/lite/tools/build.sh build_test_arm_subtask_armlinux
-
export https_proxy=$CI_PROXY
dependencies
:
-
build:server
# merge the latest code
cache
:
-
git config --global user.email "you@example.com"
key
:
mobile_thirdparty
-
git config --global user.name "Your Name"
paths
:
-
git fetch origin incubate/lite
-
$MOBILE_LITE_CACHE0
-
git merge --no-ff origin/incubate/lite
-
$MOBILE_LITE_CACHE1
-
~/.ccache
build:mobile_model_mobilenetv2:
tags
:
-
lite
stage
:
build_mobile
image
:
$MOBILE_LITE_DOCKER_IMAGE
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
~/.ccache
script
:
-
export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model1
-
./paddle/fluid/lite/tools/build.sh build_test_arm_model1
-
./paddle/fluid/lite/tools/build.sh build_test_arm
dependencies
:
dependencies
:
-
build:server
-
build:server
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
~/.ccache
-
$CI_PROJECT_DIR/build_mobile_model1
paddle/fluid/lite/api/cxx_api_bin.cc
浏览文件 @
58bf3c48
...
@@ -29,9 +29,10 @@ double time_diff(Time t1, Time t2) {
...
@@ -29,9 +29,10 @@ double time_diff(Time t1, Time t2) {
return
counter
.
count
()
/
1000.0
;
return
counter
.
count
()
/
1000.0
;
}
}
void
Run
(
const
char
*
model_dir
,
int
repeat
)
{
void
Run
(
const
char
*
model_dir
,
int
repeat
,
int
thread_num
)
{
#ifdef LITE_WITH_ARM
#ifdef LITE_WITH_ARM
DeviceInfo
::
Init
();
DeviceInfo
::
Init
();
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
thread_num
);
#endif
#endif
lite
::
ExecutorLite
predictor
;
lite
::
ExecutorLite
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
...
@@ -67,8 +68,8 @@ void Run(const char* model_dir, int repeat) {
...
@@ -67,8 +68,8 @@ void Run(const char* model_dir, int repeat) {
}
// namespace paddle
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
CHECK_EQ
(
argc
,
3
)
<<
"usage: ./cmd <model_dir> <repeat
>"
;
CHECK_EQ
(
argc
,
4
)
<<
"usage: ./cmd <model_dir> <repeat> <thread_num
>"
;
paddle
::
lite
::
Run
(
argv
[
1
],
std
::
stoi
(
argv
[
2
]));
paddle
::
lite
::
Run
(
argv
[
1
],
std
::
stoi
(
argv
[
2
])
,
std
::
stoi
(
argv
[
3
])
);
return
0
;
return
0
;
}
}
...
...
paddle/fluid/lite/core/context.cc
浏览文件 @
58bf3c48
...
@@ -13,322 +13,7 @@
...
@@ -13,322 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/lite/core/context.h"
#include "paddle/fluid/lite/core/context.h"
#include "paddle/fluid/lite/core/cpu_info.h"
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{}
// namespace lite
#ifdef LITE_WITH_ARM
void
Context
<
TargetType
::
kARM
>::
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
int
cpu_count
=
arm_get_cpucount
();
dev
.
L1_cache_
.
resize
(
cpu_count
);
dev
.
L2_cache_
.
resize
(
cpu_count
);
dev
.
L3_cache_
.
resize
(
cpu_count
);
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
dev
.
L1_cache_
[
i
]
=
l1size
;
dev
.
L2_cache_
[
i
]
=
l2size
;
dev
.
L3_cache_
[
i
]
=
l3size
;
}
workspace_
.
Resize
({
2
*
(
l1size
+
l2size
)});
}
Context
<
TargetType
::
kARM
>::
Context
()
{
active_ids_
=
{
0
};
mode_
=
LITE_POWER_HIGH
;
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
workspace_
.
Resize
(
{
static_cast
<
int64_t
>
(
dev
.
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
))});
#ifdef TARGET_IOS
arch_
=
APPLE
;
// use 6x8
#else
if
(
dev
.
big_core_ids_
.
size
()
>
0
)
{
arch_
=
dev
.
archs_
[
dev
.
big_core_ids_
[
0
]];
}
#endif
}
PowerMode
Context
<
TargetType
::
kARM
>::
mode
()
const
{
return
mode_
;
}
int
Context
<
TargetType
::
kARM
>::
threads
()
const
{
return
active_ids_
.
size
();
}
Context
<
TargetType
::
kARM
>::
Context
(
const
ARMContext
&
ctx
)
{
mode_
=
ctx
.
mode_
;
active_ids_
=
ctx
.
active_ids_
;
workspace_
=
ctx
.
workspace_
;
arch_
=
ctx
.
arch_
;
count_
=
ctx
.
count_
;
}
ARMContext
&
Context
<
TargetType
::
kARM
>::
operator
=
(
const
ARMContext
&
ctx
)
{
mode_
=
ctx
.
mode_
;
active_ids_
=
ctx
.
active_ids_
;
workspace_
=
ctx
.
workspace_
;
arch_
=
ctx
.
arch_
;
count_
=
ctx
.
count_
;
return
*
this
;
}
void
Context
<
TargetType
::
kARM
>::
BindDev
()
{
#ifdef ARM_WITH_OMP
int
num_threads
=
active_ids_
.
size
();
omp_set_num_threads
(
num_threads
);
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
ssarets
;
for
(
int
j
=
0
;
j
<
num_threads
;
++
j
)
{
ssarets
.
push_back
(
0
);
}
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
ssarets
[
i
]
=
set_sched_affinity
(
active_ids_
);
}
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
if
(
ssarets
[
i
]
!=
0
)
{
LOG
(
ERROR
)
<<
"set cpu affinity failed, cpuID: "
<<
active_ids_
[
i
];
return
;
}
}
#endif // LITE_WITH_LINUX
#else // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
cpuid1
;
cpuid1
.
push_back
(
active_ids_
[
0
]);
int
ssaret
=
set_sched_affinity
(
cpuid1
);
if
(
ssaret
!=
0
)
{
printf
(
"set cpu affinity failed, cpuID: %d
\n
"
,
active_ids_
[
0
]);
return
;
}
#endif // LITE_WITH_LINUX
#endif // ARM_WITH_OMP
}
void
Context
<
TargetType
::
kARM
>::
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
int
big_core_size
=
dev
.
big_core_ids_
.
size
();
int
small_core_size
=
dev
.
little_core_ids_
.
size
();
if
(
threads
>
big_core_size
+
small_core_size
)
{
threads
=
big_core_size
+
small_core_size
;
}
#ifdef ARM_WITH_OMP
count_
++
;
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
switch
(
mode
)
{
case
LITE_POWER_FULL
:
mode_
=
mode
;
active_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
if
(
i
<
big_core_size
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
else
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
-
big_core_size
]);
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
ERROR
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores"
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_NO_BIND
:
mode_
=
LITE_POWER_NO_BIND
;
active_ids_
.
clear
();
if
(
threads
>
dev
.
core_ids_
.
size
())
{
active_ids_
.
resize
(
dev
.
core_ids_
.
size
());
}
else
{
active_ids_
.
resize
(
threads
);
}
break
;
case
LITE_POWER_RAND_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[(
i
+
shift_num
)
%
big_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
WARNING
)
<<
"HIGH POWER MODE is not support, switch to little cores"
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_RAND_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[(
i
+
shift_num
)
%
small_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
}
//! fix multi-threads LITE_POWER_HIGH mode
if
(
mode_
==
LITE_POWER_NO_BIND
||
threads
>
1
)
{
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
else
{
if
(
check_online
(
active_ids_
))
{
BindDev
();
}
else
{
LOG
(
ERROR
)
<<
"core id "
<<
active_ids_
[
0
]
<<
" is offline, switch to NO BIND MODE"
;
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
}
#else
if
(
big_core_size
>
0
)
{
active_ids_
=
{
dev
.
big_core_ids_
[
0
]};
}
else
{
active_ids_
=
{
0
};
}
#endif
//! alloc memory for sgemm in this context
int
temp_mem_size
=
DeviceInfo
::
Global
().
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
);
workspace_
.
Resize
({
temp_mem_size
});
arch_
=
DeviceInfo
::
Global
().
archs_
[
active_ids_
[
0
]];
}
ARMArch
Context
<
TargetType
::
kARM
>::
arch
()
const
{
return
arch_
;
}
void
Context
<
TargetType
::
kARM
>::
SetArch
(
ARMArch
arch
)
{
arch_
=
arch
;
}
int
Context
<
TargetType
::
kARM
>::
l1_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L1_cache_
[
active_ids_
[
0
]];
}
int
Context
<
TargetType
::
kARM
>::
l2_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L2_cache_
[
active_ids_
[
0
]];
}
int
Context
<
TargetType
::
kARM
>::
l3_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L3_cache_
[
active_ids_
[
0
]];
}
bool
Context
<
TargetType
::
kARM
>::
ExtendWorkspace
(
DDimLite
dims
)
{
auto
count
=
dims
.
product
();
auto
old
=
workspace_
.
dims
();
if
(
count
==
old
.
product
())
{
return
false
;
}
workspace_
.
Resize
(
{
static_cast
<
int64_t
>
(
count
+
l2_cache_size
()
/
sizeof
(
float
))});
return
true
;
}
#endif // LITE_WITH_ARM
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
paddle/fluid/lite/core/context.h
浏览文件 @
58bf3c48
...
@@ -61,47 +61,41 @@ class Context<TargetType::kHost> {
...
@@ -61,47 +61,41 @@ class Context<TargetType::kHost> {
template
<
>
template
<
>
class
Context
<
TargetType
::
kARM
>
{
class
Context
<
TargetType
::
kARM
>
{
public:
public:
Context
();
Context
()
{}
Context
(
PowerMode
mode
,
int
threads
);
explicit
Context
(
const
ARMContext
&
ctx
);
explicit
Context
(
const
ARMContext
&
ctx
);
ARMContext
&
operator
=
(
const
ARMContext
&
ctx
)
;
ARMContext
&
operator
=
(
const
ARMContext
&
ctx
)
{}
// NOTE: InitOnce should only be used by ContextScheduler
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{
DeviceInfo
::
Init
();
}
void
InitOnce
()
{
DeviceInfo
::
Init
();
}
void
CopyShared
(
const
ARMContext
*
ctx
)
{}
void
CopyShared
(
const
ARMContext
*
ctx
)
{}
void
SetRunMode
(
PowerMode
mode
,
int
threads
);
void
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
);
return
DeviceInfo
::
Global
().
SetRunMode
(
mode
,
threads
);
void
SetArch
(
ARMArch
arch
);
}
void
BindDev
();
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
return
DeviceInfo
::
Global
().
SetCache
(
l1size
,
l2size
,
l3size
);
}
void
SetArch
(
ARMArch
arch
)
{
return
DeviceInfo
::
Global
().
SetArch
(
arch
);
}
PowerMode
mode
()
const
;
PowerMode
mode
()
const
{
return
DeviceInfo
::
Global
().
mode
();
}
int
threads
()
const
;
int
threads
()
const
{
return
DeviceInfo
::
Global
().
threads
();
}
ARMArch
arch
()
const
;
ARMArch
arch
()
const
{
return
DeviceInfo
::
Global
().
arch
();
}
int
l1_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l1_cache_size
();
}
int
l2_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l2_cache_size
();
}
int
l3_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l3_cache_size
();
}
template
<
typename
T
>
template
<
typename
T
>
T
*
workspace_data
()
{
T
*
workspace_data
()
{
return
workspace_
.
mutabl
e_data
<
T
>
();
return
DeviceInfo
::
Global
().
workspac
e_data
<
T
>
();
}
}
int
l1_cache_size
()
const
;
bool
ExtendWorkspace
(
DDimLite
dims
)
{
int
l2_cache_size
()
const
;
return
DeviceInfo
::
Global
().
ExtendWorkspace
(
dims
);
int
l3_cache_size
()
const
;
}
bool
ExtendWorkspace
(
DDimLite
dims
);
std
::
string
name
()
const
{
return
"ARMContext"
;
}
std
::
string
name
()
const
{
return
"ARMContext"
;
}
private:
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
ARMArch
arch_
;
PowerMode
mode_
;
std
::
vector
<
int
>
active_ids_
;
TensorLite
workspace_
;
int64_t
count_
{
0
};
};
};
#endif
#endif
...
...
paddle/fluid/lite/core/cpu_info.cc
浏览文件 @
58bf3c48
...
@@ -12,312 +12,81 @@
...
@@ -12,312 +12,81 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
#include <algorithm>
#include <limits>
#include "paddle/fluid/lite/core/cpu_info.h"
#include "paddle/fluid/lite/core/cpu_info.h"
#include <cstdarg>
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
#ifdef LITE_WITH_ARM
#ifdef LITE_WITH_ARM
void
DeviceInfo
::
InitInternal
(
DeviceInfo
*
dev
)
{
#ifdef TARGET_IOS
set_default_cache
(
dev
);
const
int
DEFAULT_L1_CACHE_SIZE
=
64
*
1024
;
dev
->
compute_core_num_
=
arm_get_cpucount
();
const
int
DEFAULT_L2_CACHE_SIZE
=
2048
*
1024
;
dev
->
max_memory_
=
arm_get_meminfo
();
const
int
DEFAULT_L3_CACHE_SIZE
=
0
;
#else
// get max freq
const
int
DEFAULT_L1_CACHE_SIZE
=
32
*
1024
;
#ifdef LITE_WITH_LINUX
const
int
DEFAULT_L2_CACHE_SIZE
=
512
*
1024
;
std
::
vector
<
int
>
max_freq
(
dev
->
compute_core_num_
);
const
int
DEFAULT_L3_CACHE_SIZE
=
0
;
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
max_freq
[
i
]
=
get_max_freq_khz
(
i
)
/
1000
;
}
std
::
string
cpu_name
=
arm_get_cpu_name
();
if
(
get_cpu_info_from_name
(
dev
,
cpu_name
)
!=
true
)
{
arm_sort_cpuid_by_max_frequency
(
dev
->
compute_core_num_
,
&
dev
->
core_ids_
,
max_freq
,
&
dev
->
cluster_ids_
);
dev
->
big_core_ids_
.
clear
();
dev
->
little_core_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
dev
->
cluster_ids_
.
size
();
++
i
)
{
if
(
dev
->
cluster_ids_
[
i
]
==
0
)
{
dev
->
big_core_ids_
.
push_back
(
dev
->
core_ids_
[
i
]);
}
else
{
dev
->
little_core_ids_
.
push_back
(
dev
->
core_ids_
[
i
]);
}
}
arm_get_cpu_arch
(
&
dev
->
archs_
);
}
LOG
(
INFO
)
<<
"ARM multiprocessors number: "
<<
dev
->
compute_core_num_
;
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
LOG
(
INFO
)
<<
"ARM multiprocessors ID: "
<<
dev
->
core_ids_
[
i
]
<<
", frequence: "
<<
max_freq
[
i
]
<<
", cluster ID: "
<<
dev
->
cluster_ids_
[
dev
->
core_ids_
[
i
]]
<<
", CPU ARCH: A"
<<
dev
->
archs_
[
i
];
}
VLOG
(
1
)
<<
"L1 DataCache size is: "
;
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
VLOG
(
1
)
<<
dev
->
L1_cache_
[
i
]
/
1024
<<
" KB"
;
}
VLOG
(
1
)
<<
"L2 Cache size is: "
;
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
VLOG
(
1
)
<<
dev
->
L2_cache_
[
i
]
/
1024
<<
" KB"
;
}
VLOG
(
1
)
<<
"Total memory: "
<<
dev
->
max_memory_
<<
"KB"
;
dev
->
max_freq_
=
max_freq
[
0
];
for
(
int
j
=
1
;
j
<
dev
->
compute_core_num_
;
++
j
)
{
if
(
dev
->
max_freq_
<
max_freq
[
j
])
{
dev
->
max_freq_
=
max_freq
[
j
];
}
}
#elif defined(TARGET_IOS)
arm_get_cpu_arch
(
&
dev
->
archs_
);
#endif
#endif
}
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
int
get_cpu_num
()
{
void
set_cache_info
(
DeviceInfo
*
cpu_info
,
int
cache_id
,
int
argc
,
...)
{
#ifdef LITE_WITH_LINUX
va_list
arg_ptr
;
// get cpu count from /sys/devices/system/cpu/cpunum/uevent
va_start
(
arg_ptr
,
argc
);
int
max_cpu_num
=
20
;
std
::
vector
<
int
>*
cache
;
int
cpu_num
=
0
;
switch
(
cache_id
)
{
for
(
int
i
=
0
;
i
<
max_cpu_num
;
++
i
)
{
case
0
:
char
path
[
256
];
cache
=
&
cpu_info
->
L1_cache_
;
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/uevent"
,
i
);
break
;
FILE
*
fp
=
fopen
(
path
,
"rb"
);
case
1
:
if
(
!
fp
)
{
cache
=
&
cpu_info
->
L2_cache_
;
break
;
case
2
:
cache
=
&
cpu_info
->
L3_cache_
;
break
;
default:
break
;
break
;
}
int
core_num
=
cpu_info
->
compute_core_num_
;
cache
->
resize
(
core_num
);
if
(
argc
==
1
)
{
int
cache_size
=
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
core_num
;
++
i
)
{
(
*
cache
)[
i
]
=
cache_size
;
}
}
else
{
int
big_core_num
=
cpu_info
->
big_core_ids_
.
size
();
int
little_core_num
=
cpu_info
->
little_core_ids_
.
size
();
int
big_core_cache_size
=
va_arg
(
arg_ptr
,
int
);
int
little_core_cache_size
=
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
big_core_num
;
++
i
)
{
(
*
cache
)[
cpu_info
->
big_core_ids_
[
i
]]
=
big_core_cache_size
;
}
for
(
int
i
=
0
;
i
<
little_core_num
;
++
i
)
{
(
*
cache
)[
cpu_info
->
little_core_ids_
[
i
]]
=
little_core_cache_size
;
}
}
cpu_num
++
;
fclose
(
fp
);
}
}
va_end
(
arg_ptr
);
if
(
cpu_num
<
1
)
{
}
cpu_num
=
1
;
void
set_arch_info
(
DeviceInfo
*
cpu_info
,
int
argc
,
...)
{
va_list
arg_ptr
;
va_start
(
arg_ptr
,
argc
);
int
core_num
=
cpu_info
->
compute_core_num_
;
cpu_info
->
archs_
.
resize
(
core_num
);
if
(
argc
==
1
)
{
ARMArch
arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
core_num
;
++
i
)
{
cpu_info
->
archs_
[
i
]
=
arch
;
}
}
else
{
ARMArch
big_core_arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
ARMArch
little_core_arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
int
big_core_num
=
cpu_info
->
big_core_ids_
.
size
();
int
little_core_num
=
cpu_info
->
little_core_ids_
.
size
();
for
(
int
i
=
0
;
i
<
big_core_num
;
++
i
)
{
cpu_info
->
archs_
[
cpu_info
->
big_core_ids_
[
i
]]
=
big_core_arch
;
}
for
(
int
i
=
0
;
i
<
little_core_num
;
++
i
)
{
cpu_info
->
archs_
[
cpu_info
->
little_core_ids_
[
i
]]
=
little_core_arch
;
}
}
}
va_end
(
arg_ptr
);
return
cpu_num
;
}
#elif defined(TARGET_IOS)
int
cpu_num
=
0
;
bool
get_cpu_info_from_name
(
DeviceInfo
*
cpu_info
,
std
::
string
hardware_name
)
{
size_t
len
=
sizeof
(
cpu_num
);
/* Snapdragon */
sysctlbyname
(
"hw.ncpu"
,
&
cpu_num
,
&
len
,
NULL
,
0
);
if
(
hardware_name
.
find
(
"SDM845"
)
!=
std
::
string
::
npos
)
{
// 845
if
(
cpu_num
<
1
)
{
cpu_info
->
compute_core_num_
=
8
;
cpu_num
=
1
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA75
,
kA55
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
256
*
1024
,
128
*
1024
);
set_cache_info
(
cpu_info
,
2
,
1
,
2048
*
1024
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"SDM710"
)
!=
std
::
string
::
npos
)
{
// 710
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA75
,
kA55
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MSM8998"
)
!=
std
::
string
::
npos
)
{
// 835
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA73
,
kA53
);
set_cache_info
(
cpu_info
,
0
,
2
,
64
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
1024
*
1024
,
/*real cache size is 2M, while that will get bad performace
on conv3x3s1 or gemm, set to 1M or 512K*/
1024
*
1024
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MSM8996"
)
!=
std
::
string
::
npos
)
{
// 820
cpu_info
->
compute_core_num_
=
4
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
big_core_ids_
=
{
2
,
3
};
cpu_info
->
little_core_ids_
=
{
0
,
1
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA72
);
set_cache_info
(
cpu_info
,
0
,
1
,
24
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"SDM660"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"SDM636"
)
!=
std
::
string
::
npos
)
{
// 660, 636
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA73
);
set_cache_info
(
cpu_info
,
0
,
2
,
64
*
1024
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
1
,
1024
*
1024
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MSM8976"
)
!=
std
::
string
::
npos
)
{
// 652,653
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA72
,
kA53
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MSM8953"
)
!=
std
::
string
::
npos
)
{
// 625
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{};
cpu_info
->
cluster_ids_
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA53
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
1
,
1024
*
1024
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MSM8939"
)
!=
std
::
string
::
npos
)
{
// 615
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
little_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
cluster_ids_
=
{
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
};
set_arch_info
(
cpu_info
,
1
,
kA53
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
512
*
1024
,
256
*
1024
);
return
true
;
/* MediaTek */
}
else
if
(
hardware_name
.
find
(
"MT6797"
)
!=
std
::
string
::
npos
)
{
// X20/X23/X25/X27
cpu_info
->
compute_core_num_
=
10
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
};
cpu_info
->
big_core_ids_
=
{
8
,
9
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA72
,
kA53
);
set_cache_info
(
cpu_info
,
0
,
1
,
32
*
1024
);
set_cache_info
(
cpu_info
,
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MT6799"
)
!=
std
::
string
::
npos
)
{
// X30
cpu_info
->
compute_core_num_
=
10
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
};
cpu_info
->
big_core_ids_
=
{
8
,
9
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA73
,
kA53
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MT6795"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6762"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6755T"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6755S"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6753"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6752"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6750"
)
!=
std
::
string
::
npos
)
{
// X10, P22, P15/P18, MT6753, MT6752/MT6752M, MT6750
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{};
cpu_info
->
cluster_ids_
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA53
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MT6758"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6757"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6763"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6755M"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6755"
)
!=
std
::
string
::
npos
)
{
// P30, P20/P25, P23, P10
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA53
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MT6771"
)
!=
std
::
string
::
npos
)
{
// P60
cpu_info
->
compute_core_num_
=
8
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cpu_info
->
big_core_ids_
=
{
4
,
5
,
6
,
7
};
cpu_info
->
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
2
,
kA73
,
kA53
);
return
true
;
}
else
if
(
hardware_name
.
find
(
"MT6765"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6739"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6738"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"MT6737"
)
!=
std
::
string
::
npos
)
{
// A22, MT6739, MT6738, MT6767
cpu_info
->
compute_core_num_
=
4
;
cpu_info
->
core_ids_
=
{
0
,
1
,
2
,
3
};
cpu_info
->
big_core_ids_
=
{
0
,
0
,
0
,
0
};
cpu_info
->
little_core_ids_
=
{};
cpu_info
->
cluster_ids_
=
{
0
,
0
,
0
,
0
};
set_arch_info
(
cpu_info
,
1
,
kA53
);
return
true
;
}
}
return
false
;
return
cpu_num
;
#else
return
1
;
#endif
}
}
size_t
arm_get_meminfo
()
{
size_t
get_mem_size
()
{
#ifdef LITE_WITH_LINUX
#ifdef LITE_WITH_LINUX
// get cpu count from /proc/cpuinfo
// get cpu count from /proc/cpuinfo
FILE
*
fp
=
fopen
(
"/proc/meminfo"
,
"rb"
);
FILE
*
fp
=
fopen
(
"/proc/meminfo"
,
"rb"
);
if
(
!
fp
)
{
if
(
!
fp
)
{
return
1
;
return
1
;
}
}
size_t
memsize
=
0
;
size_t
memsize
=
0
;
char
line
[
1024
];
char
line
[
1024
];
while
(
!
feof
(
fp
))
{
while
(
!
feof
(
fp
))
{
...
@@ -327,52 +96,18 @@ size_t arm_get_meminfo() {
...
@@ -327,52 +96,18 @@ size_t arm_get_meminfo() {
}
}
sscanf
(
s
,
"MemTotal: %d kB"
,
&
memsize
);
sscanf
(
s
,
"MemTotal: %d kB"
,
&
memsize
);
}
}
fclose
(
fp
);
fclose
(
fp
);
return
memsize
;
return
memsize
;
#elif defined(TARGET_IOS)
#elif defined(TARGET_IOS)
// to be implemented
// to be implemented
printf
(
"not implemented
\n
"
);
printf
(
"not implemented
\n
"
);
return
0
;
#endif
}
int
arm_get_cpucount
()
{
#ifdef LITE_WITH_LINUX
// get cpu count from /sys/devices/system/cpu/cpunum/uevent
int
max_cpu_count
=
20
;
int
count
=
0
;
for
(
int
i
=
0
;
i
<
max_cpu_count
;
++
i
)
{
char
path
[
256
];
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/uevent"
,
i
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
break
;
}
count
++
;
fclose
(
fp
);
}
if
(
count
<
1
)
{
count
=
1
;
}
return
count
;
#elif defined(TARGET_IOS)
int
count
=
0
;
size_t
len
=
sizeof
(
count
);
sysctlbyname
(
"hw.ncpu"
,
&
count
,
&
len
,
NULL
,
0
);
if
(
count
<
1
)
{
count
=
1
;
}
return
count
;
#else
return
1
;
#endif
#endif
return
0
;
}
}
void
arm_get_cpu_arch
(
std
::
vector
<
ARMArch
>*
archs
)
{
void
get_cpu_arch
(
std
::
vector
<
ARMArch
>*
archs
,
const
int
cpu_num
)
{
#ifdef LITE_WITH_LINUX
archs
->
clear
();
archs
->
clear
();
#ifdef LITE_WITH_LINUX
//! get CPU ARCH
//! get CPU ARCH
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
if
(
!
fp
)
{
if
(
!
fp
)
{
...
@@ -406,6 +141,29 @@ void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
...
@@ -406,6 +141,29 @@ void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
case
0xd0a
:
case
0xd0a
:
archs
->
push_back
(
kA75
);
archs
->
push_back
(
kA75
);
break
;
break
;
case
0xd40
:
archs
->
push_back
(
kA76
);
break
;
case
0x804
:
// 855
archs
->
push_back
(
kA76
);
break
;
case
0x805
:
// 855
archs
->
push_back
(
kA55
);
break
;
case
0x802
:
// 845
archs
->
push_back
(
kA75
);
break
;
case
0x803
:
// 845
archs
->
push_back
(
kA55
);
break
;
case
0x801
:
// 835
archs
->
push_back
(
kA73
);
break
;
case
0x800
:
case
0x800
:
// 835
// 835
archs
->
push_back
(
kA73
);
archs
->
push_back
(
kA73
);
...
@@ -415,49 +173,31 @@ void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
...
@@ -415,49 +173,31 @@ void arm_get_cpu_arch(std::vector<ARMArch>* archs) {
archs
->
push_back
(
kA72
);
archs
->
push_back
(
kA72
);
break
;
break
;
default:
default:
LOG
(
ERROR
)
<<
"
unknow type"
;
LOG
(
ERROR
)
<<
"
Unknow cpu arch: "
<<
arch_id
;
archs
->
push_back
(
kARMArch_UNKOWN
);
archs
->
push_back
(
kARMArch_UNKOWN
);
}
}
}
}
}
}
fclose
(
fp
);
fclose
(
fp
);
int
cpu_count
=
arm_get_cpucount
();
if
(
archs
->
size
()
<
cpu_num
)
{
if
(
archs
->
size
()
<
cpu_count
)
{
for
(
int
i
=
archs
->
size
();
i
<
cpu_num
;
++
i
)
{
for
(
int
i
=
archs
->
size
();
i
<
cpu_count
;
++
i
)
{
archs
->
push_back
(
archs
->
at
(
i
-
1
));
archs
->
push_back
(
archs
->
at
(
i
-
1
));
}
}
}
}
#endif
#elif defined(TARGET_IOS)
#ifdef TARGET_IOS
for
(
int
i
=
0
;
i
<
cpu_num
;
++
i
)
{
int
cpu_count
=
arm_get_cpucount
();
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
archs
->
push_back
(
APPLE
);
archs
->
push_back
(
APPLE
);
}
}
#else
for
(
int
i
=
0
;
i
<
cpu_num
;
++
i
)
{
archs
->
push_back
(
kARMArch_UNKOWN
);
}
#endif
#endif
}
}
#ifdef LITE_WITH_LINUX
#ifdef LITE_WITH_LINUX
void
set_default_cache
(
DeviceInfo
*
dev
)
{
std
::
string
get_cpu_name
()
{
int
cpu_count
=
arm_get_cpucount
();
dev
->
L1_cache_
.
resize
(
cpu_count
);
dev
->
L2_cache_
.
resize
(
cpu_count
);
dev
->
L3_cache_
.
resize
(
cpu_count
);
#ifdef TARGET_IOS
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
dev
->
L1_cache_
[
i
]
=
64
*
1024
;
dev
->
L2_cache_
[
i
]
=
2048
*
1024
;
dev
->
L3_cache_
[
i
]
=
0
;
}
#else
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
dev
->
L1_cache_
[
i
]
=
32
*
1024
;
dev
->
L2_cache_
[
i
]
=
512
*
1024
;
dev
->
L3_cache_
[
i
]
=
0
;
}
#endif
}
std
::
string
arm_get_cpu_name
()
{
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
if
(
!
fp
)
{
if
(
!
fp
)
{
return
""
;
return
""
;
...
@@ -477,122 +217,163 @@ std::string arm_get_cpu_name() {
...
@@ -477,122 +217,163 @@ std::string arm_get_cpu_name() {
return
""
;
return
""
;
}
}
int
get_max_freq_khz
(
int
cpuid
)
{
void
get_cpu_max_min_freq
(
int
cpu_id
,
int
*
max_freq
,
int
*
min_freq
)
{
*
max_freq
=
0
;
*
min_freq
=
0
;
// first try, for all possible cpu
// first try, for all possible cpu
char
path
[
256
];
char
path
[
256
];
snprintf
(
path
,
sizeof
(
path
),
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state"
,
cpuid
);
"/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state"
,
cpu_id
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
if
(
!
fp
)
{
// second try, for online cpu
// second try, for online cpu
snprintf
(
path
,
sizeof
(
path
),
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state"
,
"/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state"
,
cpuid
);
cpu
_
id
);
fp
=
fopen
(
path
,
"rb"
);
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
if
(
!
fp
)
{
// third try, for online cpu
// third try, for online cpu
// get max_freq
snprintf
(
path
,
sizeof
(
path
),
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
,
cpuid
);
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
,
cpu_id
);
fp
=
fopen
(
path
,
"rb"
);
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
if
(
!
fp
)
{
return
-
1
;
return
;
}
}
fscanf
(
fp
,
"%d"
,
max_freq
);
int
max_freq_khz
=
-
1
;
fscanf
(
fp
,
"%d"
,
&
max_freq_khz
);
fclose
(
fp
);
fclose
(
fp
);
// get min_freq
return
max_freq_khz
;
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq"
,
cpu_id
);
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
return
;
}
fscanf
(
fp
,
"%d"
,
min_freq
);
fclose
(
fp
);
return
;
}
}
}
}
*
min_freq
=
std
::
numeric_limits
<
int
>::
max
();
int
max_freq_khz
=
0
;
while
(
!
feof
(
fp
))
{
while
(
!
feof
(
fp
))
{
int
freq
_khz
=
0
;
int
freq
=
0
;
int
nscan
=
fscanf
(
fp
,
"%d %*d"
,
&
freq
_khz
);
int
nscan
=
fscanf
(
fp
,
"%d %*d"
,
&
freq
);
if
(
nscan
!=
1
)
{
if
(
nscan
!=
1
)
{
break
;
break
;
}
}
if
(
freq
>
*
max_freq
)
{
if
(
freq_khz
>
max_freq_khz
)
{
*
max_freq
=
freq
;
max_freq_khz
=
freq_khz
;
}
if
(
freq
<
*
min_freq
)
{
*
min_freq
=
freq
;
}
}
}
}
fclose
(
fp
);
fclose
(
fp
);
return
max_freq_khz
;
}
}
int
arm_sort_cpuid_by_max_frequency
(
int
cpu_count
,
std
::
vector
<
int
>*
cpuids
,
void
sort_cpuid_by_max_freq
(
const
std
::
vector
<
int
>&
max_freqs
,
const
std
::
vector
<
int
>&
cpu_freq
,
std
::
vector
<
int
>*
cpu_ids
,
std
::
vector
<
int
>*
cluster_ids
)
{
std
::
vector
<
int
>*
cluster_ids
)
{
if
(
cpu_count
==
0
)
{
int
cpu_num
=
max_freqs
.
size
();
return
0
;
if
(
cpu_num
==
0
)
{
return
;
}
}
cpu_ids
->
resize
(
cpu_num
);
cpuids
->
resize
(
cpu_count
);
cluster_ids
->
resize
(
cpu_num
);
cluster_ids
->
resize
(
cpu_count
);
for
(
int
i
=
0
;
i
<
cpu_num
;
i
++
)
{
cpu_ids
->
at
(
i
)
=
i
;
for
(
int
i
=
0
;
i
<
cpu_count
;
i
++
)
{
cpuids
->
at
(
i
)
=
i
;
}
}
// sort cpuid as big core first
// sort cpuid as big core first
// simple bubble sort
// simple bubble sort
for
(
int
i
=
0
;
i
<
cpu_num
;
i
++
)
{
for
(
int
i
=
0
;
i
<
cpu_count
;
i
++
)
{
for
(
int
j
=
i
+
1
;
j
<
cpu_num
;
j
++
)
{
for
(
int
j
=
i
+
1
;
j
<
cpu_count
;
j
++
)
{
if
(
max_freqs
[
i
]
<
max_freqs
[
j
])
{
if
(
cpu_freq
[
i
]
<
cpu_freq
[
j
])
{
// swap
// swap
int
tmp
=
cpuids
->
at
(
i
);
int
tmp
=
cpu
_
ids
->
at
(
i
);
cpu
ids
->
at
(
i
)
=
cpu
ids
->
at
(
j
);
cpu
_ids
->
at
(
i
)
=
cpu_
ids
->
at
(
j
);
cpuids
->
at
(
j
)
=
tmp
;
cpu
_
ids
->
at
(
j
)
=
tmp
;
}
}
}
}
}
}
// SMP
// SMP
int
mid_max_freq
_khz
=
int
mid_max_freq
=
(
cpu_freq
[
cpuids
->
at
(
0
)]
+
cpu_freq
[
cpuids
->
at
(
cpu_count
-
1
)])
/
2
;
(
max_freqs
[
cpu_ids
->
at
(
0
)]
+
max_freqs
[
cpu_ids
->
at
(
cpu_num
-
1
)])
/
2
;
for
(
int
i
=
0
;
i
<
cpu_
count
;
i
++
)
{
for
(
int
i
=
0
;
i
<
cpu_
num
;
i
++
)
{
cpuids
->
at
(
i
)
=
i
;
cpu
_
ids
->
at
(
i
)
=
i
;
if
(
cpu_freq
[
i
]
>=
mid_max_freq_khz
)
{
if
(
max_freqs
[
i
]
>=
mid_max_freq
)
{
cluster_ids
->
at
(
i
)
=
0
;
cluster_ids
->
at
(
i
)
=
0
;
}
else
{
}
else
{
cluster_ids
->
at
(
i
)
=
1
;
cluster_ids
->
at
(
i
)
=
1
;
}
}
}
}
return
0
;
}
}
int
check_online
(
const
std
::
vector
<
int
>&
core_ids
)
{
void
get_cpu_cache_size
(
int
cpu_id
,
int
*
l1_cache_size
,
int
*
l2_cache_size
,
if
(
core_ids
.
size
()
==
0
)
{
int
*
l3_cache_size
)
{
return
0
;
int
max_cache_idx_num
=
10
;
*
l1_cache_size
=
DEFAULT_L1_CACHE_SIZE
;
*
l2_cache_size
=
DEFAULT_L2_CACHE_SIZE
;
*
l3_cache_size
=
DEFAULT_L3_CACHE_SIZE
;
for
(
int
i
=
0
;
i
<
max_cache_idx_num
;
i
++
)
{
char
path
[
256
];
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cache/index%d/level"
,
cpu_id
,
i
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
fp
)
{
int
level
=
-
1
;
fscanf
(
fp
,
"%d"
,
&
level
);
fclose
(
fp
);
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cache/index%d/size"
,
cpu_id
,
i
);
fp
=
fopen
(
path
,
"rb"
);
if
(
fp
)
{
int
size
=
-
1
;
fscanf
(
fp
,
"%d"
,
&
size
);
fclose
(
fp
);
if
(
size
>=
0
)
{
if
(
level
==
1
)
{
*
l1_cache_size
=
size
*
1024
;
}
else
if
(
level
==
2
)
{
*
l2_cache_size
=
size
*
1024
;
}
else
if
(
level
==
3
)
{
*
l3_cache_size
=
size
*
1024
;
}
}
}
}
}
}
bool
check_cpu_online
(
const
std
::
vector
<
int
>&
cpu_ids
)
{
if
(
cpu_ids
.
size
()
==
0
)
{
return
false
;
}
}
char
path
[
256
];
char
path
[
256
];
int
online
=
1
;
bool
all_online
=
true
;
for
(
int
i
=
0
;
i
<
c
ore
_ids
.
size
();
++
i
)
{
for
(
int
i
=
0
;
i
<
c
pu
_ids
.
size
();
++
i
)
{
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/online"
,
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/online"
,
c
ore
_ids
[
i
]);
c
pu
_ids
[
i
]);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
int
is_online
=
0
;
return
0
;
if
(
fp
)
{
fscanf
(
fp
,
"%d"
,
&
is_online
);
fclose
(
fp
);
}
else
{
LOG
(
ERROR
)
<<
"Failed to query the online statue of CPU id:"
<<
cpu_ids
[
i
];
}
if
(
is_online
==
0
)
{
all_online
=
false
;
LOG
(
ERROR
)
<<
"CPU id:"
<<
cpu_ids
[
i
]
<<
" is offine"
;
}
}
int
cur_online
=
0
;
fscanf
(
fp
,
"%d"
,
&
cur_online
);
online
&=
cur_online
;
fclose
(
fp
);
}
}
return
online
;
return
all_
online
;
}
}
int
set_sched_affinity
(
const
std
::
vector
<
int
>&
cpuids
)
{
int
set_sched_affinity
(
const
std
::
vector
<
int
>&
cpu
_
ids
)
{
// #define CPU_SETSIZE 1024
// #define CPU_SETSIZE 1024
// #define __NCPUBITS (8 * sizeof (unsigned long))
// #define __NCPUBITS (8 * sizeof (unsigned long))
// typedef struct
// typedef struct
...
@@ -608,20 +389,569 @@ int set_sched_affinity(const std::vector<int>& cpuids) {
...
@@ -608,20 +389,569 @@ int set_sched_affinity(const std::vector<int>& cpuids) {
#endif
#endif
cpu_set_t
mask
;
cpu_set_t
mask
;
CPU_ZERO
(
&
mask
);
CPU_ZERO
(
&
mask
);
for
(
int
i
=
0
;
i
<
cpu
ids
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
cpu
_ids
.
size
();
++
i
)
{
CPU_SET
(
cpuids
[
i
],
&
mask
);
CPU_SET
(
cpu
_
ids
[
i
],
&
mask
);
}
}
int
syscallret
=
syscall
(
__NR_sched_setaffinity
,
pid
,
sizeof
(
mask
),
&
mask
);
int
syscallret
=
syscall
(
__NR_sched_setaffinity
,
pid
,
sizeof
(
mask
),
&
mask
);
if
(
syscallret
)
{
if
(
syscallret
)
{
LOG
(
ERROR
)
<<
"syscall error "
<<
syscallret
;
return
-
1
;
return
-
1
;
}
}
return
0
;
}
bool
bind_threads
(
const
std
::
vector
<
int
>
cpu_ids
)
{
#ifdef ARM_WITH_OMP
int
thread_num
=
cpu_ids
.
size
();
omp_set_num_threads
(
thread_num
);
std
::
vector
<
int
>
ssarets
;
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
ssarets
.
push_back
(
0
);
}
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
ssarets
[
i
]
=
set_sched_affinity
(
cpu_ids
);
}
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
if
(
ssarets
[
i
]
!=
0
)
{
LOG
(
ERROR
)
<<
"Set cpu affinity failed, core id: "
<<
cpu_ids
[
i
];
return
false
;
}
}
#else // ARM_WITH_OMP
std
::
vector
<
int
>
first_cpu_id
;
first_cpu_id
.
push_back
(
cpu_ids
[
0
]);
int
ssaret
=
set_sched_affinity
(
first_cpu_id
);
if
(
ssaret
!=
0
)
{
LOG
(
ERROR
)
<<
"Set cpu affinity failed, core id: "
<<
cpu_ids
[
0
];
return
false
;
}
#endif // ARM_WITH_OMP
}
#endif // LITE_WITH_LINUX
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
void
DeviceInfo
::
SetCacheInfo
(
int
cache_id
,
int
argc
,
...)
{
va_list
arg_ptr
;
va_start
(
arg_ptr
,
argc
);
std
::
vector
<
int
>*
cache
;
switch
(
cache_id
)
{
case
0
:
cache
=
&
L1_cache_
;
break
;
case
1
:
cache
=
&
L2_cache_
;
break
;
case
2
:
cache
=
&
L3_cache_
;
break
;
default:
break
;
}
cache
->
resize
(
core_num_
);
if
(
argc
==
1
)
{
int
cache_size
=
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
core_num_
;
++
i
)
{
(
*
cache
)[
i
]
=
cache_size
;
}
}
else
{
int
big_core_num
=
big_core_ids_
.
size
();
int
little_core_num
=
little_core_ids_
.
size
();
int
big_core_cache_size
=
va_arg
(
arg_ptr
,
int
);
int
little_core_cache_size
=
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
big_core_num
;
++
i
)
{
(
*
cache
)[
big_core_ids_
[
i
]]
=
big_core_cache_size
;
}
for
(
int
i
=
0
;
i
<
little_core_num
;
++
i
)
{
(
*
cache
)[
little_core_ids_
[
i
]]
=
little_core_cache_size
;
}
}
va_end
(
arg_ptr
);
}
void
DeviceInfo
::
SetArchInfo
(
int
argc
,
...)
{
va_list
arg_ptr
;
va_start
(
arg_ptr
,
argc
);
archs_
.
resize
(
core_num_
);
if
(
argc
==
1
)
{
ARMArch
arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
for
(
int
i
=
0
;
i
<
core_num_
;
++
i
)
{
archs_
[
i
]
=
arch
;
}
}
else
{
ARMArch
big_core_arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
ARMArch
little_core_arch
=
(
ARMArch
)
va_arg
(
arg_ptr
,
int
);
int
big_core_num
=
big_core_ids_
.
size
();
int
little_core_num
=
little_core_ids_
.
size
();
for
(
int
i
=
0
;
i
<
big_core_num
;
++
i
)
{
archs_
[
big_core_ids_
[
i
]]
=
big_core_arch
;
}
for
(
int
i
=
0
;
i
<
little_core_num
;
++
i
)
{
archs_
[
little_core_ids_
[
i
]]
=
little_core_arch
;
}
}
va_end
(
arg_ptr
);
}
bool
DeviceInfo
::
SetCPUInfoByName
()
{
/* Snapdragon */
if
(
dev_name_
.
find
(
"SM8150"
)
!=
std
::
string
::
npos
)
{
// 855
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
2
,
kA76
,
kA55
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
256
*
1024
,
128
*
1024
);
SetCacheInfo
(
2
,
1
,
2048
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"SDM845"
)
!=
std
::
string
::
npos
)
{
// 845
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
2
,
kA75
,
kA55
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
256
*
1024
,
128
*
1024
);
SetCacheInfo
(
2
,
1
,
2048
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"SDM710"
)
!=
std
::
string
::
npos
)
{
// 710
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
SetArchInfo
(
2
,
kA75
,
kA55
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
256
*
1024
,
128
*
1024
);
SetCacheInfo
(
2
,
1
,
1024
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8998"
)
!=
std
::
string
::
npos
)
{
// 835
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
2
,
kA73
,
kA53
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
1024
*
1024
,
/*real cache size is 2M, while that will get bad performace
on conv3x3s1 or gemm, set to 1M or 512K*/
1024
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8996"
)
!=
std
::
string
::
npos
)
{
// 820
core_num_
=
4
;
core_ids_
=
{
0
,
1
,
2
,
3
};
big_core_ids_
=
{
2
,
3
};
little_core_ids_
=
{
0
,
1
};
cluster_ids_
=
{
1
,
1
,
0
,
0
};
SetArchInfo
(
1
,
kA72
);
SetCacheInfo
(
0
,
1
,
24
*
1024
);
SetCacheInfo
(
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"SDM660"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"SDM636"
)
!=
std
::
string
::
npos
)
{
// 660, 636
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
1
,
kA73
);
SetCacheInfo
(
0
,
2
,
64
*
1024
,
32
*
1024
);
SetCacheInfo
(
1
,
1
,
1024
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8976"
)
!=
std
::
string
::
npos
)
{
// 652,653
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
2
,
kA72
,
kA53
);
SetCacheInfo
(
0
,
1
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8953"
)
!=
std
::
string
::
npos
)
{
// 625
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
little_core_ids_
=
{};
cluster_ids_
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
SetArchInfo
(
1
,
kA53
);
SetCacheInfo
(
0
,
1
,
32
*
1024
);
SetCacheInfo
(
1
,
1
,
1024
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MSM8939"
)
!=
std
::
string
::
npos
)
{
// 615
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
0
,
1
,
2
,
3
};
little_core_ids_
=
{
4
,
5
,
6
,
7
};
cluster_ids_
=
{
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
};
SetArchInfo
(
1
,
kA53
);
SetCacheInfo
(
0
,
1
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
512
*
1024
,
256
*
1024
);
return
true
;
/* MediaTek */
}
else
if
(
dev_name_
.
find
(
"MT6797"
)
!=
std
::
string
::
npos
)
{
// X20/X23/X25/X27
core_num_
=
10
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
};
big_core_ids_
=
{
8
,
9
};
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
SetArchInfo
(
2
,
kA72
,
kA53
);
SetCacheInfo
(
0
,
1
,
32
*
1024
);
SetCacheInfo
(
1
,
2
,
1024
*
1024
,
512
*
1024
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6799"
)
!=
std
::
string
::
npos
)
{
// X30
core_num_
=
10
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
};
big_core_ids_
=
{
8
,
9
};
little_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
};
SetArchInfo
(
2
,
kA73
,
kA53
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6795"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6762"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6755T"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6755S"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6753"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6752"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6750"
)
!=
std
::
string
::
npos
)
{
// X10, P22, P15/P18, MT6753, MT6752/MT6752M, MT6750
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
little_core_ids_
=
{};
cluster_ids_
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
SetArchInfo
(
1
,
kA53
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6758"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6757"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6763"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6755M"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6755"
)
!=
std
::
string
::
npos
)
{
// P30, P20/P25, P23, P10
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
1
,
kA53
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6771"
)
!=
std
::
string
::
npos
)
{
// P60
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
little_core_ids_
=
{
0
,
1
,
2
,
3
};
cluster_ids_
=
{
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
};
SetArchInfo
(
2
,
kA73
,
kA53
);
return
true
;
}
else
if
(
dev_name_
.
find
(
"MT6765"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6739"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6738"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"MT6737"
)
!=
std
::
string
::
npos
)
{
// A22, MT6739, MT6738, MT6767
core_num_
=
4
;
core_ids_
=
{
0
,
1
,
2
,
3
};
big_core_ids_
=
{
0
,
1
,
2
,
3
};
little_core_ids_
=
{};
cluster_ids_
=
{
0
,
0
,
0
,
0
};
SetArchInfo
(
1
,
kA53
);
return
true
;
}
return
false
;
}
void
DeviceInfo
::
SetCPUInfoByProb
()
{
#ifdef LITE_WITH_LINUX
// get big.LITTLE cores by sorting CPU frequency
sort_cpuid_by_max_freq
(
max_freqs_
,
&
core_ids_
,
&
cluster_ids_
);
big_core_ids_
.
clear
();
little_core_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
cluster_ids_
.
size
();
++
i
)
{
if
(
cluster_ids_
[
i
]
==
0
)
{
big_core_ids_
.
push_back
(
core_ids_
[
i
]);
}
else
{
little_core_ids_
.
push_back
(
core_ids_
[
i
]);
}
}
// get l1, l2, l3 cache size for each core
for
(
int
i
=
0
;
i
<
core_num_
;
i
++
)
{
get_cpu_cache_size
(
i
,
&
(
L1_cache_
[
i
]),
&
(
L2_cache_
[
i
]),
&
(
L3_cache_
[
i
]));
}
#endif // LITE_WITH_LINUX
}
void
DeviceInfo
::
RequestPowerFullMode
(
const
int
thread_num
)
{
int
big_core_size
=
big_core_ids_
.
size
();
int
little_core_size
=
little_core_ids_
.
size
();
active_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
if
(
i
<
big_core_size
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
else
if
(
i
<
big_core_size
+
little_core_size
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
-
big_core_size
]);
}
}
mode_
=
LITE_POWER_FULL
;
}
void
DeviceInfo
::
RequestPowerHighMode
(
const
int
thread_num
)
{
int
big_core_size
=
big_core_ids_
.
size
();
int
little_core_size
=
little_core_ids_
.
size
();
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_HIGH
;
if
(
thread_num
>
big_core_size
)
{
LOG
(
ERROR
)
<<
"Request thread num: "
<<
thread_num
<<
", exceed the big cores size: "
<<
big_core_size
<<
", truncate thread num to "
<<
big_core_size
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores."
;
if
(
thread_num
>
little_core_size
)
{
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
}
void
DeviceInfo
::
RequestPowerLowMode
(
const
int
thread_num
)
{
int
big_core_size
=
big_core_ids_
.
size
();
int
little_core_size
=
little_core_ids_
.
size
();
active_ids_
.
clear
();
if
(
little_core_size
>
0
)
{
mode_
=
LITE_POWER_LOW
;
if
(
thread_num
>
little_core_size
)
{
LOG
(
WARNING
)
<<
"Request thread num: "
<<
thread_num
<<
", exceed the little cores size: "
<<
little_core_size
<<
", truncate thread num to "
<<
little_core_size
;
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
thread_num
>
big_core_size
)
{
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
}
void
DeviceInfo
::
RequestPowerNoBindMode
(
const
int
thread_num
)
{
active_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
active_ids_
.
push_back
(
0
);
}
mode_
=
LITE_POWER_NO_BIND
;
}
void DeviceInfo::RequestPowerRandHighMode(const int shift_num,
                                          const int thread_num) {
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  if (big_core_size > 0) {
    mode_ = LITE_POWER_RAND_HIGH;
    if (thread_num > big_core_size) {
      LOG(WARNING) << "Request thread num: " << thread_num
                   << ", exceed the big cores size: " << big_core_size
                   << ", truncate thread num to " << big_core_size;
      active_ids_ = big_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(big_core_ids_[(i + shift_num) % big_core_size]);
      }
    }
  } else {
    mode_ = LITE_POWER_LOW;
    LOG(WARNING) << "HIGH POWER MODE is not supported, switch to little cores.";
    if (thread_num > little_core_size) {
      active_ids_ = little_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(little_core_ids_[i]);
      }
    }
  }
}
void DeviceInfo::RequestPowerRandLowMode(const int shift_num,
                                         const int thread_num) {
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  active_ids_.clear();
  if (little_core_size > 0) {
    mode_ = LITE_POWER_RAND_LOW;
    if (thread_num > little_core_size) {
      LOG(WARNING) << "Request thread num: " << thread_num
                   << ", exceed the little cores size: " << little_core_size
                   << ", truncate thread num to " << little_core_size;
      active_ids_ = little_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(
            little_core_ids_[(i + shift_num) % little_core_size]);
      }
    }
  } else {
    mode_ = LITE_POWER_HIGH;
    LOG(WARNING) << "LOW POWER MODE is not supported, switch to big cores.";
    if (thread_num > big_core_size) {
      active_ids_ = big_core_ids_;
    } else {
      for (int i = 0; i < thread_num; ++i) {
        active_ids_.push_back(big_core_ids_[i]);
      }
    }
  }
}
int DeviceInfo::Setup() {
  core_num_ = get_cpu_num();
  mem_size_ = get_mem_size();
  get_cpu_arch(&archs_, core_num_);
  // set default CPU info
  SetCacheInfo(0, DEFAULT_L1_CACHE_SIZE);
  SetCacheInfo(1, DEFAULT_L2_CACHE_SIZE);
  SetCacheInfo(2, DEFAULT_L3_CACHE_SIZE);
#ifdef LITE_WITH_LINUX
  // get max&min freq
  max_freqs_.resize(core_num_);
  min_freqs_.resize(core_num_);
  for (int i = 0; i < core_num_; ++i) {
    int max_freq, min_freq;
    get_cpu_max_min_freq(i, &max_freq, &min_freq);
    max_freqs_[i] = max_freq / 1000;
    min_freqs_[i] = min_freq / 1000;
  }
  // get cache size and big.LITTLE core ids
  dev_name_ = get_cpu_name();
  if (!SetCPUInfoByName()) {
    SetCPUInfoByProb();
  }
  // output info
  LOG(INFO) << "ARM multiprocessors name: " << dev_name_;
  LOG(INFO) << "ARM multiprocessors number: " << core_num_;
  for (int i = 0; i < core_num_; ++i) {
    LOG(INFO) << "ARM multiprocessors ID: " << core_ids_[i]
              << ", max freq: " << max_freqs_[i]
              << ", min freq: " << min_freqs_[i]
              << ", cluster ID: " << cluster_ids_[core_ids_[i]]
              << ", CPU ARCH: A" << archs_[i];
  }
  LOG(INFO) << "L1 DataCache size is: ";
  for (int i = 0; i < core_num_; ++i) {
    LOG(INFO) << L1_cache_[i] / 1024 << " KB";
  }
  LOG(INFO) << "L2 Cache size is: ";
  for (int i = 0; i < core_num_; ++i) {
    LOG(INFO) << L2_cache_[i] / 1024 << " KB";
  }
  LOG(INFO) << "Total memory: " << mem_size_ << "KB";
#endif
  // set default run mode
  SetRunMode(LITE_POWER_NO_BIND, 1);  // use single thread by default
  return 0;
}
void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
#ifdef ARM_WITH_OMP
  thread_num = std::min(thread_num, core_num_);
#else
  thread_num = 1;  // force thread_num to 1 if OpenMP is disabled
#endif
#ifdef LITE_WITH_LINUX
  int big_core_size = big_core_ids_.size();
  int little_core_size = little_core_ids_.size();
  int big_little_core_size = big_core_size + little_core_size;
  thread_num = std::min(thread_num, big_little_core_size);
  count_++;
  int shift_num = (count_ / 10) % big_core_size;
  switch (mode) {
    case LITE_POWER_FULL:
      RequestPowerFullMode(thread_num);
      break;
    case LITE_POWER_HIGH:
      RequestPowerHighMode(thread_num);
      break;
    case LITE_POWER_LOW:
      RequestPowerLowMode(thread_num);
      break;
    case LITE_POWER_NO_BIND:
      RequestPowerNoBindMode(thread_num);
      break;
    case LITE_POWER_RAND_HIGH:
      RequestPowerRandHighMode(shift_num, thread_num);
      break;
    case LITE_POWER_RAND_LOW:
      RequestPowerRandLowMode(shift_num, thread_num);
      break;
    default:
      LOG(FATAL) << "Unsupported power mode: " << mode;
      break;
  }
  if (active_ids_.size() == 0) {
    active_ids_.push_back(0);
  }
#ifdef ARM_WITH_OMP
  omp_set_num_threads(active_ids_.size());
#endif
  if (mode_ != LITE_POWER_NO_BIND) {
    if (check_cpu_online(active_ids_)) {
      bind_threads(active_ids_);
    } else {
      LOG(WARNING) << "Some cores are offline, switch to NO BIND MODE";
      mode_ = LITE_POWER_NO_BIND;
    }
  }
#else  // LITE_WITH_LINUX
  // only LITE_POWER_NO_BIND is supported in other OS
  RequestPowerNoBindMode(thread_num);
#ifdef ARM_WITH_OMP
  omp_set_num_threads(active_ids_.size());
#endif
#endif  // LITE_WITH_LINUX
  //! alloc memory for sgemm in this context
  workspace_.Resize(
      {static_cast<int64_t>(L2_cache_[active_ids_[0]] / sizeof(float))});
  arch_ = archs_[active_ids_[0]];
}
void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
  SetCacheInfo(0, l1size);
  SetCacheInfo(1, l2size);
  SetCacheInfo(2, l3size);
  workspace_.Resize({2 * (l1size + l2size)});
}
bool DeviceInfo::ExtendWorkspace(DDimLite dims) {
  auto count = dims.product();
  auto old = workspace_.dims();
  if (count == old.product()) {
    return false;
  }
  workspace_.Resize({static_cast<int64_t>(
      count + L2_cache_[active_ids_[0]] / sizeof(float))});
  return true;
}

#endif  // LITE_WITH_ARM
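A note on the LITE_POWER_RAND_* modes implemented above: the rotation offset is shift_num = (count_ / 10) % big_core_size, so the set of bound cores rotates once every ten SetRunMode() calls. The following standalone sketch only reproduces that arithmetic for illustration; it is not part of the file.

#include <cstdio>

// Illustrative only: the shift_num rotation used by the RAND power modes.
int main() {
  const int big_core_size = 4;  // assume 4 big cores
  for (int count = 0; count <= 40; count += 10) {
    int shift_num = (count / 10) % big_core_size;
    // With 4 big cores the starting core index cycles 0, 1, 2, 3, 0, ...
    std::printf("count=%d -> first bound big core index %d\n", count, shift_num);
  }
  return 0;
}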
paddle/fluid/lite/core/cpu_info.h
@@ -14,24 +14,12 @@
#pragma once
#include <cstdarg>
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/lite_tensor.h"
#include "paddle/fluid/lite/utils/cp_logging.h"
// removed by this commit:
//   #ifdef LITE_WITH_LINUX
//   #include <sys/syscall.h>
//   #include <unistd.h>
//   #endif
//   #if __APPLE__
//   #include "TargetConditionals.h"
//   #if TARGET_OS_IPHONE
//   #include <mach/machine.h>
//   #include <sys/sysctl.h>
//   #include <sys/types.h>
//   #endif  // TARGET_OS_IPHONE
//   #endif  // __APPLE__

namespace paddle {
namespace lite {
@@ -60,64 +48,73 @@ typedef enum {
// Old declarations removed by this commit:
class DeviceInfo {
 public:
  int idx_;
  int max_freq_;
  int min_freq_;
  int generate_arch_;
  int compute_core_num_;
  int max_memory_;
  int sharemem_size_;
  std::string device_name_;
  std::string compute_ability_;
  std::vector<int> L1_cache_;
  std::vector<int> L2_cache_;
  std::vector<int> L3_cache_;
  std::vector<int> core_ids_;
  std::vector<int> big_core_ids_;
  std::vector<int> little_core_ids_;
  std::vector<int> cluster_ids_;
  std::vector<ARMArch> archs_;

  static DeviceInfo& Global() {
    static auto* x = new DeviceInfo;
    return *x;
  }

  static void Init() {
    auto& info = Global();
    InitInternal(&info);
  }

 private:
  DeviceInfo() = default;
  static void InitInternal(DeviceInfo* dev);
};

size_t arm_get_meminfo();
int arm_get_cpucount();
void arm_get_cpu_arch(std::vector<ARMArch>* archs);
bool get_cpu_info_from_name(DeviceInfo* cpu_info, std::string hardware_name);

#ifdef LITE_WITH_LINUX
void set_default_cache(DeviceInfo* dev);
std::string arm_get_cpu_name();
int get_max_freq_khz(int cpuid);
int arm_sort_cpuid_by_max_frequency(int cpu_count, std::vector<int>* cpuids,
                                    const std::vector<int>& cpu_freq,
                                    std::vector<int>* cluster_ids);
int check_online(const std::vector<int>& core_ids);
int set_sched_affinity(const std::vector<int>& cpuids);
#endif  // LITE_WITH_LINUX

// New declaration introduced by this commit:
class DeviceInfo {
 public:
  static DeviceInfo& Global() {
    static auto* x = new DeviceInfo;
    return *x;
  }

  static int Init() {
    static int ret = Global().Setup();
    return ret;
  }

  int Setup();

  void SetRunMode(PowerMode mode, int thread_num);
  void SetCache(int l1size, int l2size, int l3size);
  void SetArch(ARMArch arch) { arch_ = arch; }

  PowerMode mode() const { return mode_; }
  int threads() const { return active_ids_.size(); }
  ARMArch arch() const { return arch_; }
  int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
  int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
  int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }

  template <typename T>
  T* workspace_data() {
    return workspace_.mutable_data<T>();
  }
  bool ExtendWorkspace(DDimLite dims);

 private:
  int core_num_;
  std::vector<int> max_freqs_;
  std::vector<int> min_freqs_;
  int mem_size_;
  std::string dev_name_;

  std::vector<int> L1_cache_;
  std::vector<int> L2_cache_;
  std::vector<int> L3_cache_;
  std::vector<int> core_ids_;
  std::vector<int> big_core_ids_;
  std::vector<int> little_core_ids_;
  std::vector<int> cluster_ids_;
  std::vector<ARMArch> archs_;
  ARMArch arch_;
  // LITE_POWER_HIGH stands for using big cores,
  // LITE_POWER_LOW stands for using small core,
  // LITE_POWER_FULL stands for using all cores
  PowerMode mode_;
  std::vector<int> active_ids_;
  TensorLite workspace_;
  int64_t count_{0};

  void SetCacheInfo(int cache_id, int argc, ...);
  void SetArchInfo(int argc, ...);
  bool SetCPUInfoByName();
  void SetCPUInfoByProb();
  void RequestPowerFullMode(const int thread_num);
  void RequestPowerHighMode(const int thread_num);
  void RequestPowerLowMode(const int thread_num);
  void RequestPowerNoBindMode(const int thread_num);
  void RequestPowerRandHighMode(const int shift_num, const int thread_num);
  void RequestPowerRandLowMode(const int shift_num, const int thread_num);

  DeviceInfo() = default;
};

#endif  // LITE_WITH_ARM
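A minimal caller-side sketch of the reworked DeviceInfo singleton, assuming only the members declared above; the snippet itself is not part of this commit and the function name is illustrative.

#include "paddle/fluid/lite/core/cpu_info.h"

// Illustrative use of the new DeviceInfo API: probe the device once, pick a
// power mode, then query what was actually bound.
void QueryArmDevice() {
  using namespace paddle::lite;
  DeviceInfo::Init();                       // runs Setup() exactly once
  auto& dev = DeviceInfo::Global();
  dev.SetRunMode(LITE_POWER_HIGH, 2);       // bind up to 2 big cores
  LOG(INFO) << "threads: " << dev.threads()
            << ", L1 cache: " << dev.l1_cache_size()
            << ", arch: A" << dev.arch();
}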
paddle/fluid/lite/kernels/arm/conv_compute.cc
@@ -28,8 +28,6 @@ void ConvCompute::PrepareForRun() {
  auto o_dims = param.output->dims();

  auto& ctx = this->ctx_->template As<ARMContext>();
  // removed by this commit:
  //   // TODO(xxx): make api and expose it
  //   ctx.SetRunMode(LITE_POWER_HIGH, 4);
  int win = x_dims[3];  // nchw
  int hin = x_dims[2];
paddle/fluid/lite/kernels/arm/fc_compute.cc
@@ -28,7 +28,6 @@ void FcCompute::PrepareForRun() {
  auto w_dims = param.w->dims();

  auto& ctx = this->ctx_->template As<ARMContext>();
  // removed by this commit:
  //   ctx.SetRunMode(LITE_POWER_HIGH, 4);
  CHECK_GE(x_dims.size(), 2UL);
  CHECK_EQ(w_dims.size(), 2UL);
paddle/fluid/lite/kernels/arm/mul_compute.cc
@@ -24,7 +24,6 @@ namespace arm {
void MulCompute::PrepareForRun() {
  auto& ctx = this->ctx_->template As<ARMContext>();
  // removed by this commit:
  //   ctx.SetRunMode(LITE_POWER_HIGH, 4);
}

void MulCompute::Run() {
paddle/fluid/lite/kernels/arm/pool_compute.cc
@@ -26,7 +26,6 @@ namespace arm {
void PoolCompute::PrepareForRun() {
  auto& ctx = this->ctx_->template As<ARMContext>();
  // removed by this commit:
  //   ctx.SetRunMode(LITE_POWER_HIGH, 4);
}

void PoolCompute::Run() {
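All four ARM kernel hunks above drop the hard-coded ctx.SetRunMode(LITE_POWER_HIGH, 4) call from PrepareForRun(). Per-kernel scratch memory still has to come from the shared workspace that SetRunMode()/SetCache() size in cpu_info.cc; the sketch below only illustrates how a kernel could grow and fetch that workspace through the DeviceInfo API from this commit. The function name and dims are hypothetical, and whether kernels reach this through ARMContext or directly is outside this diff.

#include "paddle/fluid/lite/core/cpu_info.h"

// Illustrative only: grow the shared workspace before a large GEMM-style op
// and fetch a typed pointer to it.
void PrepareScratch(const paddle::lite::DDimLite& im2col_dims) {
  auto& dev = paddle::lite::DeviceInfo::Global();
  dev.ExtendWorkspace(im2col_dims);              // resize if more than L2-sized scratch is needed
  float* scratch = dev.workspace_data<float>();  // the kernel would pack im2col data here
  (void)scratch;
}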
paddle/fluid/lite/kernels/x86/CMakeLists.txt
@@ -17,6 +17,7 @@ cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps}
cc_library(concat_compute_x86 SRCS concat_compute.cc DEPS ${lite_kernel_deps})
cc_library(conv_compute_x86 SRCS conv_compute.cc DEPS ${lite_kernel_deps} blas im2col vol2col)
cc_library(pool_compute_x86 SRCS pool_compute.cc DEPS ${lite_kernel_deps} pooling)
cc_library(batch_norm_compute_x86 SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
lite_cc_test(test_fc_compute_x86 SRCS fc_compute_test.cc DEPS fc_compute_x86)
lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86)
@@ -28,6 +29,7 @@ lite_cc_test(test_relu_compute_x86 SRCS relu_compute_test.cc DEPS relu_compute_x
lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86 operator)
lite_cc_test(test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86)
lite_cc_test(test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86)
lite_cc_test(test_batch_norm_compute_x86 SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_x86)
set(x86_kernels
@@ -44,6 +46,7 @@ set(x86_kernels
    concat_compute_x86
    conv_compute_x86
    pool_compute_x86
    batch_norm_compute_x86
    )
set(x86_kernels "${x86_kernels}" CACHE INTERNAL "x86 kernels")
paddle/fluid/lite/kernels/x86/batch_norm_compute.cc 0 → 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/lite/kernels/x86/batch_norm_compute.h"

REGISTER_LITE_KERNEL(batch_norm, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::BatchNormCompute<float>, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/kernels/x86/batch_norm_compute.h 0 → 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <random>
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {

template <typename T>
using EigenArrayMap =
    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
template <typename T>
using ConstEigenArrayMap =
    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
template <typename T>
using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;
template <typename T>
using ConstEigenVectorArrayMap =
    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;

template <typename T>
class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::BatchNormParam;

  void Run() override {
    auto &param = *param_.get_mutable<operators::BatchNormParam>();
    bool global_stats = param.is_test || param.use_global_stats;

    const auto *x = param.x;
    const auto &x_dims = x->dims();
    CHECK(x_dims.size() >= 2 && x_dims.size() <= 5);
    const int N = x_dims[0];
    const int C = param.data_layout == DATALAYOUT(kNCHW)
                      ? x_dims[1]
                      : x_dims[x_dims.size() - 1];
    const int sample_size = x->dims().production() / N / C;

    // alloc memory
    param.y->template mutable_data<T>();
    param.mean_out->template mutable_data<T>();
    param.variance_out->template mutable_data<T>();
    param.saved_mean->template mutable_data<T>();
    param.saved_variance->template mutable_data<T>();

    if (!global_stats) {
      // saved_xx is used just in this batch of data
      EigenVectorArrayMap<T> saved_mean_e(param.saved_mean->mutable_data<T>(),
                                          C);
      EigenVectorArrayMap<T> saved_variance_e(
          param.saved_variance->mutable_data<T>(), C);
      saved_mean_e.setZero();
      saved_variance_e.setZero();

      EigenVectorArrayMap<T> running_mean_arr(
          param.mean_out->mutable_data<T>(), C);
      EigenVectorArrayMap<T> running_var_arr(
          param.variance_out->mutable_data<T>(), C);

      if ((N * sample_size) == 1) {
        LOG(WARNING) << "Only 1 element in normalization dimension, "
                     << "we skip the batch norm calculation, let y = x.";
        framework::TensorCopy(x->raw_tensor(), platform::CPUPlace(),
                              &param.y->raw_tensor());
        return;
      }

      switch (param.data_layout) {
        case DATALAYOUT(kNCHW): {
          ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
          for (int nc = 0; nc < N * C; ++nc) {
            saved_mean_e(nc % C) += x_arr.col(nc).sum();
          }
          saved_mean_e /= N * sample_size;
          for (int nc = 0; nc < N * C; ++nc) {
            saved_variance_e(nc % C) +=
                (x_arr.col(nc) - saved_mean_e(nc % C)).matrix().squaredNorm();
          }
          saved_variance_e /= N * sample_size;
          break;
        }
        default:
          LOG(FATAL) << "Unknown storage order: "
                     << DataLayoutToStr(param.data_layout);
          break;
      }
      running_mean_arr = running_mean_arr * param.momentum +
                         saved_mean_e * (1. - param.momentum);
      running_var_arr = running_var_arr * param.momentum +
                        saved_variance_e * (1. - param.momentum);
    }

    // use SavedMean and SavedVariance to do normalize
    Eigen::Array<T, Eigen::Dynamic, 1> inv_std(C);
    if (global_stats) {
      ConstEigenVectorArrayMap<T> var_arr(param.variance->data<T>(), C);
      inv_std = (var_arr + param.epsilon).sqrt().inverse();
    } else {
      EigenVectorArrayMap<T> saved_inv_std(
          param.saved_variance->mutable_data<T>(), C);
      // inverse SavedVariance first, gradient will use it too.
      saved_inv_std = (saved_inv_std + param.epsilon).inverse().sqrt();
      inv_std = saved_inv_std;
    }
    ConstEigenVectorArrayMap<T> mean_arr(
        global_stats ? param.mean->data<T>() : param.saved_mean->data<T>(), C);

    // (x - est_mean) * inv_var * scale + bias
    // formula transform ====>
    // (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
    ConstEigenVectorArrayMap<T> scale_arr(param.scale->data<T>(), C);
    ConstEigenVectorArrayMap<T> bias_arr(param.bias->data<T>(), C);
    Eigen::Array<T, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
    Eigen::Array<T, Eigen::Dynamic, 1> new_bias =
        bias_arr - mean_arr * inv_std * scale_arr;

    switch (param.data_layout) {
      case DATALAYOUT(kNCHW): {
        EigenArrayMap<T> y_arr(param.y->mutable_data<T>(), sample_size, N * C);
        ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
        for (int nc = 0; nc < N * C; ++nc) {
          y_arr.col(nc) = x_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
        }
        break;
      }
      default:
        LOG(FATAL) << "Unknown storage order: "
                   << DataLayoutToStr(param.data_layout);
        break;
    }
  }

  virtual ~BatchNormCompute() = default;
};

}  // namespace x86
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
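The Run() body above relies on folding mean, variance, scale, and bias into a single per-channel multiply-add (the new_scale / new_bias arrays). A minimal standalone sketch of that folding, for one channel; the helper name is illustrative and not part of the kernel.

#include <cmath>

// Illustrative: fold batch-norm statistics into one multiply-add, mirroring
// the new_scale / new_bias computation in BatchNormCompute::Run().
inline void FoldBatchNormChannel(float mean, float var, float scale, float bias,
                                 float epsilon, float* new_scale,
                                 float* new_bias) {
  const float inv_std = 1.0f / std::sqrt(var + epsilon);
  *new_scale = inv_std * scale;               // x is multiplied by this
  *new_bias = bias - mean * inv_std * scale;  // then this is added
  // y = x * (*new_scale) + (*new_bias)
  //   == (x - mean) / sqrt(var + eps) * scale + bias
}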
paddle/fluid/lite/kernels/x86/batch_norm_compute_test.cc 0 → 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/lite/kernels/x86/batch_norm_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {

TEST(batch_norm_x86, retrive_op) {
  auto batch_norm =
      KernelRegistry::Global().Create<TARGET(kX86), PRECISION(kFloat)>(
          "batch_norm");
  ASSERT_FALSE(batch_norm.empty());
  ASSERT_TRUE(batch_norm.front());
}

TEST(batch_norm_x86, init) {
  BatchNormCompute<float> batch_norm;
  ASSERT_EQ(batch_norm.precision(), PRECISION(kFloat));
  ASSERT_EQ(batch_norm.target(), TARGET(kX86));
}

TEST(batch_norm_x86, run_test) {
  lite::Tensor x, scale, bias, mean, variance, y, mean_out, variance_out,
      saved_mean, saved_variance;
  constexpr int batch_size = 2;
  std::vector<int64_t> x_shape{batch_size, 3, 64, 64};
  x.Resize(lite::DDim(x_shape));

  std::vector<int64_t> scale_shape{3};
  scale.Resize(lite::DDim(scale_shape));
  std::vector<int64_t> bias_shape{3};
  bias.Resize(lite::DDim(bias_shape));
  std::vector<int64_t> mean_shape{3};
  mean.Resize(lite::DDim(mean_shape));
  std::vector<int64_t> variance_shape{3};
  variance.Resize(lite::DDim(variance_shape));

  std::vector<int64_t> y_shape{batch_size, 3, 64, 64};
  y.Resize(lite::DDim(y_shape));
  std::vector<int64_t> mean_out_shape{3};
  mean_out.Resize(lite::DDim(mean_out_shape));
  std::vector<int64_t> variance_out_shape{3};
  variance_out.Resize(lite::DDim(variance_out_shape));
  std::vector<int64_t> saved_mean_shape{3};
  saved_mean.Resize(lite::DDim(saved_mean_shape));
  std::vector<int64_t> saved_variance_shape{3};
  saved_variance.Resize(lite::DDim(saved_variance_shape));

  auto x_data = x.mutable_data<float>();
  auto scale_data = scale.mutable_data<float>();
  auto bias_data = bias.mutable_data<float>();
  auto mean_data = mean.mutable_data<float>();
  auto variance_data = variance.mutable_data<float>();
  y.mutable_data<float>();
  mean_out.mutable_data<float>();
  variance_out.mutable_data<float>();
  saved_mean.mutable_data<float>();
  saved_variance.mutable_data<float>();

  for (int64_t i = 0; i < x.dims().production(); i++) {
    x_data[i] = static_cast<float>(i);
  }
  for (int i = 0; i < scale.dims().production(); i++) {
    scale_data[i] = static_cast<float>(i) * 0.01f + 0.03f;
  }
  for (int i = 0; i < bias.dims().production(); i++) {
    bias_data[i] = static_cast<float>(i) * 0.065f + 0.1f;
  }
  for (int i = 0; i < mean.dims().production(); i++) {
    mean_data[i] = static_cast<float>(i) * 0.0565f;
  }
  for (int i = 0; i < variance.dims().production(); i++) {
    variance_data[i] = static_cast<float>(i) * 2.08f + 1.5f;
  }

  BatchNormCompute<float> batch_norm;
  operators::BatchNormParam param;

  param.x = &x;
  param.is_test = false;
  param.scale = &scale;
  param.bias = &bias;
  param.mean = &mean;
  param.variance = &variance;
  param.use_global_stats = false;
  param.epsilon = 1e-4f;
  param.momentum = 0.9f;
  param.y = &y;
  param.mean_out = &mean_out;
  param.variance_out = &variance_out;
  param.saved_mean = &saved_mean;
  param.saved_variance = &saved_variance;

  batch_norm.SetParam(param);
  batch_norm.Run();

  LOG(INFO) << "output: " << y;
  LOG(INFO) << "mean_out: " << mean_out;
  LOG(INFO) << "variance_out: " << variance_out;
  LOG(INFO) << "saved_mean: " << saved_mean;
  LOG(INFO) << "saved_variance: " << saved_variance;

  /*for (int i = 0; i < y.dims().production(); i++) {
    if (i < 5 || i > y.dims().production() - 5) LOG(INFO) << y_data[i];
  }*/
}

}  // namespace x86
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
USE_LITE_KERNEL(batch_norm, kX86, kFloat, kNCHW, def);
paddle/fluid/lite/tools/build.sh
@@ -135,8 +135,8 @@ function test_arm_model {
    adb -s emulator-${port} push ${model_dir} ${adb_work_dir}
    adb -s emulator-${port} push ${testpath} ${adb_work_dir}
    adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}"
    # was: local adb_model_path="./${adb_work_dir}/`basename ${model_dir}`"
    local adb_model_path="${adb_work_dir}/`basename ${model_dir}`"
    # was: adb -s emulator-${port} shell "./${adb_work_dir}/${test_name} --eval_model_dir=$adb_model_path"
    adb -s emulator-${port} shell "${adb_work_dir}/${test_name} --eval_model_dir=$adb_model_path"
}
@@ -225,16 +225,11 @@ function test_arm {
    for _test in $(cat $TESTS_FILE); do
        test_arm_android $_test $port
    done
    # removed by this commit:
    #   # TODO(sangoly): refine this
    #   test_arm_model "test_cxx_api_lite" $port "./third_party/install/mobilenet_v2_relu"
}

function prepare_emulator {
    local port_armv8=$1
    local port_armv7=$2

    adb kill-server
    adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done
@@ -245,6 +240,18 @@ function build_test_arm {
    echo n | avdmanager create avd -f -n paddle-armv7 -k "system-images;android-24;google_apis;armeabi-v7a"
    echo -ne '\n' | ${ANDROID_HOME}/emulator/emulator -avd paddle-armv7 -noaudio -no-window -gpu off -verbose -port ${port_armv7} &
    sleep 1m
}

# We split the arm unittests into several sub-tasks to parallelize them and reduce the overall CI time.
# sub-task1
function build_test_arm_subtask_android {
    ########################################################################
    # job 1-4 must be in one runner
    port_armv8=5554
    port_armv7=5556

    prepare_emulator $port_armv8 $port_armv7

    # job 1
    build_arm "android" "armv8" "gcc"
@@ -252,9 +259,9 @@ function build_test_arm {
    cd -

    # job 2
    # build_arm "android" "armv8" "clang"
    # test_arm "android" "armv8" "clang" ${port_armv8}
    # cd -

    # job 3
    build_arm "android" "armv7" "gcc"
@@ -262,13 +269,22 @@ function build_test_arm {
    cd -

    # job 4
    # build_arm "android" "armv7" "clang"
    # test_arm "android" "armv7" "clang" ${port_armv7}
    # cd -

    adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done
    echo "Done"
}

# sub-task2
function build_test_arm_subtask_armlinux {
    ########################################################################
    # job 1-4 must be in one runner
    port_armv8=5554
    port_armv7=5556
    prepare_emulator $port_armv8 $port_armv7

    # job 5
    build_arm "armlinux" "armv8"
@@ -285,9 +301,47 @@ function build_test_arm {
    test_arm "armlinux" "armv7hf"
    cd -

    adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done
    echo "Done"
}

# sub-task3
function build_test_arm_subtask3_mobilenet_v2 {
    local port_armv8=5554
    local port_armv7=5556
    # We just test the following single environment to limit the CI time.
    local os=android
    local abi=armv8
    local lang=gcc

    cur_dir=$(pwd)
    build_dir=$cur_dir/build.lite.${os}.${abi}.${lang}
    mkdir -p $build_dir
    cd $build_dir
    cmake_arm $os $abi $lang
    make test_cxx_api_lite -j$NUM_CORES_FOR_COMPILE

    prepare_emulator $port_armv8 $port_armv7

    # just test the model on armv8
    test_arm_model "test_cxx_api_lite" $port_armv8 "./third_party/install/mobilenet_v2_relu"

    adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done
    echo "Done"
}

# Build the code and run lite arm tests. This is executed in the CI system.
function build_test_arm {
    ########################################################################
    # job 1-4 must be in one runner
    port_armv8=5554
    port_armv7=5556

    build_test_arm_subtask_android
    build_test_arm_subtask_armlinux
}
############################# MAIN #################################
function print_usage {
    echo -e "\nUSAGE:"
@@ -379,6 +433,18 @@ function main {
            build_test_arm
            shift
            ;;
        build_test_arm_subtask_android)
            build_test_arm_subtask_android
            shift
            ;;
        build_test_arm_subtask_armlinux)
            build_test_arm_subtask_armlinux
            shift
            ;;
        build_test_arm_model1)
            build_test_arm_subtask3_mobilenet_v2
            shift
            ;;
        check_style)
            check_style
            shift
@@ -397,4 +463,3 @@ function main {
}

main $@