Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c8d89a2b
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c8d89a2b
编写于
6月 18, 2019
作者:
H
hong19860320
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make all ArmContext share the same DeviceInfo, and export SetRunMode to set thread num
test=develop
上级
ce6c24e6
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
318 addition
and
365 deletion
+318
-365
paddle/fluid/lite/api/cxx_api_bin.cc
paddle/fluid/lite/api/cxx_api_bin.cc
+4
-3
paddle/fluid/lite/core/context.cc
paddle/fluid/lite/core/context.cc
+1
-316
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+21
-26
paddle/fluid/lite/core/cpu_info.cc
paddle/fluid/lite/core/cpu_info.cc
+263
-1
paddle/fluid/lite/core/cpu_info.h
paddle/fluid/lite/core/cpu_info.h
+29
-14
paddle/fluid/lite/kernels/arm/conv_compute.cc
paddle/fluid/lite/kernels/arm/conv_compute.cc
+0
-2
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+0
-1
paddle/fluid/lite/kernels/arm/mul_compute.cc
paddle/fluid/lite/kernels/arm/mul_compute.cc
+0
-1
paddle/fluid/lite/kernels/arm/pool_compute.cc
paddle/fluid/lite/kernels/arm/pool_compute.cc
+0
-1
未找到文件。
paddle/fluid/lite/api/cxx_api_bin.cc
浏览文件 @
c8d89a2b
...
...
@@ -28,9 +28,10 @@ double time_diff(Time t1, Time t2) {
return
counter
.
count
()
/
1000.0
;
}
void
Run
(
const
char
*
model_dir
,
int
repeat
)
{
void
Run
(
const
char
*
model_dir
,
int
repeat
,
int
thread_num
)
{
#ifdef LITE_WITH_ARM
DeviceInfo
::
Init
();
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
thread_num
);
#endif
lite
::
ExecutorLite
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
...
...
@@ -66,8 +67,8 @@ void Run(const char* model_dir, int repeat) {
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
CHECK_EQ
(
argc
,
3
)
<<
"usage: ./cmd <model_dir> <repeat
>"
;
paddle
::
lite
::
Run
(
argv
[
1
],
std
::
stoi
(
argv
[
2
]));
CHECK_EQ
(
argc
,
4
)
<<
"usage: ./cmd <model_dir> <repeat> <thread_num
>"
;
paddle
::
lite
::
Run
(
argv
[
1
],
std
::
stoi
(
argv
[
2
])
,
std
::
stoi
(
argv
[
3
])
);
return
0
;
}
...
...
paddle/fluid/lite/core/context.cc
浏览文件 @
c8d89a2b
...
...
@@ -13,322 +13,7 @@
// limitations under the License.
#include "paddle/fluid/lite/core/context.h"
#include "paddle/fluid/lite/core/cpu_info.h"
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
namespace
paddle
{
namespace
lite
{
#ifdef LITE_WITH_ARM
void
Context
<
TargetType
::
kARM
>::
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
int
cpu_count
=
arm_get_cpucount
();
dev
.
L1_cache_
.
resize
(
cpu_count
);
dev
.
L2_cache_
.
resize
(
cpu_count
);
dev
.
L3_cache_
.
resize
(
cpu_count
);
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
dev
.
L1_cache_
[
i
]
=
l1size
;
dev
.
L2_cache_
[
i
]
=
l2size
;
dev
.
L3_cache_
[
i
]
=
l3size
;
}
workspace_
.
Resize
({
2
*
(
l1size
+
l2size
)});
}
Context
<
TargetType
::
kARM
>::
Context
()
{
active_ids_
=
{
0
};
mode_
=
LITE_POWER_HIGH
;
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
workspace_
.
Resize
(
{
static_cast
<
int64_t
>
(
dev
.
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
))});
#ifdef TARGET_IOS
arch_
=
APPLE
;
// use 6x8
#else
if
(
dev
.
big_core_ids_
.
size
()
>
0
)
{
arch_
=
dev
.
archs_
[
dev
.
big_core_ids_
[
0
]];
}
#endif
}
PowerMode
Context
<
TargetType
::
kARM
>::
mode
()
const
{
return
mode_
;
}
int
Context
<
TargetType
::
kARM
>::
threads
()
const
{
return
active_ids_
.
size
();
}
Context
<
TargetType
::
kARM
>::
Context
(
const
ARMContext
&
ctx
)
{
mode_
=
ctx
.
mode_
;
active_ids_
=
ctx
.
active_ids_
;
workspace_
=
ctx
.
workspace_
;
arch_
=
ctx
.
arch_
;
count_
=
ctx
.
count_
;
}
ARMContext
&
Context
<
TargetType
::
kARM
>::
operator
=
(
const
ARMContext
&
ctx
)
{
mode_
=
ctx
.
mode_
;
active_ids_
=
ctx
.
active_ids_
;
workspace_
=
ctx
.
workspace_
;
arch_
=
ctx
.
arch_
;
count_
=
ctx
.
count_
;
return
*
this
;
}
void
Context
<
TargetType
::
kARM
>::
BindDev
()
{
#ifdef ARM_WITH_OMP
int
num_threads
=
active_ids_
.
size
();
omp_set_num_threads
(
num_threads
);
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
ssarets
;
for
(
int
j
=
0
;
j
<
num_threads
;
++
j
)
{
ssarets
.
push_back
(
0
);
}
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
ssarets
[
i
]
=
set_sched_affinity
(
active_ids_
);
}
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
if
(
ssarets
[
i
]
!=
0
)
{
LOG
(
ERROR
)
<<
"set cpu affinity failed, cpuID: "
<<
active_ids_
[
i
];
return
;
}
}
#endif // LITE_WITH_LINUX
#else // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
cpuid1
;
cpuid1
.
push_back
(
active_ids_
[
0
]);
int
ssaret
=
set_sched_affinity
(
cpuid1
);
if
(
ssaret
!=
0
)
{
printf
(
"set cpu affinity failed, cpuID: %d
\n
"
,
active_ids_
[
0
]);
return
;
}
#endif // LITE_WITH_LINUX
#endif // ARM_WITH_OMP
}
void
Context
<
TargetType
::
kARM
>::
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
int
big_core_size
=
dev
.
big_core_ids_
.
size
();
int
small_core_size
=
dev
.
little_core_ids_
.
size
();
if
(
threads
>
big_core_size
+
small_core_size
)
{
threads
=
big_core_size
+
small_core_size
;
}
#ifdef ARM_WITH_OMP
count_
++
;
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
switch
(
mode
)
{
case
LITE_POWER_FULL
:
mode_
=
mode
;
active_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
if
(
i
<
big_core_size
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
else
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
-
big_core_size
]);
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
ERROR
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores"
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_NO_BIND
:
mode_
=
LITE_POWER_NO_BIND
;
active_ids_
.
clear
();
if
(
threads
>
dev
.
core_ids_
.
size
())
{
active_ids_
.
resize
(
dev
.
core_ids_
.
size
());
}
else
{
active_ids_
.
resize
(
threads
);
}
break
;
case
LITE_POWER_RAND_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[(
i
+
shift_num
)
%
big_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
WARNING
)
<<
"HIGH POWER MODE is not support, switch to little cores"
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_RAND_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[(
i
+
shift_num
)
%
small_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
}
//! fix multi-threads LITE_POWER_HIGH mode
if
(
mode_
==
LITE_POWER_NO_BIND
||
threads
>
1
)
{
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
else
{
if
(
check_online
(
active_ids_
))
{
BindDev
();
}
else
{
LOG
(
ERROR
)
<<
"core id "
<<
active_ids_
[
0
]
<<
" is offline, switch to NO BIND MODE"
;
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
}
#else
if
(
big_core_size
>
0
)
{
active_ids_
=
{
dev
.
big_core_ids_
[
0
]};
}
else
{
active_ids_
=
{
0
};
}
#endif
//! alloc memory for sgemm in this context
int
temp_mem_size
=
DeviceInfo
::
Global
().
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
);
workspace_
.
Resize
({
temp_mem_size
});
arch_
=
DeviceInfo
::
Global
().
archs_
[
active_ids_
[
0
]];
}
ARMArch
Context
<
TargetType
::
kARM
>::
arch
()
const
{
return
arch_
;
}
void
Context
<
TargetType
::
kARM
>::
SetArch
(
ARMArch
arch
)
{
arch_
=
arch
;
}
int
Context
<
TargetType
::
kARM
>::
l1_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L1_cache_
[
active_ids_
[
0
]];
}
int
Context
<
TargetType
::
kARM
>::
l2_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L2_cache_
[
active_ids_
[
0
]];
}
int
Context
<
TargetType
::
kARM
>::
l3_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L3_cache_
[
active_ids_
[
0
]];
}
bool
Context
<
TargetType
::
kARM
>::
ExtendWorkspace
(
DDimLite
dims
)
{
auto
count
=
dims
.
product
();
auto
old
=
workspace_
.
dims
();
if
(
count
==
old
.
product
())
{
return
false
;
}
workspace_
.
Resize
(
{
static_cast
<
int64_t
>
(
count
+
l2_cache_size
()
/
sizeof
(
float
))});
return
true
;
}
#endif // LITE_WITH_ARM
}
// namespace lite
namespace
lite
{}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/context.h
浏览文件 @
c8d89a2b
...
...
@@ -61,47 +61,42 @@ class Context<TargetType::kHost> {
template
<
>
class
Context
<
TargetType
::
kARM
>
{
public:
Context
();
Context
(
PowerMode
mode
,
int
threads
);
Context
()
{}
explicit
Context
(
const
ARMContext
&
ctx
);
ARMContext
&
operator
=
(
const
ARMContext
&
ctx
)
;
ARMContext
&
operator
=
(
const
ARMContext
&
ctx
)
{}
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{
DeviceInfo
::
Init
();
}
void
InitOnce
()
{}
void
CopyShared
(
const
ARMContext
*
ctx
)
{}
void
SetRunMode
(
PowerMode
mode
,
int
threads
);
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
);
void
SetArch
(
ARMArch
arch
);
void
BindDev
();
void
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
return
DeviceInfo
::
Global
().
SetRunMode
(
mode
,
threads
);
}
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
return
DeviceInfo
::
Global
().
SetCache
(
l1size
,
l2size
,
l3size
);
}
void
SetArch
(
ARMArch
arch
)
{
return
DeviceInfo
::
Global
().
SetArch
(
arch
);
}
void
BindDev
()
{
return
DeviceInfo
::
Global
().
BindDev
();
}
PowerMode
mode
()
const
;
int
threads
()
const
;
ARMArch
arch
()
const
;
PowerMode
mode
()
const
{
return
DeviceInfo
::
Global
().
mode
();
}
int
threads
()
const
{
return
DeviceInfo
::
Global
().
threads
();
}
ARMArch
arch
()
const
{
return
DeviceInfo
::
Global
().
arch
();
}
template
<
typename
T
>
T
*
workspace_data
()
{
return
workspace_
.
mutabl
e_data
<
T
>
();
return
DeviceInfo
::
Global
().
workspac
e_data
<
T
>
();
}
int
l1_cache_size
()
const
;
int
l2_cache_size
()
const
;
int
l3_cache_size
()
const
;
bool
ExtendWorkspace
(
DDimLite
dims
);
int
l1_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l1_cache_size
();
}
int
l2_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l2_cache_size
();
}
int
l3_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l3_cache_size
();
}
bool
ExtendWorkspace
(
DDimLite
dims
)
{
return
DeviceInfo
::
Global
().
ExtendWorkspace
(
dims
);
}
std
::
string
name
()
const
{
return
"ARMContext"
;
}
private:
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
ARMArch
arch_
;
PowerMode
mode_
;
std
::
vector
<
int
>
active_ids_
;
TensorLite
workspace_
;
int64_t
count_
{
0
};
};
#endif
...
...
paddle/fluid/lite/core/cpu_info.cc
浏览文件 @
c8d89a2b
...
...
@@ -12,8 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
#include "paddle/fluid/lite/core/cpu_info.h"
#include <cstdarg>
namespace
paddle
{
namespace
lite
{
...
...
@@ -73,6 +89,252 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) {
#elif defined(TARGET_IOS)
arm_get_cpu_arch
(
&
dev
->
archs_
);
#endif
dev
->
active_ids_
=
{
0
};
dev
->
mode_
=
LITE_POWER_HIGH
;
dev
->
workspace_
.
Resize
({
static_cast
<
int64_t
>
(
dev
->
L2_cache_
[
dev
->
active_ids_
[
0
]]
/
sizeof
(
float
))});
#ifdef TARGET_IOS
dev
->
arch_
=
APPLE
;
// use 6x8
#else
if
(
dev
->
big_core_ids_
.
size
()
>
0
)
{
dev
->
arch_
=
dev
->
archs_
[
dev
->
big_core_ids_
[
0
]];
}
#endif
}
void
DeviceInfo
::
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
int
cpu_count
=
arm_get_cpucount
();
L1_cache_
.
resize
(
cpu_count
);
L2_cache_
.
resize
(
cpu_count
);
L3_cache_
.
resize
(
cpu_count
);
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
L1_cache_
[
i
]
=
l1size
;
L2_cache_
[
i
]
=
l2size
;
L3_cache_
[
i
]
=
l3size
;
}
workspace_
.
Resize
({
2
*
(
l1size
+
l2size
)});
}
void
DeviceInfo
::
BindDev
()
{
#ifdef ARM_WITH_OMP
int
num_threads
=
active_ids_
.
size
();
omp_set_num_threads
(
num_threads
);
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
ssarets
;
for
(
int
j
=
0
;
j
<
num_threads
;
++
j
)
{
ssarets
.
push_back
(
0
);
}
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
ssarets
[
i
]
=
set_sched_affinity
(
active_ids_
);
}
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
if
(
ssarets
[
i
]
!=
0
)
{
LOG
(
ERROR
)
<<
"set cpu affinity failed, cpuID: "
<<
active_ids_
[
i
];
return
;
}
}
#endif // LITE_WITH_LINUX
#else // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
cpuid1
;
cpuid1
.
push_back
(
active_ids_
[
0
]);
int
ssaret
=
set_sched_affinity
(
cpuid1
);
if
(
ssaret
!=
0
)
{
printf
(
"set cpu affinity failed, cpuID: %d
\n
"
,
active_ids_
[
0
]);
return
;
}
#endif // LITE_WITH_LINUX
#endif // ARM_WITH_OMP
}
void
DeviceInfo
::
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
LOG
(
INFO
)
<<
"ARM SetRunMode called"
;
int
big_core_size
=
big_core_ids_
.
size
();
int
small_core_size
=
little_core_ids_
.
size
();
if
(
threads
>
big_core_size
+
small_core_size
)
{
threads
=
big_core_size
+
small_core_size
;
}
#ifdef ARM_WITH_OMP
count_
++
;
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
switch
(
mode
)
{
case
LITE_POWER_FULL
:
mode_
=
mode
;
active_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
if
(
i
<
big_core_size
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
else
{
active_ids_
.
push_back
(
little_core_ids_
[
i
-
big_core_size
]);
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
ERROR
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores."
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_NO_BIND
:
mode_
=
LITE_POWER_NO_BIND
;
active_ids_
.
clear
();
if
(
threads
>
core_ids_
.
size
())
{
active_ids_
.
resize
(
core_ids_
.
size
());
}
else
{
active_ids_
.
resize
(
threads
);
}
break
;
case
LITE_POWER_RAND_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[(
i
+
shift_num
)
%
big_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
WARNING
)
<<
"HIGH POWER MODE is not support, switch to little cores."
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_RAND_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[(
i
+
shift_num
)
%
small_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores."
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
}
//! fix multi-threads LITE_POWER_HIGH mode
if
(
mode_
==
LITE_POWER_NO_BIND
||
threads
>
1
)
{
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
else
{
if
(
check_online
(
active_ids_
))
{
BindDev
();
}
else
{
LOG
(
WARNING
)
<<
"core id "
<<
active_ids_
[
0
]
<<
" is offline, switch to NO BIND MODE"
;
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
}
#else
if
(
big_core_size
>
0
)
{
active_ids_
=
{
big_core_ids_
[
0
]};
}
else
{
active_ids_
=
{
0
};
}
#endif
//! alloc memory for sgemm in this context
int
temp_mem_size
=
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
);
workspace_
.
Resize
({
temp_mem_size
});
arch_
=
archs_
[
active_ids_
[
0
]];
}
bool
DeviceInfo
::
ExtendWorkspace
(
DDimLite
dims
)
{
auto
count
=
dims
.
product
();
auto
old
=
workspace_
.
dims
();
if
(
count
==
old
.
product
())
{
return
false
;
}
workspace_
.
Resize
({
static_cast
<
int64_t
>
(
count
+
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
))});
return
true
;
}
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
...
...
paddle/fluid/lite/core/cpu_info.h
浏览文件 @
c8d89a2b
...
...
@@ -16,22 +16,9 @@
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/lite_tensor.h"
#include "paddle/fluid/lite/utils/cp_logging.h"
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
namespace
paddle
{
namespace
lite
{
...
...
@@ -80,6 +67,15 @@ class DeviceInfo {
std
::
vector
<
int
>
cluster_ids_
;
std
::
vector
<
ARMArch
>
archs_
;
ARMArch
arch_
;
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
PowerMode
mode_
;
std
::
vector
<
int
>
active_ids_
;
TensorLite
workspace_
;
int64_t
count_
{
0
};
static
DeviceInfo
&
Global
()
{
static
auto
*
x
=
new
DeviceInfo
;
return
*
x
;
...
...
@@ -90,6 +86,25 @@ class DeviceInfo {
InitInternal
(
&
info
);
}
void
SetRunMode
(
PowerMode
mode
,
int
threads
);
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
);
void
SetArch
(
ARMArch
arch
)
{
arch_
=
arch
;
}
void
BindDev
();
PowerMode
mode
()
const
{
return
mode_
;
}
int
threads
()
const
{
return
active_ids_
.
size
();
}
ARMArch
arch
()
const
{
return
arch_
;
}
template
<
typename
T
>
T
*
workspace_data
()
{
return
workspace_
.
mutable_data
<
T
>
();
}
int
l1_cache_size
()
const
{
return
L1_cache_
[
active_ids_
[
0
]];
}
int
l2_cache_size
()
const
{
return
L2_cache_
[
active_ids_
[
0
]];
}
int
l3_cache_size
()
const
{
return
L3_cache_
[
active_ids_
[
0
]];
}
bool
ExtendWorkspace
(
DDimLite
dims
);
private:
DeviceInfo
()
=
default
;
static
void
InitInternal
(
DeviceInfo
*
dev
);
...
...
paddle/fluid/lite/kernels/arm/conv_compute.cc
浏览文件 @
c8d89a2b
...
...
@@ -28,8 +28,6 @@ void ConvCompute::PrepareForRun() {
auto
o_dims
=
param
.
output
->
dims
();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
// TODO(xxx): make api and expose it
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
int
win
=
x_dims
[
3
];
// nchw
int
hin
=
x_dims
[
2
];
...
...
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
c8d89a2b
...
...
@@ -28,7 +28,6 @@ void FcCompute::PrepareForRun() {
auto
w_dims
=
param
.
w
->
dims
();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
CHECK_GE
(
x_dims
.
size
(),
2UL
);
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
...
...
paddle/fluid/lite/kernels/arm/mul_compute.cc
浏览文件 @
c8d89a2b
...
...
@@ -24,7 +24,6 @@ namespace arm {
void
MulCompute
::
PrepareForRun
()
{
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
}
void
MulCompute
::
Run
()
{
...
...
paddle/fluid/lite/kernels/arm/pool_compute.cc
浏览文件 @
c8d89a2b
...
...
@@ -26,7 +26,6 @@ namespace arm {
void
PoolCompute
::
PrepareForRun
()
{
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
}
void
PoolCompute
::
Run
()
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录