Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
c8d89a2b
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c8d89a2b
编写于
6月 18, 2019
作者:
H
hong19860320
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make all ArmContext share the same DeviceInfo, and export SetRunMode to set thread num
test=develop
上级
ce6c24e6
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
318 addition
and
365 deletion
+318
-365
paddle/fluid/lite/api/cxx_api_bin.cc
paddle/fluid/lite/api/cxx_api_bin.cc
+4
-3
paddle/fluid/lite/core/context.cc
paddle/fluid/lite/core/context.cc
+1
-316
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+21
-26
paddle/fluid/lite/core/cpu_info.cc
paddle/fluid/lite/core/cpu_info.cc
+263
-1
paddle/fluid/lite/core/cpu_info.h
paddle/fluid/lite/core/cpu_info.h
+29
-14
paddle/fluid/lite/kernels/arm/conv_compute.cc
paddle/fluid/lite/kernels/arm/conv_compute.cc
+0
-2
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+0
-1
paddle/fluid/lite/kernels/arm/mul_compute.cc
paddle/fluid/lite/kernels/arm/mul_compute.cc
+0
-1
paddle/fluid/lite/kernels/arm/pool_compute.cc
paddle/fluid/lite/kernels/arm/pool_compute.cc
+0
-1
未找到文件。
paddle/fluid/lite/api/cxx_api_bin.cc
浏览文件 @
c8d89a2b
...
...
@@ -28,9 +28,10 @@ double time_diff(Time t1, Time t2) {
return
counter
.
count
()
/
1000.0
;
}
void
Run
(
const
char
*
model_dir
,
int
repeat
)
{
void
Run
(
const
char
*
model_dir
,
int
repeat
,
int
thread_num
)
{
#ifdef LITE_WITH_ARM
DeviceInfo
::
Init
();
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
thread_num
);
#endif
lite
::
ExecutorLite
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
...
...
@@ -66,8 +67,8 @@ void Run(const char* model_dir, int repeat) {
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
CHECK_EQ
(
argc
,
3
)
<<
"usage: ./cmd <model_dir> <repeat
>"
;
paddle
::
lite
::
Run
(
argv
[
1
],
std
::
stoi
(
argv
[
2
]));
CHECK_EQ
(
argc
,
4
)
<<
"usage: ./cmd <model_dir> <repeat> <thread_num
>"
;
paddle
::
lite
::
Run
(
argv
[
1
],
std
::
stoi
(
argv
[
2
])
,
std
::
stoi
(
argv
[
3
])
);
return
0
;
}
...
...
paddle/fluid/lite/core/context.cc
浏览文件 @
c8d89a2b
...
...
@@ -13,322 +13,7 @@
// limitations under the License.
#include "paddle/fluid/lite/core/context.h"
#include "paddle/fluid/lite/core/cpu_info.h"
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
namespace
paddle
{
namespace
lite
{
#ifdef LITE_WITH_ARM
void
Context
<
TargetType
::
kARM
>::
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
int
cpu_count
=
arm_get_cpucount
();
dev
.
L1_cache_
.
resize
(
cpu_count
);
dev
.
L2_cache_
.
resize
(
cpu_count
);
dev
.
L3_cache_
.
resize
(
cpu_count
);
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
dev
.
L1_cache_
[
i
]
=
l1size
;
dev
.
L2_cache_
[
i
]
=
l2size
;
dev
.
L3_cache_
[
i
]
=
l3size
;
}
workspace_
.
Resize
({
2
*
(
l1size
+
l2size
)});
}
Context
<
TargetType
::
kARM
>::
Context
()
{
active_ids_
=
{
0
};
mode_
=
LITE_POWER_HIGH
;
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
workspace_
.
Resize
(
{
static_cast
<
int64_t
>
(
dev
.
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
))});
#ifdef TARGET_IOS
arch_
=
APPLE
;
// use 6x8
#else
if
(
dev
.
big_core_ids_
.
size
()
>
0
)
{
arch_
=
dev
.
archs_
[
dev
.
big_core_ids_
[
0
]];
}
#endif
}
PowerMode
Context
<
TargetType
::
kARM
>::
mode
()
const
{
return
mode_
;
}
int
Context
<
TargetType
::
kARM
>::
threads
()
const
{
return
active_ids_
.
size
();
}
Context
<
TargetType
::
kARM
>::
Context
(
const
ARMContext
&
ctx
)
{
mode_
=
ctx
.
mode_
;
active_ids_
=
ctx
.
active_ids_
;
workspace_
=
ctx
.
workspace_
;
arch_
=
ctx
.
arch_
;
count_
=
ctx
.
count_
;
}
ARMContext
&
Context
<
TargetType
::
kARM
>::
operator
=
(
const
ARMContext
&
ctx
)
{
mode_
=
ctx
.
mode_
;
active_ids_
=
ctx
.
active_ids_
;
workspace_
=
ctx
.
workspace_
;
arch_
=
ctx
.
arch_
;
count_
=
ctx
.
count_
;
return
*
this
;
}
void
Context
<
TargetType
::
kARM
>::
BindDev
()
{
#ifdef ARM_WITH_OMP
int
num_threads
=
active_ids_
.
size
();
omp_set_num_threads
(
num_threads
);
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
ssarets
;
for
(
int
j
=
0
;
j
<
num_threads
;
++
j
)
{
ssarets
.
push_back
(
0
);
}
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
ssarets
[
i
]
=
set_sched_affinity
(
active_ids_
);
}
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
if
(
ssarets
[
i
]
!=
0
)
{
LOG
(
ERROR
)
<<
"set cpu affinity failed, cpuID: "
<<
active_ids_
[
i
];
return
;
}
}
#endif // LITE_WITH_LINUX
#else // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
cpuid1
;
cpuid1
.
push_back
(
active_ids_
[
0
]);
int
ssaret
=
set_sched_affinity
(
cpuid1
);
if
(
ssaret
!=
0
)
{
printf
(
"set cpu affinity failed, cpuID: %d
\n
"
,
active_ids_
[
0
]);
return
;
}
#endif // LITE_WITH_LINUX
#endif // ARM_WITH_OMP
}
void
Context
<
TargetType
::
kARM
>::
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
int
big_core_size
=
dev
.
big_core_ids_
.
size
();
int
small_core_size
=
dev
.
little_core_ids_
.
size
();
if
(
threads
>
big_core_size
+
small_core_size
)
{
threads
=
big_core_size
+
small_core_size
;
}
#ifdef ARM_WITH_OMP
count_
++
;
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
switch
(
mode
)
{
case
LITE_POWER_FULL
:
mode_
=
mode
;
active_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
if
(
i
<
big_core_size
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
else
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
-
big_core_size
]);
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
ERROR
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores"
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_NO_BIND
:
mode_
=
LITE_POWER_NO_BIND
;
active_ids_
.
clear
();
if
(
threads
>
dev
.
core_ids_
.
size
())
{
active_ids_
.
resize
(
dev
.
core_ids_
.
size
());
}
else
{
active_ids_
.
resize
(
threads
);
}
break
;
case
LITE_POWER_RAND_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[(
i
+
shift_num
)
%
big_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
WARNING
)
<<
"HIGH POWER MODE is not support, switch to little cores"
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_RAND_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
little_core_ids_
[(
i
+
shift_num
)
%
small_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
dev
.
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
}
//! fix multi-threads LITE_POWER_HIGH mode
if
(
mode_
==
LITE_POWER_NO_BIND
||
threads
>
1
)
{
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
else
{
if
(
check_online
(
active_ids_
))
{
BindDev
();
}
else
{
LOG
(
ERROR
)
<<
"core id "
<<
active_ids_
[
0
]
<<
" is offline, switch to NO BIND MODE"
;
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
}
#else
if
(
big_core_size
>
0
)
{
active_ids_
=
{
dev
.
big_core_ids_
[
0
]};
}
else
{
active_ids_
=
{
0
};
}
#endif
//! alloc memory for sgemm in this context
int
temp_mem_size
=
DeviceInfo
::
Global
().
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
);
workspace_
.
Resize
({
temp_mem_size
});
arch_
=
DeviceInfo
::
Global
().
archs_
[
active_ids_
[
0
]];
}
ARMArch
Context
<
TargetType
::
kARM
>::
arch
()
const
{
return
arch_
;
}
void
Context
<
TargetType
::
kARM
>::
SetArch
(
ARMArch
arch
)
{
arch_
=
arch
;
}
int
Context
<
TargetType
::
kARM
>::
l1_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L1_cache_
[
active_ids_
[
0
]];
}
int
Context
<
TargetType
::
kARM
>::
l2_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L2_cache_
[
active_ids_
[
0
]];
}
int
Context
<
TargetType
::
kARM
>::
l3_cache_size
()
const
{
DeviceInfo
&
dev
=
DeviceInfo
::
Global
();
return
dev
.
L3_cache_
[
active_ids_
[
0
]];
}
bool
Context
<
TargetType
::
kARM
>::
ExtendWorkspace
(
DDimLite
dims
)
{
auto
count
=
dims
.
product
();
auto
old
=
workspace_
.
dims
();
if
(
count
==
old
.
product
())
{
return
false
;
}
workspace_
.
Resize
(
{
static_cast
<
int64_t
>
(
count
+
l2_cache_size
()
/
sizeof
(
float
))});
return
true
;
}
#endif // LITE_WITH_ARM
}
// namespace lite
namespace
lite
{}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/context.h
浏览文件 @
c8d89a2b
...
...
@@ -61,47 +61,42 @@ class Context<TargetType::kHost> {
template
<
>
class
Context
<
TargetType
::
kARM
>
{
public:
Context
();
Context
(
PowerMode
mode
,
int
threads
);
Context
()
{}
explicit
Context
(
const
ARMContext
&
ctx
);
ARMContext
&
operator
=
(
const
ARMContext
&
ctx
)
;
ARMContext
&
operator
=
(
const
ARMContext
&
ctx
)
{}
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{
DeviceInfo
::
Init
();
}
void
InitOnce
()
{}
void
CopyShared
(
const
ARMContext
*
ctx
)
{}
void
SetRunMode
(
PowerMode
mode
,
int
threads
);
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
);
void
SetArch
(
ARMArch
arch
);
void
BindDev
();
void
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
return
DeviceInfo
::
Global
().
SetRunMode
(
mode
,
threads
);
}
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
return
DeviceInfo
::
Global
().
SetCache
(
l1size
,
l2size
,
l3size
);
}
void
SetArch
(
ARMArch
arch
)
{
return
DeviceInfo
::
Global
().
SetArch
(
arch
);
}
void
BindDev
()
{
return
DeviceInfo
::
Global
().
BindDev
();
}
PowerMode
mode
()
const
;
int
threads
()
const
;
ARMArch
arch
()
const
;
PowerMode
mode
()
const
{
return
DeviceInfo
::
Global
().
mode
();
}
int
threads
()
const
{
return
DeviceInfo
::
Global
().
threads
();
}
ARMArch
arch
()
const
{
return
DeviceInfo
::
Global
().
arch
();
}
template
<
typename
T
>
T
*
workspace_data
()
{
return
workspace_
.
mutabl
e_data
<
T
>
();
return
DeviceInfo
::
Global
().
workspac
e_data
<
T
>
();
}
int
l1_cache_size
()
const
;
int
l2_cache_size
()
const
;
int
l3_cache_size
()
const
;
bool
ExtendWorkspace
(
DDimLite
dims
);
int
l1_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l1_cache_size
();
}
int
l2_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l2_cache_size
();
}
int
l3_cache_size
()
const
{
return
DeviceInfo
::
Global
().
l3_cache_size
();
}
bool
ExtendWorkspace
(
DDimLite
dims
)
{
return
DeviceInfo
::
Global
().
ExtendWorkspace
(
dims
);
}
std
::
string
name
()
const
{
return
"ARMContext"
;
}
private:
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
ARMArch
arch_
;
PowerMode
mode_
;
std
::
vector
<
int
>
active_ids_
;
TensorLite
workspace_
;
int64_t
count_
{
0
};
};
#endif
...
...
paddle/fluid/lite/core/cpu_info.cc
浏览文件 @
c8d89a2b
...
...
@@ -12,8 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
#include "paddle/fluid/lite/core/cpu_info.h"
#include <cstdarg>
namespace
paddle
{
namespace
lite
{
...
...
@@ -73,6 +89,252 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) {
#elif defined(TARGET_IOS)
arm_get_cpu_arch
(
&
dev
->
archs_
);
#endif
dev
->
active_ids_
=
{
0
};
dev
->
mode_
=
LITE_POWER_HIGH
;
dev
->
workspace_
.
Resize
({
static_cast
<
int64_t
>
(
dev
->
L2_cache_
[
dev
->
active_ids_
[
0
]]
/
sizeof
(
float
))});
#ifdef TARGET_IOS
dev
->
arch_
=
APPLE
;
// use 6x8
#else
if
(
dev
->
big_core_ids_
.
size
()
>
0
)
{
dev
->
arch_
=
dev
->
archs_
[
dev
->
big_core_ids_
[
0
]];
}
#endif
}
void
DeviceInfo
::
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
)
{
int
cpu_count
=
arm_get_cpucount
();
L1_cache_
.
resize
(
cpu_count
);
L2_cache_
.
resize
(
cpu_count
);
L3_cache_
.
resize
(
cpu_count
);
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
L1_cache_
[
i
]
=
l1size
;
L2_cache_
[
i
]
=
l2size
;
L3_cache_
[
i
]
=
l3size
;
}
workspace_
.
Resize
({
2
*
(
l1size
+
l2size
)});
}
void
DeviceInfo
::
BindDev
()
{
#ifdef ARM_WITH_OMP
int
num_threads
=
active_ids_
.
size
();
omp_set_num_threads
(
num_threads
);
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
ssarets
;
for
(
int
j
=
0
;
j
<
num_threads
;
++
j
)
{
ssarets
.
push_back
(
0
);
}
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
ssarets
[
i
]
=
set_sched_affinity
(
active_ids_
);
}
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
if
(
ssarets
[
i
]
!=
0
)
{
LOG
(
ERROR
)
<<
"set cpu affinity failed, cpuID: "
<<
active_ids_
[
i
];
return
;
}
}
#endif // LITE_WITH_LINUX
#else // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
cpuid1
;
cpuid1
.
push_back
(
active_ids_
[
0
]);
int
ssaret
=
set_sched_affinity
(
cpuid1
);
if
(
ssaret
!=
0
)
{
printf
(
"set cpu affinity failed, cpuID: %d
\n
"
,
active_ids_
[
0
]);
return
;
}
#endif // LITE_WITH_LINUX
#endif // ARM_WITH_OMP
}
void
DeviceInfo
::
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
LOG
(
INFO
)
<<
"ARM SetRunMode called"
;
int
big_core_size
=
big_core_ids_
.
size
();
int
small_core_size
=
little_core_ids_
.
size
();
if
(
threads
>
big_core_size
+
small_core_size
)
{
threads
=
big_core_size
+
small_core_size
;
}
#ifdef ARM_WITH_OMP
count_
++
;
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
switch
(
mode
)
{
case
LITE_POWER_FULL
:
mode_
=
mode
;
active_ids_
.
clear
();
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
if
(
i
<
big_core_size
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
else
{
active_ids_
.
push_back
(
little_core_ids_
[
i
-
big_core_size
]);
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
ERROR
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores."
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_NO_BIND
:
mode_
=
LITE_POWER_NO_BIND
;
active_ids_
.
clear
();
if
(
threads
>
core_ids_
.
size
())
{
active_ids_
.
resize
(
core_ids_
.
size
());
}
else
{
active_ids_
.
resize
(
threads
);
}
break
;
case
LITE_POWER_RAND_HIGH
:
active_ids_
.
clear
();
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_HIGH
;
if
(
threads
>
big_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[(
i
+
shift_num
)
%
big_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_LOW
;
LOG
(
WARNING
)
<<
"HIGH POWER MODE is not support, switch to little cores."
;
if
(
threads
>
small_core_size
)
{
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
case
LITE_POWER_RAND_LOW
:
active_ids_
.
clear
();
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_LOW
;
if
(
threads
>
small_core_size
)
{
LOG
(
WARNING
)
<<
"threads: "
<<
threads
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
little_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
little_core_ids_
[(
i
+
shift_num
)
%
small_core_size
]);
}
}
}
else
{
mode_
=
LITE_POWER_HIGH
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores."
;
if
(
threads
>
big_core_size
)
{
active_ids_
=
big_core_ids_
;
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
active_ids_
.
push_back
(
big_core_ids_
[
i
]);
}
}
}
if
(
active_ids_
.
size
()
==
0
)
{
active_ids_
.
push_back
(
0
);
}
break
;
}
//! fix multi-threads LITE_POWER_HIGH mode
if
(
mode_
==
LITE_POWER_NO_BIND
||
threads
>
1
)
{
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
else
{
if
(
check_online
(
active_ids_
))
{
BindDev
();
}
else
{
LOG
(
WARNING
)
<<
"core id "
<<
active_ids_
[
0
]
<<
" is offline, switch to NO BIND MODE"
;
int
threads
=
active_ids_
.
size
();
omp_set_num_threads
(
threads
);
}
}
#else
if
(
big_core_size
>
0
)
{
active_ids_
=
{
big_core_ids_
[
0
]};
}
else
{
active_ids_
=
{
0
};
}
#endif
//! alloc memory for sgemm in this context
int
temp_mem_size
=
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
);
workspace_
.
Resize
({
temp_mem_size
});
arch_
=
archs_
[
active_ids_
[
0
]];
}
bool
DeviceInfo
::
ExtendWorkspace
(
DDimLite
dims
)
{
auto
count
=
dims
.
product
();
auto
old
=
workspace_
.
dims
();
if
(
count
==
old
.
product
())
{
return
false
;
}
workspace_
.
Resize
({
static_cast
<
int64_t
>
(
count
+
L2_cache_
[
active_ids_
[
0
]]
/
sizeof
(
float
))});
return
true
;
}
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
...
...
paddle/fluid/lite/core/cpu_info.h
浏览文件 @
c8d89a2b
...
...
@@ -16,22 +16,9 @@
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/lite_tensor.h"
#include "paddle/fluid/lite/utils/cp_logging.h"
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
namespace
paddle
{
namespace
lite
{
...
...
@@ -80,6 +67,15 @@ class DeviceInfo {
std
::
vector
<
int
>
cluster_ids_
;
std
::
vector
<
ARMArch
>
archs_
;
ARMArch
arch_
;
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
PowerMode
mode_
;
std
::
vector
<
int
>
active_ids_
;
TensorLite
workspace_
;
int64_t
count_
{
0
};
static
DeviceInfo
&
Global
()
{
static
auto
*
x
=
new
DeviceInfo
;
return
*
x
;
...
...
@@ -90,6 +86,25 @@ class DeviceInfo {
InitInternal
(
&
info
);
}
void
SetRunMode
(
PowerMode
mode
,
int
threads
);
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
);
void
SetArch
(
ARMArch
arch
)
{
arch_
=
arch
;
}
void
BindDev
();
PowerMode
mode
()
const
{
return
mode_
;
}
int
threads
()
const
{
return
active_ids_
.
size
();
}
ARMArch
arch
()
const
{
return
arch_
;
}
template
<
typename
T
>
T
*
workspace_data
()
{
return
workspace_
.
mutable_data
<
T
>
();
}
int
l1_cache_size
()
const
{
return
L1_cache_
[
active_ids_
[
0
]];
}
int
l2_cache_size
()
const
{
return
L2_cache_
[
active_ids_
[
0
]];
}
int
l3_cache_size
()
const
{
return
L3_cache_
[
active_ids_
[
0
]];
}
bool
ExtendWorkspace
(
DDimLite
dims
);
private:
DeviceInfo
()
=
default
;
static
void
InitInternal
(
DeviceInfo
*
dev
);
...
...
paddle/fluid/lite/kernels/arm/conv_compute.cc
浏览文件 @
c8d89a2b
...
...
@@ -28,8 +28,6 @@ void ConvCompute::PrepareForRun() {
auto
o_dims
=
param
.
output
->
dims
();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
// TODO(xxx): make api and expose it
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
int
win
=
x_dims
[
3
];
// nchw
int
hin
=
x_dims
[
2
];
...
...
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
c8d89a2b
...
...
@@ -28,7 +28,6 @@ void FcCompute::PrepareForRun() {
auto
w_dims
=
param
.
w
->
dims
();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
CHECK_GE
(
x_dims
.
size
(),
2UL
);
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
...
...
paddle/fluid/lite/kernels/arm/mul_compute.cc
浏览文件 @
c8d89a2b
...
...
@@ -24,7 +24,6 @@ namespace arm {
void
MulCompute
::
PrepareForRun
()
{
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
}
void
MulCompute
::
Run
()
{
...
...
paddle/fluid/lite/kernels/arm/pool_compute.cc
浏览文件 @
c8d89a2b
...
...
@@ -26,7 +26,6 @@ namespace arm {
void
PoolCompute
::
PrepareForRun
()
{
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
}
void
PoolCompute
::
Run
()
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录