Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
8430a0e2
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
8430a0e2
编写于
4月 22, 2019
作者:
L
luxuhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix sched_setaffinity'bug and opt the performance of threadpool
N/A Signed-off-by:
N
Luxuhui
<
luxuhui@xiaomi.com
>
上级
6c178680
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
220 addition
and
145 deletion
+220
-145
mace/core/runtime/cpu/cpu_runtime.cc
mace/core/runtime/cpu/cpu_runtime.cc
+10
-52
mace/port/android/env.cc
mace/port/android/env.cc
+36
-23
mace/port/android/env.h
mace/port/android/env.h
+1
-1
mace/port/darwin/env.cc
mace/port/darwin/env.cc
+47
-8
mace/port/darwin/env.h
mace/port/darwin/env.h
+2
-0
mace/port/linux/env.cc
mace/port/linux/env.cc
+11
-0
mace/port/linux/env.h
mace/port/linux/env.h
+1
-0
mace/port/linux_base/env.cc
mace/port/linux_base/env.cc
+22
-1
mace/port/linux_base/env.h
mace/port/linux_base/env.h
+1
-0
mace/utils/thread_pool.cc
mace/utils/thread_pool.cc
+84
-60
mace/utils/thread_pool.h
mace/utils/thread_pool.h
+5
-0
未找到文件。
mace/core/runtime/cpu/cpu_runtime.cc
浏览文件 @
8430a0e2
...
@@ -31,16 +31,12 @@
...
@@ -31,16 +31,12 @@
#include "mace/public/mace.h"
#include "mace/public/mace.h"
#include "mace/utils/macros.h"
#include "mace/utils/macros.h"
#include "mace/utils/logging.h"
#include "mace/utils/logging.h"
#include "mace/utils/thread_pool.h"
namespace
mace
{
namespace
mace
{
int
MaceOpenMPThreadCount
=
1
;
int
MaceOpenMPThreadCount
=
1
;
struct
CPUFreq
{
size_t
core_id
;
float
freq
;
};
enum
SchedulePolicy
{
enum
SchedulePolicy
{
SCHED_STATIC
,
SCHED_STATIC
,
SCHED_GUIDED
,
SCHED_GUIDED
,
...
@@ -105,28 +101,12 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
...
@@ -105,28 +101,12 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
return
MaceStatus
::
MACE_RUNTIME_ERROR
;
return
MaceStatus
::
MACE_RUNTIME_ERROR
;
}
}
std
::
vector
<
CPUFreq
>
cpu_freq
(
cpu_max_freqs
.
size
());
std
::
vector
<
size_t
>
cores_to_use
;
for
(
size_t
i
=
0
;
i
<
cpu_max_freqs
.
size
();
++
i
)
{
MACE_RETURN_IF_ERROR
(
cpu_freq
[
i
].
core_id
=
i
;
mace
::
utils
::
GetCPUCoresToUse
(
cpu_freq
[
i
].
freq
=
cpu_max_freqs
[
i
];
cpu_max_freqs
,
policy
,
num_threads_hint
,
&
cores_to_use
));
}
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_POWER_SAVE
||
policy
==
CPUAffinityPolicy
::
AFFINITY_LITTLE_ONLY
)
{
std
::
sort
(
cpu_freq
.
begin
(),
cpu_freq
.
end
(),
[
=
](
const
CPUFreq
&
lhs
,
const
CPUFreq
&
rhs
)
{
return
lhs
.
freq
<
rhs
.
freq
;
});
}
else
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_HIGH_PERFORMANCE
||
policy
==
CPUAffinityPolicy
::
AFFINITY_BIG_ONLY
)
{
std
::
sort
(
cpu_freq
.
begin
(),
cpu_freq
.
end
(),
[](
const
CPUFreq
&
lhs
,
const
CPUFreq
&
rhs
)
{
return
lhs
.
freq
>
rhs
.
freq
;
});
}
int
cpu_count
=
static_cast
<
int
>
(
c
pu_freq
.
size
());
int
cpu_count
=
static_cast
<
int
>
(
c
ores_to_use
.
size
());
if
(
num_threads_hint
<=
0
||
num_threads_hint
>
cpu_count
)
{
if
(
num_threads_hint
<=
0
||
num_threads_hint
>
cpu_count
)
{
num_threads_hint
=
cpu_count
;
num_threads_hint
=
cpu_count
;
}
}
...
@@ -148,32 +128,10 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
...
@@ -148,32 +128,10 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
return
MaceStatus
::
MACE_SUCCESS
;
return
MaceStatus
::
MACE_SUCCESS
;
}
}
// decide num of cores to use
int
cores_to_use
=
0
;
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_BIG_ONLY
||
policy
==
CPUAffinityPolicy
::
AFFINITY_LITTLE_ONLY
)
{
for
(
size_t
i
=
0
;
i
<
cpu_max_freqs
.
size
();
++
i
)
{
if
(
cpu_freq
[
i
].
freq
!=
cpu_freq
[
0
].
freq
)
{
break
;
}
++
cores_to_use
;
}
num_threads_hint
=
std
::
min
(
num_threads_hint
,
cores_to_use
);
}
else
{
cores_to_use
=
num_threads_hint
;
}
MACE_CHECK
(
cores_to_use
>
0
,
"number of cores to use should > 0"
);
VLOG
(
2
)
<<
"Use "
<<
num_threads_hint
<<
" threads"
;
std
::
vector
<
size_t
>
cpu_ids
(
cores_to_use
);
for
(
int
i
=
0
;
i
<
cores_to_use
;
++
i
)
{
VLOG
(
2
)
<<
"Bind thread to core: "
<<
cpu_freq
[
i
].
core_id
<<
" with freq "
<<
cpu_freq
[
i
].
freq
;
cpu_ids
[
i
]
=
cpu_freq
[
i
].
core_id
;
}
SchedulePolicy
sched_policy
=
SCHED_GUIDED
;
SchedulePolicy
sched_policy
=
SCHED_GUIDED
;
if
(
std
::
abs
(
cpu_freq
[
0
].
freq
-
cpu_freq
[
cores_to_use
-
1
].
freq
)
<
1e-6
)
{
float
first_freq
=
cpu_max_freqs
[
cores_to_use
[
0
]];
float
last_freq
=
cpu_max_freqs
[
cores_to_use
[
cores_to_use
.
size
()
-
1
]];
if
(
std
::
abs
(
first_freq
-
last_freq
)
<
1e-6
)
{
sched_policy
=
SCHED_STATIC
;
sched_policy
=
SCHED_STATIC
;
}
}
...
@@ -185,7 +143,7 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
...
@@ -185,7 +143,7 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
#endif // MACE_ENABLE_QUANTIZE
#endif // MACE_ENABLE_QUANTIZE
return
SetOpenMPThreadsAndAffinityCPUs
(
num_threads_hint
,
return
SetOpenMPThreadsAndAffinityCPUs
(
num_threads_hint
,
c
pu_ids
,
c
ores_to_use
,
sched_policy
);
sched_policy
);
}
}
...
...
mace/port/android/env.cc
浏览文件 @
8430a0e2
...
@@ -17,7 +17,6 @@
...
@@ -17,7 +17,6 @@
#include <errno.h>
#include <errno.h>
#include <unwind.h>
#include <unwind.h>
#include <dlfcn.h>
#include <dlfcn.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/types.h>
...
@@ -50,46 +49,60 @@ LogWriter *AndroidEnv::GetLogWriter() {
...
@@ -50,46 +49,60 @@ LogWriter *AndroidEnv::GetLogWriter() {
namespace
{
namespace
{
struct
BacktraceState
{
struct
BacktraceState
{
void
**
current
;
void
**
current
;
void
**
end
;
void
**
end
;
};
};
_Unwind_Reason_Code
UnwindCallback
(
struct
_Unwind_Context
*
context
,
void
*
arg
)
{
_Unwind_Reason_Code
UnwindCallback
(
struct
_Unwind_Context
*
context
,
void
*
arg
)
{
BacktraceState
*
state
=
static_cast
<
BacktraceState
*>
(
arg
);
BacktraceState
*
state
=
static_cast
<
BacktraceState
*>
(
arg
);
uintptr_t
pc
=
_Unwind_GetIP
(
context
);
uintptr_t
pc
=
_Unwind_GetIP
(
context
);
if
(
pc
)
{
if
(
pc
)
{
if
(
state
->
current
==
state
->
end
)
{
if
(
state
->
current
==
state
->
end
)
{
return
_URC_END_OF_STACK
;
return
_URC_END_OF_STACK
;
}
else
{
}
else
{
*
state
->
current
++
=
reinterpret_cast
<
void
*>
(
pc
);
*
state
->
current
++
=
reinterpret_cast
<
void
*>
(
pc
);
}
}
}
}
return
_URC_NO_REASON
;
return
_URC_NO_REASON
;
}
}
size_t
BackTrace
(
void
**
buffer
,
size_t
max
)
{
size_t
BackTrace
(
void
**
buffer
,
size_t
max
)
{
BacktraceState
state
=
{
buffer
,
buffer
+
max
};
BacktraceState
state
=
{
buffer
,
buffer
+
max
};
_Unwind_Backtrace
(
UnwindCallback
,
&
state
);
_Unwind_Backtrace
(
UnwindCallback
,
&
state
);
return
state
.
current
-
buffer
;
return
state
.
current
-
buffer
;
}
}
// Returns true when the first line of the sysfs file for CPU `cpu_id` parses
// to a nonzero integer. The file path is built by MakeString from
// "/sys/devices/system/cpu/cpu", `cpu_id` and "/isolate".
//
// Returns false when the file cannot be opened, when no line can be read
// from it, or when the parsed value is 0.
bool CpuIsolate(size_t cpu_id) {
  const std::string isolate_path =
      MakeString("/sys/devices/system/cpu/cpu", cpu_id, "/isolate");

  // The stream closes itself when it goes out of scope on every return path.
  std::ifstream isolate_stream(isolate_path);
  if (!isolate_stream.is_open()) {
    return false;
  }

  std::string first_line;
  if (!std::getline(isolate_stream, first_line)) {
    return false;
  }

  // Base 0: strtol picks decimal, octal or hexadecimal from the text prefix.
  const int isolate_flag = strtol(first_line.c_str(), nullptr, 0);
  return isolate_flag != 0;
}
}
// namespace
}
// namespace
MaceStatus
AndroidEnv
::
SchedSetAffinity
(
const
std
::
vector
<
size_t
>
&
cpu_ids
)
{
MaceStatus
AndroidEnv
::
GetCPUMaxFreq
(
std
::
vector
<
float
>
*
max_freqs
)
{
// compute mask
MACE_RETURN_IF_ERROR
(
LinuxBaseEnv
::
GetCPUMaxFreq
(
max_freqs
));
cpu_set_t
mask
;
CPU_ZERO
(
&
mask
);
size_t
cpu_num
=
(
max_freqs
!=
nullptr
)
?
max_freqs
->
size
()
:
0
;
for
(
auto
cpu_id
:
cpu_ids
)
{
if
(
cpu_num
>
0
)
{
CPU_SET
(
cpu_id
,
&
mask
);
for
(
size_t
i
=
0
;
i
<
cpu_num
;
++
i
)
{
}
if
(
CpuIsolate
(
i
))
{
pid_t
pid
=
gettid
();
(
*
max_freqs
)[
i
]
=
0
;
int
err
=
sched_setaffinity
(
pid
,
sizeof
(
mask
),
&
mask
);
}
if
(
err
)
{
}
LOG
(
WARNING
)
<<
"SchedSetAffinity failed: "
<<
strerror
(
errno
);
return
MaceStatus
(
MaceStatus
::
MACE_INVALID_ARGS
,
"SchedSetAffinity failed: "
+
std
::
string
(
strerror
(
errno
)));
}
}
return
MaceStatus
::
MACE_SUCCESS
;
return
MaceStatus
::
MACE_SUCCESS
;
...
@@ -103,8 +116,8 @@ std::vector<std::string> AndroidEnv::GetBackTraceUnsafe(int max_steps) {
...
@@ -103,8 +116,8 @@ std::vector<std::string> AndroidEnv::GetBackTraceUnsafe(int max_steps) {
for
(
int
i
=
0
;
i
<
steps
;
++
i
)
{
for
(
int
i
=
0
;
i
<
steps
;
++
i
)
{
std
::
ostringstream
os
;
std
::
ostringstream
os
;
const
void
*
addr
=
buffer
[
i
];
const
void
*
addr
=
buffer
[
i
];
const
char
*
symbol
=
""
;
const
char
*
symbol
=
""
;
Dl_info
info
;
Dl_info
info
;
if
(
dladdr
(
addr
,
&
info
)
&&
info
.
dli_sname
)
{
if
(
dladdr
(
addr
,
&
info
)
&&
info
.
dli_sname
)
{
symbol
=
info
.
dli_sname
;
symbol
=
info
.
dli_sname
;
...
...
mace/port/android/env.h
浏览文件 @
8430a0e2
...
@@ -29,8 +29,8 @@ namespace port {
...
@@ -29,8 +29,8 @@ namespace port {
class
AndroidEnv
:
public
LinuxBaseEnv
{
class
AndroidEnv
:
public
LinuxBaseEnv
{
public:
public:
MaceStatus
SchedSetAffinity
(
const
std
::
vector
<
size_t
>
&
cpu_ids
)
override
;
LogWriter
*
GetLogWriter
()
override
;
LogWriter
*
GetLogWriter
()
override
;
MaceStatus
GetCPUMaxFreq
(
std
::
vector
<
float
>
*
max_freqs
)
override
;
std
::
vector
<
std
::
string
>
GetBackTraceUnsafe
(
int
max_steps
)
override
;
std
::
vector
<
std
::
string
>
GetBackTraceUnsafe
(
int
max_steps
)
override
;
std
::
unique_ptr
<
MallocLogger
>
NewMallocLogger
(
std
::
unique_ptr
<
MallocLogger
>
NewMallocLogger
(
std
::
ostringstream
*
oss
,
std
::
ostringstream
*
oss
,
...
...
mace/port/darwin/env.cc
浏览文件 @
8430a0e2
...
@@ -15,6 +15,8 @@
...
@@ -15,6 +15,8 @@
#include "mace/port/darwin/env.h"
#include "mace/port/darwin/env.h"
#include <execinfo.h>
#include <execinfo.h>
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
#include <stdint.h>
#include <stdint.h>
#include <sys/sysctl.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/time.h>
...
@@ -33,27 +35,64 @@ namespace mace {
...
@@ -33,27 +35,64 @@ namespace mace {
namespace
port
{
namespace
port
{
namespace
{
namespace
{
const
char
kCpuFrequencyMax
[]
=
"hw.cpufrequency_max"
;
constexpr
const
char
kCpuFrequencyMax
[]
=
"hw.cpufrequency_max"
;
constexpr
const
char
kCpuActiveNum
[]
=
"hw.activecpu"
;
}
}
int64_t
DarwinEnv
::
NowMicros
()
{
int64_t
DarwinEnv
::
NowMicros
()
{
return
mace
::
port
::
posix
::
NowMicros
();
return
mace
::
port
::
posix
::
NowMicros
();
}
}
// TODO(luxuhui): this func is not accurate, darwin does not support
// we can't get the frequency of every cpu on darwin, so this method
// acquiring CPU frequencies, we need to reconsider the CPU scheduling
// returns fake frequency data.
// strategy.
MaceStatus
DarwinEnv
::
GetCPUMaxFreq
(
std
::
vector
<
float
>
*
cpu_infos
)
{
MaceStatus
DarwinEnv
::
GetCPUMaxFreq
(
std
::
vector
<
float
>
*
max_freqs
)
{
MACE_CHECK_NOTNULL
(
cpu_infos
);
MACE_CHECK_NOTNULL
(
max_freqs
);
uint64_
t
freq
=
0
;
floa
t
freq
=
0
;
size_t
size
=
sizeof
(
freq
);
size_t
size
=
sizeof
(
freq
);
int
ret
=
sysctlbyname
(
kCpuFrequencyMax
,
&
freq
,
&
size
,
NULL
,
0
);
int
ret
=
sysctlbyname
(
kCpuFrequencyMax
,
&
freq
,
&
size
,
NULL
,
0
);
if
(
ret
<
0
)
{
if
(
ret
<
0
)
{
LOG
(
ERROR
)
<<
"failed to get property: "
<<
kCpuFrequencyMax
;
LOG
(
ERROR
)
<<
"failed to get property: "
<<
kCpuFrequencyMax
;
return
MaceStatus
::
MACE_RUNTIME_ERROR
;
return
MaceStatus
::
MACE_RUNTIME_ERROR
;
}
}
max_freqs
->
push_back
(
freq
);
uint64_t
cpu_num
=
0
;
size
=
sizeof
(
cpu_num
);
ret
=
sysctlbyname
(
kCpuActiveNum
,
&
cpu_num
,
&
size
,
NULL
,
0
);
if
(
ret
<
0
)
{
LOG
(
ERROR
)
<<
"failed to get property: "
<<
kCpuActiveNum
;
return
MaceStatus
::
MACE_RUNTIME_ERROR
;
}
for
(
int
i
=
0
;
i
<
cpu_num
;
++
i
)
{
cpu_infos
->
push_back
(
freq
);
}
return
MaceStatus
::
MACE_SUCCESS
;
}
MaceStatus
DarwinEnv
::
SchedSetAffinity
(
const
std
::
vector
<
size_t
>
&
cpu_ids
)
{
unsigned
int
tag
=
0
;
for
(
size_t
i
=
0
;
i
<
cpu_ids
.
size
();
++
i
)
{
tag
+=
(
cpu_ids
[
i
]
<<
i
);
}
#ifdef MACE_OS_MAC
pthread_t
thread
=
pthread_self
();
mach_port_t
mach_port
=
pthread_mach_thread_np
(
thread
);
thread_affinity_policy_data_t
policy_data
=
{(
integer_t
)
tag
};
int
ret
=
thread_policy_set
(
mach_port
,
THREAD_AFFINITY_POLICY
,
(
thread_policy_t
)
&
policy_data
,
1
);
if
(
ret
)
{
LOG
(
INFO
)
<<
"thread_policy_set failed: "
<<
strerror
(
errno
);
return
MaceStatus
::
MACE_RUNTIME_ERROR
;
}
#endif
return
MaceStatus
::
MACE_SUCCESS
;
return
MaceStatus
::
MACE_SUCCESS
;
}
}
...
...
mace/port/darwin/env.h
浏览文件 @
8430a0e2
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
#include "mace/port/env.h"
#include "mace/port/env.h"
#include "mace/port/logger.h"
#include "mace/port/logger.h"
#include "mace/port/port-arch.h"
#include "mace/port/posix/file_system.h"
#include "mace/port/posix/file_system.h"
namespace
mace
{
namespace
mace
{
...
@@ -29,6 +30,7 @@ class DarwinEnv : public Env {
...
@@ -29,6 +30,7 @@ class DarwinEnv : public Env {
public:
public:
int64_t
NowMicros
()
override
;
int64_t
NowMicros
()
override
;
MaceStatus
GetCPUMaxFreq
(
std
::
vector
<
float
>
*
max_freqs
)
override
;
MaceStatus
GetCPUMaxFreq
(
std
::
vector
<
float
>
*
max_freqs
)
override
;
MaceStatus
SchedSetAffinity
(
const
std
::
vector
<
size_t
>
&
cpu_ids
)
override
;
FileSystem
*
GetFileSystem
()
override
;
FileSystem
*
GetFileSystem
()
override
;
LogWriter
*
GetLogWriter
()
override
;
LogWriter
*
GetLogWriter
()
override
;
std
::
vector
<
std
::
string
>
GetBackTraceUnsafe
(
int
max_steps
)
override
;
std
::
vector
<
std
::
string
>
GetBackTraceUnsafe
(
int
max_steps
)
override
;
...
...
mace/port/linux/env.cc
浏览文件 @
8430a0e2
...
@@ -25,10 +25,21 @@
...
@@ -25,10 +25,21 @@
#include "mace/port/posix/backtrace.h"
#include "mace/port/posix/backtrace.h"
#include "mace/port/posix/file_system.h"
#include "mace/port/posix/file_system.h"
#include "mace/port/posix/time.h"
#include "mace/port/posix/time.h"
#include "mace/utils/macros.h"
namespace
mace
{
namespace
mace
{
namespace
port
{
namespace
port
{
// Deliberate no-op: on our embedded Linux devices, setting the thread
// affinity had a negative effect on performance, so this override skips it
// and always reports success. If you want to experiment, try disabling this
// override; you may get better performance, as we do on Android devices.
MaceStatus LinuxEnv::SchedSetAffinity(const std::vector<size_t> &cpu_ids) {
  MACE_UNUSED(cpu_ids);  // the requested CPU ids are ignored on purpose
  return MaceStatus::MACE_SUCCESS;
}
LogWriter
*
LinuxEnv
::
GetLogWriter
()
{
LogWriter
*
LinuxEnv
::
GetLogWriter
()
{
return
&
log_writer_
;
return
&
log_writer_
;
}
}
...
...
mace/port/linux/env.h
浏览文件 @
8430a0e2
...
@@ -26,6 +26,7 @@ namespace port {
...
@@ -26,6 +26,7 @@ namespace port {
class
LinuxEnv
:
public
LinuxBaseEnv
{
class
LinuxEnv
:
public
LinuxBaseEnv
{
public:
public:
MaceStatus
SchedSetAffinity
(
const
std
::
vector
<
size_t
>
&
cpu_ids
)
override
;
LogWriter
*
GetLogWriter
()
override
;
LogWriter
*
GetLogWriter
()
override
;
std
::
vector
<
std
::
string
>
GetBackTraceUnsafe
(
int
max_steps
)
override
;
std
::
vector
<
std
::
string
>
GetBackTraceUnsafe
(
int
max_steps
)
override
;
...
...
mace/port/linux_base/env.cc
浏览文件 @
8430a0e2
...
@@ -14,7 +14,10 @@
...
@@ -14,7 +14,10 @@
#include "mace/port/linux_base/env.h"
#include "mace/port/linux_base/env.h"
#include <errno.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/time.h>
#include <unistd.h>
#include <cstddef>
#include <cstddef>
#include <fstream>
#include <fstream>
...
@@ -28,7 +31,6 @@
...
@@ -28,7 +31,6 @@
namespace
mace
{
namespace
mace
{
namespace
port
{
namespace
port
{
namespace
{
namespace
{
int
GetCPUCount
()
{
int
GetCPUCount
()
{
...
@@ -100,5 +102,24 @@ MaceStatus LinuxBaseEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
...
@@ -100,5 +102,24 @@ MaceStatus LinuxBaseEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
return
MaceStatus
::
MACE_SUCCESS
;
return
MaceStatus
::
MACE_SUCCESS
;
}
}
// Restricts the calling thread to the CPUs listed in `cpu_ids`.
//
// A cpu_set_t mask is built from `cpu_ids` and applied with
// sched_setaffinity() to the thread id obtained via syscall(SYS_gettid).
//
// Returns MaceStatus::MACE_SUCCESS on success. Returns a
// MaceStatus::MACE_INVALID_ARGS status carrying a message when an id cannot
// be represented in cpu_set_t or when sched_setaffinity() fails.
MaceStatus LinuxBaseEnv::SchedSetAffinity(const std::vector<size_t> &cpu_ids) {
  cpu_set_t mask;
  CPU_ZERO(&mask);
  for (auto cpu_id : cpu_ids) {
    // cpu_set_t only has room for ids in [0, CPU_SETSIZE); reject anything
    // else instead of handing an out-of-range index to CPU_SET.
    if (cpu_id >= static_cast<size_t>(CPU_SETSIZE)) {
      const std::string reason = "invalid cpu id " + std::to_string(cpu_id);
      LOG(WARNING) << "SchedSetAffinity failed: " << reason;
      return MaceStatus(MaceStatus::MACE_INVALID_ARGS,
                        "SchedSetAffinity failed: " + reason);
    }
    CPU_SET(cpu_id, &mask);
  }

  pid_t pid = syscall(SYS_gettid);
  int err = sched_setaffinity(pid, sizeof(mask), &mask);
  if (err) {
    // Snapshot errno right away: the logging call below may itself change
    // errno, and the log line and the returned status must agree.
    const int saved_errno = errno;
    const std::string reason = strerror(saved_errno);
    LOG(WARNING) << "SchedSetAffinity failed: " << reason;
    return MaceStatus(MaceStatus::MACE_INVALID_ARGS,
                      "SchedSetAffinity failed: " + reason);
  }

  return MaceStatus::MACE_SUCCESS;
}
}
// namespace port
}
// namespace port
}
// namespace mace
}
// namespace mace
mace/port/linux_base/env.h
浏览文件 @
8430a0e2
...
@@ -28,6 +28,7 @@ class LinuxBaseEnv : public Env {
...
@@ -28,6 +28,7 @@ class LinuxBaseEnv : public Env {
int64_t
NowMicros
()
override
;
int64_t
NowMicros
()
override
;
MaceStatus
GetCPUMaxFreq
(
std
::
vector
<
float
>
*
max_freqs
)
override
;
MaceStatus
GetCPUMaxFreq
(
std
::
vector
<
float
>
*
max_freqs
)
override
;
FileSystem
*
GetFileSystem
()
override
;
FileSystem
*
GetFileSystem
()
override
;
MaceStatus
SchedSetAffinity
(
const
std
::
vector
<
size_t
>
&
cpu_ids
)
override
;
protected:
protected:
PosixFileSystem
posix_file_system_
;
PosixFileSystem
posix_file_system_
;
...
...
mace/utils/thread_pool.cc
浏览文件 @
8430a0e2
...
@@ -13,6 +13,8 @@
...
@@ -13,6 +13,8 @@
// limitations under the License.
// limitations under the License.
#include <algorithm>
#include <algorithm>
#include <numeric>
#include "mace/port/port.h"
#include "mace/port/port.h"
#include "mace/port/env.h"
#include "mace/port/env.h"
#include "mace/utils/logging.h"
#include "mace/utils/logging.h"
...
@@ -26,6 +28,8 @@ namespace utils {
...
@@ -26,6 +28,8 @@ namespace utils {
constexpr
int
kThreadPoolSpinWaitTime
=
2000000
;
// ns
constexpr
int
kThreadPoolSpinWaitTime
=
2000000
;
// ns
constexpr
int
kTileCountPerThread
=
2
;
constexpr
int
kTileCountPerThread
=
2
;
constexpr
int
kMaxCostUsingSingleThread
=
100
;
constexpr
int
kMaxCostUsingSingleThread
=
100
;
constexpr
int
kMinCpuCoresForPerformance
=
3
;
constexpr
int
kMaxCpuCoresForPerformance
=
5
;
namespace
{
namespace
{
...
@@ -42,67 +46,87 @@ struct CPUFreq {
...
@@ -42,67 +46,87 @@ struct CPUFreq {
float
freq
;
float
freq
;
};
};
void
GetCPUCoresToUse
(
const
std
::
vector
<
float
>
&
cpu_max_freqs
,
size_t
GetCpuCoresForPerfomance
(
const
std
::
vector
<
CPUFreq
>
&
cpu_freqs
)
{
const
CPUAffinityPolicy
policy
,
float
total_freq
=
std
::
accumulate
(
cpu_freqs
.
begin
(),
cpu_freqs
.
end
(),
0
,
const
size_t
thread_count_hint
,
[](
float
accum
,
CPUFreq
cpu_freq
)
{
std
::
vector
<
size_t
>
*
cores
)
{
return
accum
+
cpu_freq
.
freq
;
size_t
thread_count
=
thread_count_hint
;
});
if
(
!
cpu_max_freqs
.
empty
())
{
size_t
valid_cpu_nums
=
std
::
count_if
(
cpu_freqs
.
begin
(),
cpu_freqs
.
end
(),
const
size_t
cpu_count
=
cpu_max_freqs
.
size
();
[](
CPUFreq
cpu_freq
)
{
if
(
thread_count
==
0
||
thread_count
>
cpu_count
)
{
return
cpu_freq
.
freq
!=
0
;
thread_count
=
cpu_count
;
});
float
avg_freq
=
total_freq
/
valid_cpu_nums
;
size_t
cores_to_use
=
0
;
for
(
auto
cpu_info
:
cpu_freqs
)
{
if
((
cpu_info
.
freq
>
avg_freq
&&
cores_to_use
<
kMaxCpuCoresForPerformance
)
||
cores_to_use
<
kMinCpuCoresForPerformance
)
{
++
cores_to_use
;
}
}
}
if
(
policy
!=
CPUAffinityPolicy
::
AFFINITY_NONE
)
{
return
cores_to_use
;
std
::
vector
<
CPUFreq
>
cpu_freq
(
cpu_max_freqs
.
size
());
}
for
(
size_t
i
=
0
;
i
<
cpu_max_freqs
.
size
();
++
i
)
{
cpu_freq
[
i
].
core_id
=
i
;
cpu_freq
[
i
].
freq
=
cpu_max_freqs
[
i
];
}
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_POWER_SAVE
||
policy
==
CPUAffinityPolicy
::
AFFINITY_LITTLE_ONLY
)
{
std
::
sort
(
cpu_freq
.
begin
(),
cpu_freq
.
end
(),
[
=
](
const
CPUFreq
&
lhs
,
const
CPUFreq
&
rhs
)
{
return
lhs
.
freq
<
rhs
.
freq
;
});
}
else
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_HIGH_PERFORMANCE
||
policy
==
CPUAffinityPolicy
::
AFFINITY_BIG_ONLY
)
{
std
::
sort
(
cpu_freq
.
begin
(),
cpu_freq
.
end
(),
[](
const
CPUFreq
&
lhs
,
const
CPUFreq
&
rhs
)
{
return
lhs
.
freq
>
rhs
.
freq
;
});
}
// decide num of cores to use
}
// namespace
size_t
cores_to_use
=
0
;
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_BIG_ONLY
MaceStatus
GetCPUCoresToUse
(
const
std
::
vector
<
float
>
&
cpu_max_freqs
,
||
policy
==
CPUAffinityPolicy
::
AFFINITY_LITTLE_ONLY
)
{
const
CPUAffinityPolicy
policy
,
for
(
size_t
i
=
0
;
i
<
cpu_max_freqs
.
size
();
++
i
)
{
const
size_t
thread_count_hint
,
if
(
cpu_freq
[
i
].
freq
!=
cpu_freq
[
0
].
freq
)
{
std
::
vector
<
size_t
>
*
cores
)
{
break
;
if
(
cpu_max_freqs
.
empty
())
{
}
++
cores_to_use
;
}
}
else
{
cores_to_use
=
thread_count
;
}
MACE_CHECK
(
cores_to_use
>
0
,
"number of cores to use should > 0"
);
cores
->
resize
(
cores_to_use
);
for
(
size_t
i
=
0
;
i
<
cores_to_use
;
++
i
)
{
VLOG
(
2
)
<<
"Bind thread to core: "
<<
cpu_freq
[
i
].
core_id
<<
" with freq "
<<
cpu_freq
[
i
].
freq
;
(
*
cores
)[
i
]
=
static_cast
<
int
>
(
cpu_freq
[
i
].
core_id
);
}
}
}
else
{
LOG
(
ERROR
)
<<
"CPU core is empty"
;
LOG
(
ERROR
)
<<
"CPU core is empty"
;
return
MaceStatus
::
MACE_RUNTIME_ERROR
;
}
size_t
thread_count
=
thread_count_hint
;
const
size_t
cpu_count
=
cpu_max_freqs
.
size
();
if
(
thread_count
==
0
||
thread_count
>
cpu_count
)
{
thread_count
=
cpu_count
;
}
}
}
}
// namespace
if
(
policy
!=
CPUAffinityPolicy
::
AFFINITY_NONE
)
{
std
::
vector
<
CPUFreq
>
cpu_freq
(
cpu_max_freqs
.
size
());
for
(
size_t
i
=
0
;
i
<
cpu_max_freqs
.
size
();
++
i
)
{
cpu_freq
[
i
].
core_id
=
i
;
cpu_freq
[
i
].
freq
=
cpu_max_freqs
[
i
];
}
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_POWER_SAVE
||
policy
==
CPUAffinityPolicy
::
AFFINITY_LITTLE_ONLY
)
{
std
::
sort
(
cpu_freq
.
begin
(),
cpu_freq
.
end
(),
[
=
](
const
CPUFreq
&
lhs
,
const
CPUFreq
&
rhs
)
{
return
lhs
.
freq
<
rhs
.
freq
;
});
}
else
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_HIGH_PERFORMANCE
||
policy
==
CPUAffinityPolicy
::
AFFINITY_BIG_ONLY
)
{
std
::
sort
(
cpu_freq
.
begin
(),
cpu_freq
.
end
(),
[](
const
CPUFreq
&
lhs
,
const
CPUFreq
&
rhs
)
{
return
lhs
.
freq
>
rhs
.
freq
;
});
}
// decide num of cores to use
size_t
cores_to_use
=
0
;
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_BIG_ONLY
||
policy
==
CPUAffinityPolicy
::
AFFINITY_LITTLE_ONLY
)
{
cores_to_use
=
GetCpuCoresForPerfomance
(
cpu_freq
);
}
else
{
cores_to_use
=
thread_count
;
}
MACE_CHECK
(
cores_to_use
>
0
,
"number of cores to use should > 0"
);
cores
->
resize
(
cores_to_use
);
for
(
size_t
i
=
0
;
i
<
cores_to_use
;
++
i
)
{
VLOG
(
2
)
<<
"Bind thread to core: "
<<
cpu_freq
[
i
].
core_id
<<
" with freq "
<<
cpu_freq
[
i
].
freq
;
(
*
cores
)[
i
]
=
static_cast
<
int
>
(
cpu_freq
[
i
].
core_id
);
}
}
return
MaceStatus
::
MACE_SUCCESS
;
}
ThreadPool
::
ThreadPool
(
const
size_t
thread_count_hint
,
ThreadPool
::
ThreadPool
(
const
size_t
thread_count_hint
,
const
CPUAffinityPolicy
policy
)
const
CPUAffinityPolicy
policy
)
...
@@ -173,13 +197,13 @@ void ThreadPool::Run(const std::function<void(const int64_t)> &func,
...
@@ -173,13 +197,13 @@ void ThreadPool::Run(const std::function<void(const int64_t)> &func,
std
::
unique_lock
<
std
::
mutex
>
run_lock
(
run_mutex_
);
std
::
unique_lock
<
std
::
mutex
>
run_lock
(
run_mutex_
);
for
(
size_t
i
=
0
;
i
<
thread_count
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
thread_count
;
++
i
)
{
int64_t
count
=
iters_per_thread
+
(
static_cast
<
int64_t
>
(
i
)
<
remainder
);
int64_t
range_len
=
iters_per_thread
+
(
static_cast
<
int64_t
>
(
i
)
<
remainder
);
thread_infos_
[
i
].
range_start
=
iters_offset
;
thread_infos_
[
i
].
range_start
=
iters_offset
;
int64_t
range_end
=
iters_offset
+
count
;
thread_infos_
[
i
].
range_len
=
range_len
;
thread_infos_
[
i
].
range_end
=
range_end
;
thread_infos_
[
i
].
range_end
=
iters_offset
+
range_len
;
thread_infos_
[
i
].
range_len
=
range_end
-
iters_offset
;
thread_infos_
[
i
].
func
=
reinterpret_cast
<
uintptr_t
>
(
&
func
);
thread_infos_
[
i
].
func
=
reinterpret_cast
<
uintptr_t
>
(
&
func
);
iters_offset
+=
thread_infos_
[
i
].
range_len
;
iters_offset
=
thread_infos_
[
i
].
range_end
;
}
}
count_down_latch_
.
Reset
(
thread_count
-
1
);
count_down_latch_
.
Reset
(
thread_count
-
1
);
...
...
mace/utils/thread_pool.h
浏览文件 @
8430a0e2
...
@@ -29,6 +29,11 @@
...
@@ -29,6 +29,11 @@
namespace
mace
{
namespace
mace
{
namespace
utils
{
namespace
utils
{
MaceStatus
GetCPUCoresToUse
(
const
std
::
vector
<
float
>
&
cpu_max_freqs
,
const
CPUAffinityPolicy
policy
,
const
size_t
thread_count_hint
,
std
::
vector
<
size_t
>
*
cores
);
class
ThreadPool
{
class
ThreadPool
{
public:
public:
ThreadPool
(
const
size_t
thread_count
,
ThreadPool
(
const
size_t
thread_count
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录