“8b45c3e65e95ccfb4776d2e227f16949d09bc090”上不存在“paddlespeech/s2t/exps/u2_st/model.py”
提交 09788348 编写于 作者: 李寅

Merge branch 'affinity' into 'master'

Fix CPU AFFINITY_NONE

See merge request !1145
...@@ -104,12 +104,7 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy( ...@@ -104,12 +104,7 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
std::vector<size_t> cores_to_use; std::vector<size_t> cores_to_use;
MACE_RETURN_IF_ERROR( MACE_RETURN_IF_ERROR(
mace::utils::GetCPUCoresToUse( mace::utils::GetCPUCoresToUse(
cpu_max_freqs, policy, num_threads_hint, &cores_to_use)); cpu_max_freqs, policy, &num_threads_hint, &cores_to_use));
int cpu_count = static_cast<int>(cores_to_use.size());
if (num_threads_hint <= 0 || num_threads_hint > cpu_count) {
num_threads_hint = cpu_count;
}
if (policy == CPUAffinityPolicy::AFFINITY_NONE) { if (policy == CPUAffinityPolicy::AFFINITY_NONE) {
#ifdef MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_QUANTIZE
......
...@@ -46,20 +46,22 @@ struct CPUFreq { ...@@ -46,20 +46,22 @@ struct CPUFreq {
float freq; float freq;
}; };
size_t GetCpuCoresForPerfomance(const std::vector<CPUFreq> &cpu_freqs) { int GetCpuCoresForPerfomance(
const std::vector<CPUFreq> &cpu_freqs,
const std::function<bool(const float &x, const float &y)> &comp) {
float total_freq = std::accumulate(cpu_freqs.begin(), cpu_freqs.end(), 0, float total_freq = std::accumulate(cpu_freqs.begin(), cpu_freqs.end(), 0,
[](float accum, CPUFreq cpu_freq) { [](float accum, CPUFreq cpu_freq) {
return accum + cpu_freq.freq; return accum + cpu_freq.freq;
}); });
size_t valid_cpu_nums = std::count_if(cpu_freqs.begin(), cpu_freqs.end(), int64_t valid_cpu_nums = std::count_if(cpu_freqs.begin(), cpu_freqs.end(),
[](CPUFreq cpu_freq) { [](CPUFreq cpu_freq) {
return cpu_freq.freq != 0; return cpu_freq.freq != 0;
}); });
float avg_freq = total_freq / valid_cpu_nums; float avg_freq = total_freq / valid_cpu_nums;
size_t cores_to_use = 0; int cores_to_use = 0;
for (auto cpu_info : cpu_freqs) { for (auto cpu_info : cpu_freqs) {
if ((cpu_info.freq > avg_freq if ((comp(cpu_info.freq, avg_freq)
&& cores_to_use < kMaxCpuCoresForPerformance) && cores_to_use < kMaxCpuCoresForPerformance)
|| cores_to_use < kMinCpuCoresForPerformance) { || cores_to_use < kMinCpuCoresForPerformance) {
++cores_to_use; ++cores_to_use;
...@@ -73,16 +75,17 @@ size_t GetCpuCoresForPerfomance(const std::vector<CPUFreq> &cpu_freqs) { ...@@ -73,16 +75,17 @@ size_t GetCpuCoresForPerfomance(const std::vector<CPUFreq> &cpu_freqs) {
MaceStatus GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs, MaceStatus GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs,
const CPUAffinityPolicy policy, const CPUAffinityPolicy policy,
const size_t thread_count_hint, int *thread_count,
std::vector<size_t> *cores) { std::vector<size_t> *cores) {
if (cpu_max_freqs.empty()) { if (cpu_max_freqs.empty()) {
*thread_count = 1;
LOG(ERROR) << "CPU core is empty"; LOG(ERROR) << "CPU core is empty";
return MaceStatus::MACE_RUNTIME_ERROR; return MaceStatus::MACE_RUNTIME_ERROR;
} }
size_t thread_count = thread_count_hint; *thread_count = std::max(*thread_count, 0);
const size_t cpu_count = cpu_max_freqs.size(); const int cpu_count = static_cast<int>(cpu_max_freqs.size());
if (thread_count == 0 || thread_count > cpu_count) { if (*thread_count == 0 || *thread_count > cpu_count) {
thread_count = cpu_count; *thread_count = cpu_count;
} }
if (policy != CPUAffinityPolicy::AFFINITY_NONE) { if (policy != CPUAffinityPolicy::AFFINITY_NONE) {
...@@ -108,69 +111,78 @@ MaceStatus GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs, ...@@ -108,69 +111,78 @@ MaceStatus GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs,
} }
// decide num of cores to use // decide num of cores to use
size_t cores_to_use = 0; int cores_to_use = 0;
if (policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY if (policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY) {
|| policy == CPUAffinityPolicy::AFFINITY_LITTLE_ONLY) { cores_to_use =
cores_to_use = GetCpuCoresForPerfomance(cpu_freq); GetCpuCoresForPerfomance(cpu_freq, std::greater_equal<float>());
} else if (policy == CPUAffinityPolicy::AFFINITY_LITTLE_ONLY) {
cores_to_use =
GetCpuCoresForPerfomance(cpu_freq, std::less_equal<float>());
} else { } else {
cores_to_use = thread_count; cores_to_use = *thread_count;
} }
MACE_CHECK(cores_to_use > 0, "number of cores to use should > 0"); MACE_CHECK(cores_to_use > 0, "number of cores to use should > 0");
cores->resize(cores_to_use); cores->resize(static_cast<size_t>(cores_to_use));
for (size_t i = 0; i < cores_to_use; ++i) { for (int i = 0; i < cores_to_use; ++i) {
VLOG(2) << "Bind thread to core: " << cpu_freq[i].core_id VLOG(2) << "Bind thread to core: " << cpu_freq[i].core_id
<< " with freq " << " with freq "
<< cpu_freq[i].freq; << cpu_freq[i].freq;
(*cores)[i] = static_cast<int>(cpu_freq[i].core_id); (*cores)[i] = static_cast<int>(cpu_freq[i].core_id);
} }
if (*thread_count == 0 || *thread_count > cores_to_use) {
*thread_count = cores_to_use;
}
} }
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
ThreadPool::ThreadPool(const size_t thread_count_hint, ThreadPool::ThreadPool(const int thread_count_hint,
const CPUAffinityPolicy policy) const CPUAffinityPolicy policy)
: event_(kThreadPoolNone), : event_(kThreadPoolNone),
count_down_latch_(kThreadPoolSpinWaitTime) { count_down_latch_(kThreadPoolSpinWaitTime) {
size_t thread_count = thread_count_hint; int thread_count = thread_count_hint;
std::vector<float> cpu_max_freqs; if (port::Env::Default()->GetCPUMaxFreq(&cpu_max_freqs_)
if (port::Env::Default()->GetCPUMaxFreq(&cpu_max_freqs)
!= MaceStatus::MACE_SUCCESS) { != MaceStatus::MACE_SUCCESS) {
LOG(ERROR) << "Fail to get cpu max frequencies"; LOG(ERROR) << "Fail to get cpu max frequencies";
} }
thread_count = std::max(static_cast<size_t>(1),
std::min(thread_count, cpu_max_freqs.size()));
std::vector<size_t> cores_to_use; std::vector<size_t> cores_to_use;
GetCPUCoresToUse(cpu_max_freqs, policy, thread_count, &cores_to_use); GetCPUCoresToUse(cpu_max_freqs_, policy, &thread_count, &cores_to_use);
MACE_CHECK(thread_count > 0);
VLOG(2) << "Use " << thread_count << " threads";
if (!cores_to_use.empty()) { if (!cores_to_use.empty()) {
if (port::Env::Default()->SchedSetAffinity(cores_to_use) if (port::Env::Default()->SchedSetAffinity(cores_to_use)
!= MaceStatus::MACE_SUCCESS) { != MaceStatus::MACE_SUCCESS) {
LOG(ERROR) << "Failed to sched_set_affinity"; LOG(ERROR) << "Failed to sched_set_affinity";
} }
} }
if (!cores_to_use.empty() && thread_count > cores_to_use.size()) {
thread_count = cores_to_use.size();
}
VLOG(2) << "Use " << thread_count << " threads";
default_tile_count_ = thread_count; default_tile_count_ = thread_count;
if (cores_to_use.size() >= 2 if (thread_count > 1) {
&& cpu_max_freqs[cores_to_use[0]] != cpu_max_freqs[cores_to_use.back()]) {
default_tile_count_ = thread_count * kTileCountPerThread; default_tile_count_ = thread_count * kTileCountPerThread;
} }
MACE_CHECK(default_tile_count_ > 0, "default tile count should > 0"); MACE_CHECK(default_tile_count_ > 0, "default tile count should > 0");
threads_ = std::vector<std::thread>(thread_count); threads_ = std::vector<std::thread>(static_cast<size_t>(thread_count));
thread_infos_ = std::vector<ThreadInfo>(thread_count); thread_infos_ = std::vector<ThreadInfo>(static_cast<size_t>(thread_count));
for (auto &thread_info : thread_infos_) { for (auto &thread_info : thread_infos_) {
thread_info.cpu_cores = cores_to_use; thread_info.cpu_cores = cores_to_use;
} }
} }
ThreadPool::~ThreadPool() { ThreadPool::~ThreadPool() {
// Clear affinity of main thread
if (!cpu_max_freqs_.empty()) {
std::vector<size_t> cores(cpu_max_freqs_.size());
for (size_t i = 0; i < cores.size(); ++i) {
cores[i] = i;
}
port::Env::Default()->SchedSetAffinity(cores);
}
Destroy(); Destroy();
} }
......
...@@ -31,12 +31,12 @@ namespace utils { ...@@ -31,12 +31,12 @@ namespace utils {
MaceStatus GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs, MaceStatus GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs,
const CPUAffinityPolicy policy, const CPUAffinityPolicy policy,
const size_t thread_count_hint, int *thread_count_hint,
std::vector<size_t> *cores); std::vector<size_t> *cores);
class ThreadPool { class ThreadPool {
public: public:
ThreadPool(const size_t thread_count, ThreadPool(const int thread_count,
const CPUAffinityPolicy affinity_policy); const CPUAffinityPolicy affinity_policy);
~ThreadPool(); ~ThreadPool();
...@@ -114,6 +114,7 @@ class ThreadPool { ...@@ -114,6 +114,7 @@ class ThreadPool {
}; };
std::vector<ThreadInfo> thread_infos_; std::vector<ThreadInfo> thread_infos_;
std::vector<std::thread> threads_; std::vector<std::thread> threads_;
std::vector<float> cpu_max_freqs_;
int64_t default_tile_count_; int64_t default_tile_count_;
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册