Fix the sched_setaffinity bug and optimize the performance of the thread pool

N/A Signed-off-by: N Luxuhui <luxuhui@xiaomi.com>

Fix the sched_setaffinity bug and optimize the performance of the thread pool
N/A Signed-off-by: N Luxuhui <luxuhui@xiaomi.com>
8430a0e2 · luxuhui · 6c178680 · 8430a0e2 · 8430a0e2 · 8430a0e2
11 changed files
--- a/mace/core/runtime/cpu/cpu_runtime.cc
+++ b/mace/core/runtime/cpu/cpu_runtime.cc
@@ -31,16 +31,12 @@
 #include "mace/public/mace.h"
 #include "mace/utils/macros.h"
 #include "mace/utils/logging.h"
+#include "mace/utils/thread_pool.h"

 namespace mace {

 int MaceOpenMPThreadCount = 1;

-struct CPUFreq {
-  size_t core_id;
-  float freq;
-};
-
 enum SchedulePolicy {
  SCHED_STATIC,
  SCHED_GUIDED,
@@ -105,28 +101,12 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
    return MaceStatus::MACE_RUNTIME_ERROR;
  }

-  std::vector<CPUFreq> cpu_freq(cpu_max_freqs.size());
-  for (size_t i = 0; i < cpu_max_freqs.size(); ++i) {
-    cpu_freq[i].core_id = i;
-    cpu_freq[i].freq = cpu_max_freqs[i];
-  }
-  if (policy == CPUAffinityPolicy::AFFINITY_POWER_SAVE ||
-      policy == CPUAffinityPolicy::AFFINITY_LITTLE_ONLY) {
-    std::sort(cpu_freq.begin(),
-              cpu_freq.end(),
-              [=](const CPUFreq &lhs, const CPUFreq &rhs) {
-                return lhs.freq < rhs.freq;
-              });
-  } else if (policy == CPUAffinityPolicy::AFFINITY_HIGH_PERFORMANCE ||
-      policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY) {
-    std::sort(cpu_freq.begin(),
-              cpu_freq.end(),
-              [](const CPUFreq &lhs, const CPUFreq &rhs) {
-                return lhs.freq > rhs.freq;
-              });
-  }
+  std::vector<size_t> cores_to_use;
+  MACE_RETURN_IF_ERROR(
+      mace::utils::GetCPUCoresToUse(
+          cpu_max_freqs, policy, num_threads_hint, &cores_to_use));

-  int cpu_count = static_cast<int>(cpu_freq.size());
+  int cpu_count = static_cast<int>(cores_to_use.size());
  if (num_threads_hint <= 0 || num_threads_hint > cpu_count) {
    num_threads_hint = cpu_count;
  }
@@ -148,32 +128,10 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
    return MaceStatus::MACE_SUCCESS;
  }

-
-  // decide num of cores to use
-  int cores_to_use = 0;
-  if (policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY
-      || policy == CPUAffinityPolicy::AFFINITY_LITTLE_ONLY) {
-    for (size_t i = 0; i < cpu_max_freqs.size(); ++i) {
-      if (cpu_freq[i].freq != cpu_freq[0].freq) {
-        break;
-      }
-      ++cores_to_use;
-    }
-    num_threads_hint = std::min(num_threads_hint, cores_to_use);
-  } else {
-    cores_to_use = num_threads_hint;
-  }
-  MACE_CHECK(cores_to_use > 0, "number of cores to use should > 0");
-
-  VLOG(2) << "Use " << num_threads_hint << " threads";
-  std::vector<size_t> cpu_ids(cores_to_use);
-  for (int i = 0; i < cores_to_use; ++i) {
-    VLOG(2) << "Bind thread to core: " << cpu_freq[i].core_id << " with freq "
-            << cpu_freq[i].freq;
-    cpu_ids[i] = cpu_freq[i].core_id;
-  }
  SchedulePolicy sched_policy = SCHED_GUIDED;
-  if (std::abs(cpu_freq[0].freq - cpu_freq[cores_to_use - 1].freq) < 1e-6) {
+  float first_freq = cpu_max_freqs[cores_to_use[0]];
+  float last_freq = cpu_max_freqs[cores_to_use[cores_to_use.size() - 1]];
+  if (std::abs(first_freq - last_freq) < 1e-6) {
    sched_policy = SCHED_STATIC;
  }

@@ -185,7 +143,7 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
 #endif  // MACE_ENABLE_QUANTIZE

  return SetOpenMPThreadsAndAffinityCPUs(num_threads_hint,
-                                         cpu_ids,
+                                         cores_to_use,
                                         sched_policy);
 }


--- a/mace/port/android/env.cc
+++ b/mace/port/android/env.cc
@@ -17,7 +17,6 @@
 #include <errno.h>
 #include <unwind.h>
 #include <dlfcn.h>
-#include <unistd.h>
 #include <sys/syscall.h>
 #include <sys/types.h>

@@ -50,46 +49,60 @@ LogWriter *AndroidEnv::GetLogWriter() {
 namespace {

 struct BacktraceState {
-  void** current;
-  void** end;
+  void **current;
+  void **end;
 };

-_Unwind_Reason_Code UnwindCallback(struct _Unwind_Context* context, void* arg) {
-  BacktraceState* state = static_cast<BacktraceState*>(arg);
+_Unwind_Reason_Code UnwindCallback(struct _Unwind_Context *context, void *arg) {
+  BacktraceState *state = static_cast<BacktraceState *>(arg);
  uintptr_t pc = _Unwind_GetIP(context);
  if (pc) {
    if (state->current == state->end) {
      return _URC_END_OF_STACK;
    } else {
-      *state->current++ = reinterpret_cast<void*>(pc);
+      *state->current++ = reinterpret_cast<void *>(pc);
    }
  }
  return _URC_NO_REASON;
 }

-size_t BackTrace(void** buffer, size_t max) {
+size_t BackTrace(void **buffer, size_t max) {
  BacktraceState state = {buffer, buffer + max};
  _Unwind_Backtrace(UnwindCallback, &state);

  return state.current - buffer;
 }

+// Returns true when the first line of
+// /sys/devices/system/cpu/cpu<cpu_id>/isolate parses to a non-zero integer.
+// A file that cannot be opened, or that yields no line, counts as "not
+// isolated" (false).
+bool CpuIsolate(size_t cpu_id) {
+  const std::string isolate_path =
+      MakeString("/sys/devices/system/cpu/cpu", cpu_id, "/isolate");
+  std::ifstream isolate_stream(isolate_path);
+  if (!isolate_stream.is_open()) {
+    return false;
+  }
+
+  std::string first_line;
+  if (!std::getline(isolate_stream, first_line)) {
+    return false;
+  }
+
+  // Base 0 lets strtol accept decimal, octal and hexadecimal spellings.
+  const int isolate_switch = strtol(first_line.c_str(), nullptr, 0);
+  return isolate_switch != 0;
+}
+
 }  // namespace

-MaceStatus AndroidEnv::SchedSetAffinity(const std::vector<size_t> &cpu_ids) {
-  // compute mask
-  cpu_set_t mask;
-  CPU_ZERO(&mask);
-  for (auto cpu_id : cpu_ids) {
-    CPU_SET(cpu_id, &mask);
-  }
-  pid_t pid = gettid();
-  int err = sched_setaffinity(pid, sizeof(mask), &mask);
-  if (err) {
-    LOG(WARNING) << "SchedSetAffinity failed: " << strerror(errno);
-    return MaceStatus(MaceStatus::MACE_INVALID_ARGS,
-                      "SchedSetAffinity failed: " +
-                      std::string(strerror(errno)));
+MaceStatus AndroidEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
+  MACE_RETURN_IF_ERROR(LinuxBaseEnv::GetCPUMaxFreq(max_freqs));
+
+  size_t cpu_num = (max_freqs != nullptr) ? max_freqs->size() : 0;
+  if (cpu_num > 0) {
+    for (size_t i = 0; i < cpu_num; ++i) {
+      if (CpuIsolate(i)) {
+        (*max_freqs)[i] = 0;
+      }
+    }
  }

  return MaceStatus::MACE_SUCCESS;
@@ -103,8 +116,8 @@ std::vector<std::string> AndroidEnv::GetBackTraceUnsafe(int max_steps) {
  for (int i = 0; i < steps; ++i) {
    std::ostringstream os;

-    const void* addr = buffer[i];
-    const char* symbol = "";
+    const void *addr = buffer[i];
+    const char *symbol = "";
    Dl_info info;
    if (dladdr(addr, &info) && info.dli_sname) {
      symbol = info.dli_sname;

--- a/mace/port/android/env.h
+++ b/mace/port/android/env.h
@@ -29,8 +29,8 @@ namespace port {

 class AndroidEnv : public LinuxBaseEnv {
 public:
-  MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids) override;
  LogWriter *GetLogWriter() override;
+  MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
  std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;
  std::unique_ptr<MallocLogger> NewMallocLogger(
      std::ostringstream *oss,

--- a/mace/port/darwin/env.cc
+++ b/mace/port/darwin/env.cc
@@ -15,6 +15,8 @@
 #include "mace/port/darwin/env.h"

 #include <execinfo.h>
+#include <mach/thread_act.h>
+#include <mach/thread_policy.h>
 #include <stdint.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
@@ -33,27 +35,64 @@ namespace mace {
 namespace port {

 namespace {
-const char kCpuFrequencyMax[] = "hw.cpufrequency_max";
+
+constexpr const char kCpuFrequencyMax[] = "hw.cpufrequency_max";
+constexpr const char kCpuActiveNum[] = "hw.activecpu";
+
 }

 int64_t DarwinEnv::NowMicros() {
  return mace::port::posix::NowMicros();
 }

-// TODO(luxuhui): this func is not accurate, darwin does not support
-// acquiring CPU frequencies, we need to reconsider the CPU scheduling
-// strategy.
-MaceStatus DarwinEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
-  MACE_CHECK_NOTNULL(max_freqs);
+// Darwin does not expose a per-core frequency, so this method reports the
+// single value of hw.cpufrequency_max once for every active CPU
+// (hw.activecpu).
+//
+// On success the entries are appended to cpu_infos and MACE_SUCCESS is
+// returned. If either sysctl lookup fails, nothing is appended and
+// MACE_RUNTIME_ERROR is returned.
+MaceStatus DarwinEnv::GetCPUMaxFreq(std::vector<float> *cpu_infos) {
+  MACE_CHECK_NOTNULL(cpu_infos);

+  // hw.cpufrequency_max is a 64-bit integer property: it has to be read into
+  // an integer buffer of that size and converted afterwards. Reading it
+  // straight into a float passes a buffer of the wrong size and type.
  uint64_t freq = 0;
  size_t size = sizeof(freq);
  int ret = sysctlbyname(kCpuFrequencyMax, &freq, &size, NULL, 0);
  if (ret < 0) {
    LOG(ERROR) << "failed to get property: " << kCpuFrequencyMax;
    return MaceStatus::MACE_RUNTIME_ERROR;
  }
-  max_freqs->push_back(freq);
+
+  uint64_t cpu_num = 0;
+  size = sizeof(cpu_num);
+  ret = sysctlbyname(kCpuActiveNum, &cpu_num, &size, NULL, 0);
+  if (ret < 0) {
+    LOG(ERROR) << "failed to get property: " << kCpuActiveNum;
+    return MaceStatus::MACE_RUNTIME_ERROR;
+  }
+
+  // Every active CPU gets the same (shared) maximum frequency.
+  const float max_freq = static_cast<float>(freq);
+  cpu_infos->insert(cpu_infos->end(),
+                    static_cast<size_t>(cpu_num),
+                    max_freq);
+
+  return MaceStatus::MACE_SUCCESS;
+}
+
+// Applies a Mach thread-affinity tag derived from cpu_ids to the calling
+// thread. The tag is the sum of (cpu_ids[i] << i) over all entries.
+//
+// The policy is only applied when MACE_OS_MAC is defined; otherwise the
+// function just returns MACE_SUCCESS. Returns MACE_RUNTIME_ERROR when
+// thread_policy_set() reports a failure.
+MaceStatus DarwinEnv::SchedSetAffinity(
+    const std::vector<size_t> &cpu_ids) {
+  unsigned int tag = 0;
+  for (size_t i = 0; i < cpu_ids.size(); ++i) {
+    tag += (cpu_ids[i] << i);
+  }
+
+#ifdef MACE_OS_MAC
+  pthread_t thread = pthread_self();
+  mach_port_t mach_port = pthread_mach_thread_np(thread);
+  thread_affinity_policy_data_t policy_data = {static_cast<integer_t>(tag)};
+  kern_return_t ret = thread_policy_set(
+      mach_port,
+      THREAD_AFFINITY_POLICY,
+      reinterpret_cast<thread_policy_t>(&policy_data),
+      THREAD_AFFINITY_POLICY_COUNT);
+  if (ret != KERN_SUCCESS) {
+    // thread_policy_set() reports failure through its kern_return_t result
+    // and does not set errno, so the return code itself is logged.
+    LOG(INFO) << "thread_policy_set failed: kern_return_t = " << ret;
+    return MaceStatus::MACE_RUNTIME_ERROR;
+  }
+#endif

  return MaceStatus::MACE_SUCCESS;
 }

--- a/mace/port/darwin/env.h
+++ b/mace/port/darwin/env.h
@@ -20,6 +20,7 @@

 #include "mace/port/env.h"
 #include "mace/port/logger.h"
+#include "mace/port/port-arch.h"
 #include "mace/port/posix/file_system.h"

 namespace mace {
@@ -29,6 +30,7 @@ class DarwinEnv : public Env {
 public:
  int64_t NowMicros() override;
  MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
+  MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids) override;
  FileSystem *GetFileSystem() override;
  LogWriter *GetLogWriter() override;
  std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;

--- a/mace/port/linux/env.cc
+++ b/mace/port/linux/env.cc
@@ -25,10 +25,21 @@
 #include "mace/port/posix/backtrace.h"
 #include "mace/port/posix/file_system.h"
 #include "mace/port/posix/time.h"
+#include "mace/utils/macros.h"

 namespace mace {
 namespace port {

+// On our embedded Linux devices, setting thread affinity hurts performance,
+// so this override deliberately ignores cpu_ids and reports success. If
+// pinning helps on your device (as it does for us on Android), remove this
+// override so that LinuxBaseEnv::SchedSetAffinity is used instead.
+MaceStatus LinuxEnv::SchedSetAffinity(const std::vector<size_t> &cpu_ids) {
+  MACE_UNUSED(cpu_ids);
+
+  return MaceStatus::MACE_SUCCESS;
+}
+
 LogWriter *LinuxEnv::GetLogWriter() {
  return &log_writer_;
 }

--- a/mace/port/linux/env.h
+++ b/mace/port/linux/env.h
@@ -26,6 +26,7 @@ namespace port {

 class LinuxEnv : public LinuxBaseEnv {
 public:
+  MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids) override;
  LogWriter *GetLogWriter() override;
  std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;


--- a/mace/port/linux_base/env.cc
+++ b/mace/port/linux_base/env.cc
@@ -14,7 +14,10 @@

 #include "mace/port/linux_base/env.h"

+#include <errno.h>
+#include <sys/syscall.h>
 #include <sys/time.h>
+#include <unistd.h>

 #include <cstddef>
 #include <fstream>
@@ -28,7 +31,6 @@
 namespace mace {
 namespace port {

-
 namespace {

 int GetCPUCount() {
@@ -100,5 +102,24 @@ MaceStatus LinuxBaseEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
  return MaceStatus::MACE_SUCCESS;
 }

+// Restricts the calling thread to the CPUs listed in cpu_ids by calling
+// sched_setaffinity() with the thread id obtained from SYS_gettid.
+//
+// Returns MACE_SUCCESS on success. When sched_setaffinity() fails, a warning
+// is logged and a MACE_INVALID_ARGS status carrying the strerror() text is
+// returned.
+MaceStatus LinuxBaseEnv::SchedSetAffinity(const std::vector<size_t> &cpu_ids) {
+  cpu_set_t mask;
+  CPU_ZERO(&mask);
+  for (auto cpu_id : cpu_ids) {
+    CPU_SET(cpu_id, &mask);
+  }
+
+  pid_t tid = syscall(SYS_gettid);
+  if (sched_setaffinity(tid, sizeof(mask), &mask) != 0) {
+    // Capture errno exactly once: logging may perform I/O that overwrites it
+    // before the status message is built.
+    const int saved_errno = errno;
+    const std::string reason =
+        "SchedSetAffinity failed: " + std::string(strerror(saved_errno));
+    LOG(WARNING) << reason;
+    return MaceStatus(MaceStatus::MACE_INVALID_ARGS, reason);
+  }
+
+  return MaceStatus::MACE_SUCCESS;
+}
+
 }  // namespace port
 }  // namespace mace
--- a/mace/port/linux_base/env.h
+++ b/mace/port/linux_base/env.h
@@ -28,6 +28,7 @@ class LinuxBaseEnv : public Env {
  int64_t NowMicros() override;
  MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
  FileSystem *GetFileSystem() override;
+  MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids) override;

 protected:
  PosixFileSystem posix_file_system_;

--- a/mace/utils/thread_pool.cc
+++ b/mace/utils/thread_pool.cc
@@ -13,6 +13,8 @@
 // limitations under the License.

 #include <algorithm>
+#include <numeric>
+
 #include "mace/port/port.h"
 #include "mace/port/env.h"
 #include "mace/utils/logging.h"
@@ -26,6 +28,8 @@ namespace utils {
 constexpr int kThreadPoolSpinWaitTime = 2000000;  // ns
 constexpr int kTileCountPerThread = 2;
 constexpr int kMaxCostUsingSingleThread = 100;
+constexpr int kMinCpuCoresForPerformance = 3;
+constexpr int kMaxCpuCoresForPerformance = 5;

 namespace {

@@ -42,67 +46,87 @@ struct CPUFreq {
  float freq;
 };

-void GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs,
-                      const CPUAffinityPolicy policy,
-                      const size_t thread_count_hint,
-                      std::vector<size_t> *cores) {
-  size_t thread_count = thread_count_hint;
-  if (!cpu_max_freqs.empty()) {
-    const size_t cpu_count = cpu_max_freqs.size();
-    if (thread_count == 0 || thread_count > cpu_count) {
-      thread_count = cpu_count;
+// Decides how many leading entries of cpu_freqs should be used.
+//
+// Walking cpu_freqs in order, an entry is counted when either
+//   * fewer than kMinCpuCoresForPerformance entries have been counted, or
+//   * its freq is above the average and fewer than
+//     kMaxCpuCoresForPerformance entries have been counted.
+// The average is the sum of all freq values divided by the number of
+// entries whose freq is non-zero. The result never exceeds cpu_freqs.size().
+size_t GetCpuCoresForPerfomance(const std::vector<CPUFreq> &cpu_freqs) {
+  // The seed must be a float: with an int literal std::accumulate keeps an
+  // int accumulator and truncates every partial sum.
+  const float total_freq = std::accumulate(
+      cpu_freqs.begin(), cpu_freqs.end(), 0.0f,
+      [](float accum, const CPUFreq &cpu_freq) {
+        return accum + cpu_freq.freq;
+      });
+  const size_t valid_cpu_nums = std::count_if(
+      cpu_freqs.begin(), cpu_freqs.end(),
+      [](const CPUFreq &cpu_freq) { return cpu_freq.freq != 0; });
+  // Without any non-zero entry the division would be 0/0; an average of 0
+  // gives the same count (only the minimum applies) without producing NaN.
+  const float avg_freq =
+      valid_cpu_nums > 0 ? total_freq / valid_cpu_nums : 0.0f;
+
+  const size_t min_cores = kMinCpuCoresForPerformance;
+  const size_t max_cores = kMaxCpuCoresForPerformance;
+  size_t cores_to_use = 0;
+  for (const auto &cpu_info : cpu_freqs) {
+    if ((cpu_info.freq > avg_freq && cores_to_use < max_cores)
+        || cores_to_use < min_cores) {
+      ++cores_to_use;
    }
+  }

-    if (policy != CPUAffinityPolicy::AFFINITY_NONE) {
-      std::vector<CPUFreq> cpu_freq(cpu_max_freqs.size());
-      for (size_t i = 0; i < cpu_max_freqs.size(); ++i) {
-        cpu_freq[i].core_id = i;
-        cpu_freq[i].freq = cpu_max_freqs[i];
-      }
-      if (policy == CPUAffinityPolicy::AFFINITY_POWER_SAVE ||
-          policy == CPUAffinityPolicy::AFFINITY_LITTLE_ONLY) {
-        std::sort(cpu_freq.begin(),
-                  cpu_freq.end(),
-                  [=](const CPUFreq &lhs, const CPUFreq &rhs) {
-                    return lhs.freq < rhs.freq;
-                  });
-      } else if (policy == CPUAffinityPolicy::AFFINITY_HIGH_PERFORMANCE ||
-          policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY) {
-        std::sort(cpu_freq.begin(),
-                  cpu_freq.end(),
-                  [](const CPUFreq &lhs, const CPUFreq &rhs) {
-                    return lhs.freq > rhs.freq;
-                  });
-      }
+  return cores_to_use;
+}

-      // decide num of cores to use
-      size_t cores_to_use = 0;
-      if (policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY
-          || policy == CPUAffinityPolicy::AFFINITY_LITTLE_ONLY) {
-        for (size_t i = 0; i < cpu_max_freqs.size(); ++i) {
-          if (cpu_freq[i].freq != cpu_freq[0].freq) {
-            break;
-          }
-          ++cores_to_use;
-        }
-      } else {
-        cores_to_use = thread_count;
-      }
-      MACE_CHECK(cores_to_use > 0, "number of cores to use should > 0");
-      cores->resize(cores_to_use);
-      for (size_t i = 0; i < cores_to_use; ++i) {
-        VLOG(2) << "Bind thread to core: " << cpu_freq[i].core_id
-                << " with freq "
-                << cpu_freq[i].freq;
-        (*cores)[i] = static_cast<int>(cpu_freq[i].core_id);
-      }
-    }
-  } else {
+}  // namespace
+
+// Selects the CPU cores that threads should be bound to.
+//
+// cpu_max_freqs holds one frequency per core, indexed by core id. Depending
+// on policy:
+//   * AFFINITY_NONE: *cores is left untouched.
+//   * AFFINITY_POWER_SAVE / AFFINITY_HIGH_PERFORMANCE: cores are ordered by
+//     ascending / descending frequency and the first thread_count of them
+//     are returned, where thread_count is thread_count_hint clamped to the
+//     core count (0 means "all cores").
+//   * AFFINITY_LITTLE_ONLY / AFFINITY_BIG_ONLY: cores are ordered the same
+//     way, but the number returned comes from GetCpuCoresForPerfomance()
+//     and thread_count_hint is not consulted.
+//
+// Returns MACE_RUNTIME_ERROR when cpu_max_freqs is empty, MACE_SUCCESS
+// otherwise.
+MaceStatus GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs,
+                            const CPUAffinityPolicy policy,
+                            const size_t thread_count_hint,
+                            std::vector<size_t> *cores) {
+  if (cpu_max_freqs.empty()) {
    LOG(ERROR) << "CPU core is empty";
+    return MaceStatus::MACE_RUNTIME_ERROR;
+  }
+  size_t thread_count = thread_count_hint;
+  const size_t cpu_count = cpu_max_freqs.size();
+  if (thread_count == 0 || thread_count > cpu_count) {
+    thread_count = cpu_count;
  }
-}

-}  // namespace
+  if (policy != CPUAffinityPolicy::AFFINITY_NONE) {
+    // The output is only written on this path, so only check it here.
+    MACE_CHECK(cores != nullptr, "cores should not be null");
+
+    std::vector<CPUFreq> cpu_freq(cpu_count);
+    for (size_t i = 0; i < cpu_count; ++i) {
+      cpu_freq[i].core_id = i;
+      cpu_freq[i].freq = cpu_max_freqs[i];
+    }
+    if (policy == CPUAffinityPolicy::AFFINITY_POWER_SAVE ||
+        policy == CPUAffinityPolicy::AFFINITY_LITTLE_ONLY) {
+      std::sort(cpu_freq.begin(),
+                cpu_freq.end(),
+                [](const CPUFreq &lhs, const CPUFreq &rhs) {
+                  return lhs.freq < rhs.freq;
+                });
+    } else if (policy == CPUAffinityPolicy::AFFINITY_HIGH_PERFORMANCE ||
+        policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY) {
+      std::sort(cpu_freq.begin(),
+                cpu_freq.end(),
+                [](const CPUFreq &lhs, const CPUFreq &rhs) {
+                  return lhs.freq > rhs.freq;
+                });
+    }
+
+    // decide num of cores to use
+    // NOTE(review): for AFFINITY_LITTLE_ONLY the list is sorted ascending, so
+    // the "above average" test in GetCpuCoresForPerfomance() matches entries
+    // at the high-frequency tail - confirm this is the intended selection.
+    size_t cores_to_use = 0;
+    if (policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY
+        || policy == CPUAffinityPolicy::AFFINITY_LITTLE_ONLY) {
+      cores_to_use = GetCpuCoresForPerfomance(cpu_freq);
+    } else {
+      cores_to_use = thread_count;
+    }
+    MACE_CHECK(cores_to_use > 0, "number of cores to use should > 0");
+    cores->resize(cores_to_use);
+    for (size_t i = 0; i < cores_to_use; ++i) {
+      VLOG(2) << "Bind thread to core: " << cpu_freq[i].core_id
+              << " with freq "
+              << cpu_freq[i].freq;
+      // core_id is already a size_t; store it without narrowing through int.
+      (*cores)[i] = cpu_freq[i].core_id;
+    }
+  }
+
+  return MaceStatus::MACE_SUCCESS;
+}

 ThreadPool::ThreadPool(const size_t thread_count_hint,
                       const CPUAffinityPolicy policy)
@@ -173,13 +197,13 @@ void ThreadPool::Run(const std::function<void(const int64_t)> &func,
  std::unique_lock<std::mutex> run_lock(run_mutex_);

  for (size_t i = 0; i < thread_count; ++i) {
-    int64_t count = iters_per_thread + (static_cast<int64_t>(i) < remainder);
+    int64_t range_len =
+        iters_per_thread + (static_cast<int64_t>(i) < remainder);
    thread_infos_[i].range_start = iters_offset;
-    int64_t range_end = iters_offset + count;
-    thread_infos_[i].range_end = range_end;
-    thread_infos_[i].range_len = range_end - iters_offset;
+    thread_infos_[i].range_len = range_len;
+    thread_infos_[i].range_end = iters_offset + range_len;
    thread_infos_[i].func = reinterpret_cast<uintptr_t>(&func);
-    iters_offset += thread_infos_[i].range_len;
+    iters_offset = thread_infos_[i].range_end;
  }

  count_down_latch_.Reset(thread_count - 1);

--- a/mace/utils/thread_pool.h
+++ b/mace/utils/thread_pool.h
@@ -29,6 +29,11 @@
 namespace mace {
 namespace utils {

+MaceStatus GetCPUCoresToUse(const std::vector<float> &cpu_max_freqs,
+                            const CPUAffinityPolicy policy,
+                            const size_t thread_count_hint,
+                            std::vector<size_t> *cores);
+
 class ThreadPool {
 public:
  ThreadPool(const size_t thread_count,