diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc index e5de57b7ddfe414579d961268a08ca1d35b2e549..73ade394fe4c8b3cd99e1f14d647e3c6d3170ec8 100644 --- a/mace/benchmark/benchmark_model.cc +++ b/mace/benchmark/benchmark_model.cc @@ -219,7 +219,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, - "0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); + "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); int Main(int argc, char **argv) { MACE_CHECK(FLAGS_device != "HEXAGON", diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index b1bd9fe932b5e27204ef62881a5fd94d953a0583..2a9984adce819923fbd9069bd69daaa9e3ef108c 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -121,10 +121,8 @@ MaceStatus GetCPUBigLittleCoreIDs(std::vector *big_core_ids, void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, const std::vector &cpu_ids) { - std::ostringstream oss; - for (auto cpu_id : cpu_ids) oss << cpu_id << ' '; - VLOG(1) << "Set CPU openmp num_threads: " << omp_num_threads - << ", cpu_ids: " << oss.str(); + VLOG(1) << "Set OpenMP threads number: " << omp_num_threads + << ", CPU core IDs: " << MakeString(cpu_ids); omp_set_num_threads(omp_num_threads); @@ -134,7 +132,6 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, for (auto cpu_id : cpu_ids) { CPU_SET(cpu_id, &mask); } - VLOG(3) << "Set cpu affinity with mask: " << mask.__bits[0]; #pragma omp parallel for for (int i = 0; i < omp_num_threads; ++i) { @@ -144,9 +141,10 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint, CPUAffinityPolicy policy) { - // There is no need to set affinity in default mode - if (policy == 
CPUAffinityPolicy::AFFINITY_DEFAULT) { - if (omp_num_threads_hint > 0) omp_set_num_threads(omp_num_threads_hint); + if (policy == CPUAffinityPolicy::AFFINITY_NONE) { + if (omp_num_threads_hint > 0) { + omp_set_num_threads(std::min(omp_num_threads_hint, omp_get_num_procs())); + } return MACE_SUCCESS; } @@ -164,7 +162,8 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint, use_cpu_ids = std::move(little_core_ids); } - if (omp_num_threads_hint < 0) { + if (omp_num_threads_hint <= 0 || + omp_num_threads_hint > use_cpu_ids.size()) { omp_num_threads_hint = use_cpu_ids.size(); } SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids); diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc index f95f883197c02f5312414332d659c8b7ce5ec540..a246ffe8ab47127836046ecba73db28655937f30 100644 --- a/mace/core/testing/test_benchmark_main.cc +++ b/mace/core/testing/test_benchmark_main.cc @@ -14,7 +14,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, - "0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); + "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); int main(int argc, char **argv) { gflags::SetUsageMessage("some usage message"); diff --git a/mace/examples/example.cc b/mace/examples/example.cc index 62bace7eb4cad0fc3a24553a58dcc1b3a86f8f16..aa58cea7c0837bd2576a7b99a35a473f05fd1ce6 100644 --- a/mace/examples/example.cc +++ b/mace/examples/example.cc @@ -130,7 +130,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, - "0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); + 
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); bool RunModel(const std::vector &input_names, const std::vector> &input_shapes, diff --git a/mace/public/mace_runtime.h b/mace/public/mace_runtime.h index f159ea415d3fc20544b3e93bc4afad1870a09cfb..1d68a15f23f5c437480a36c9a80025a351c40cb8 100644 --- a/mace/public/mace_runtime.h +++ b/mace/public/mace_runtime.h @@ -32,7 +32,7 @@ enum GPUPriorityHint { }; enum CPUAffinityPolicy { - AFFINITY_DEFAULT = 0, + AFFINITY_NONE = 0, AFFINITY_BIG_ONLY = 1, AFFINITY_LITTLE_ONLY = 2, }; @@ -66,39 +66,52 @@ class FileStorageFactory : public KVStorageFactory { std::unique_ptr impl_; }; -// Set KV store factory used as OpenCL cache +// Set KV store factory used as OpenCL cache. void SetKVStorageFactory(std::shared_ptr storage_factory); -// Set GPU hints, currently only supports Adreno GPU +// Set GPU hints, currently only supports Adreno GPU. +// +// Caution: this function may hurt performance if improper parameters provided. void SetGPUHints(GPUPerfHint perf_hint, GPUPriorityHint priority_hint); // Set OpenMP threads number and affinity policy. // -// num_threads_hint is only a hint, the function can change it when it's larger -// than 0. When num_threads_hint is not positive, the function will set the -// threads number equaling to the number of big + little, big or little cores -// according to the policy. +// Caution: this function may hurt performance if improper parameters provided. +// +// num_threads_hint is only a hint. When num_threads_hint is zero or negative, +// the function will set the threads number equaling to the number of +// big (AFFINITY_BIG_ONLY), little (AFFINITY_LITTLE_ONLY) or all +// (AFFINITY_NONE) cores according to the policy. The threads number will +// also be truncated to the corresponding cores number when num_threads_hint +// is larger than it. +// +// The OpenMP threads will be bind to (via sched_setaffinity) big cores +// (AFFINITY_BIG_ONLY) and little cores (AFFINITY_LITTLE_ONLY). 
// -// This function may not work well on some ships (e.g. MTK), and in such -// cases (when it returns error MACE_INVALID_ARGS) you may try to use -// SetOpenMPThreadAffinity to set affinity manually, or just set default policy. +// Returns MACE_SUCCESS on success, or an error if it can't reliably +// detect big.LITTLE cores (see GetBigLittleCoreIDs). In such cases, it's +// suggested to use AFFINITY_NONE to use all cores. MaceStatus SetOpenMPThreadPolicy(int num_threads_hint, CPUAffinityPolicy policy); -// Set OpenMP threads number and processor affinity -// This function may not work well on some chips (e.g. MTK). Set thread affinity -// to offline cores may fail or run unexpectedly. In such cases, please use -// SetOpenMPThreadPolicy with default policy instead. +// Set OpenMP threads number and processor affinity. +// +// Caution: this function may hurt performance if improper parameters provided. +// +// This function may not work well on some chips (e.g. MTK). Setting thread +// affinity to offline cores may run very slowly or unexpectedly. In such cases, +// please use SetOpenMPThreadPolicy with AFFINITY_NONE instead. void SetOpenMPThreadAffinity(int num_threads, const std::vector &cpu_ids); // Get ARM big.LITTLE configuration. // -// This function may not work well on some chips (e.g. MTK) and miss the -// offline cores, and the user should detect the configurations manually -// in such case(when it returns error MACE_INVALID_ARGS). +// This function will detect the max frequencies of all CPU cores, and assume +// the cores with largest max frequencies as big cores, and all the remaining +// cores as little. If all cores' max frequencies are equal, big_core_ids and +// little_core_ids will both be filled with all cpu core ids. // -// If all cpu's frequencies are equal(i.e. all cores are the same), -// big_core_ids and little_core_ids will be set to all cpu ids.
+// Returns MACE_SUCCESS on success, or an error if it can't reliably +// detect the frequency of big.LITTLE cores (e.g. MTK). MaceStatus GetBigLittleCoreIDs(std::vector *big_core_ids, std::vector *little_core_ids); diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc index 281cbede5c50ab82b6aa0d8a41be1fc4940ff60b..bd554f747e015f2a9603a253aeecbf14ae237cac 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/validation/mace_run.cc @@ -193,7 +193,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, - "0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); + "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); bool RunModel(const std::vector &input_names, const std::vector> &input_shapes,