diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc index 05297479074e41bf0b96c1ae369c81a668f3d80c..a02a0d585ec0f5ce82ed45c306c941a019925935 100644 --- a/mace/benchmark/benchmark_model.cc +++ b/mace/benchmark/benchmark_model.cc @@ -214,7 +214,7 @@ DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); -DEFINE_int32(omp_num_threads, 8, "num of openmp threads"); +DEFINE_int32(omp_num_threads, 4, "num of openmp threads"); DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"); @@ -269,14 +269,13 @@ int Main(int argc, char **argv) { } // config runtime + mace::ConfigOmpThreads(FLAGS_omp_num_threads); + mace::ConfigCPUPowerOption( + static_cast(FLAGS_cpu_power_option)); if (device_type == OPENCL) { mace::ConfigOpenCLRuntime( static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); - } else if (device_type == CPU || device_type == NEON) { - mace::ConfigOmpThreadsAndAffinity( - FLAGS_omp_num_threads, - static_cast(FLAGS_cpu_power_option)); } std::vector input_names = diff --git a/mace/core/mace_runtime.cc b/mace/core/mace_runtime.cc index b6d08184d9ce88c9095ef078862e6f8999d99c0c..da70f96f72b2e0c76d704773c34eefd2a789b5b1 100644 --- a/mace/core/mace_runtime.cc +++ b/mace/core/mace_runtime.cc @@ -15,11 +15,14 @@ void ConfigOpenCLRuntime(GPUPerfHint gpu_perf_hint, OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint); } -void ConfigOmpThreadsAndAffinity(int omp_num_threads, - CPUPowerOption power_option) { - VLOG(1) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads - << ", cpu_power_option: " << power_option; - SetOmpThreadsAndAffinity(omp_num_threads, power_option); +void ConfigOmpThreads(int omp_num_threads) { + VLOG(1) << "Config CPU omp_num_threads: " << omp_num_threads; + SetOmpThreads(omp_num_threads); +} + +void ConfigCPUPowerOption(CPUPowerOption power_option) { + VLOG(1) << "Config CPU power option" << power_option; + SetThreadsAffinity(power_option); } }; // namespace mace diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index 20791870e7bdfa5b83156b2d6818356673161338..5a1538b0c995f585cc67e52bc5558db02e42410a 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -20,7 +20,10 @@ int GetCPUMaxFreq(int cpu_id) { "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu_id); FILE *fp = fopen(path, "rb"); - MACE_CHECK(fp, "File: ", path, " not exists"); + if (!fp) { + LOG(WARNING) << "File: " << path << " not exists."; + return 0; + } int freq = 0; fscanf(fp, "%d", &freq); @@ -73,8 +76,21 @@ void SetThreadAffinity(cpu_set_t mask) { } // namespace -void SetOmpThreadsAndAffinity(int omp_num_threads, - CPUPowerOption power_option) { +void SetOmpThreads(int omp_num_threads) { + int cpu_count = omp_get_num_procs(); + if (omp_num_threads > cpu_count) { + LOG(WARNING) << "set omp num threads greater than num of cpus can use: " + << cpu_count; + } + omp_set_num_threads(omp_num_threads); +} + +void SetThreadsAffinity(CPUPowerOption power_option) { + // There is no need to set affinity in default mode + if (power_option == CPUPowerOption::DEFAULT) { + return; + } + int cpu_count = omp_get_num_procs(); std::vector sorted_cpu_ids; sorted_cpu_ids.resize(cpu_count); @@ -82,9 +98,7 @@ void SetOmpThreadsAndAffinity(int omp_num_threads, SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids, &big_core_offset); std::vector use_cpu_ids; - if (power_option == CPUPowerOption::DEFAULT) { - use_cpu_ids = sorted_cpu_ids; - } else if (power_option == CPUPowerOption::HIGH_PERFORMANCE) { + if (power_option == CPUPowerOption::HIGH_PERFORMANCE) { use_cpu_ids = std::vector(sorted_cpu_ids.begin() + big_core_offset, sorted_cpu_ids.end()); } else { @@ -96,12 +110,6 @@ void SetOmpThreadsAndAffinity(int omp_num_threads, } } - if (omp_num_threads > use_cpu_ids.size()) { - LOG(WARNING) << "set omp num threads greater than num of cpus can use: " - << use_cpu_ids.size(); - } - omp_set_num_threads(omp_num_threads); - // compute mask cpu_set_t mask; CPU_ZERO(&mask); @@ -110,6 +118,7 @@ void SetOmpThreadsAndAffinity(int omp_num_threads, } VLOG(3) << "Set cpu affinity with mask: " << mask.__bits[0]; + int omp_num_threads = omp_get_max_threads(); #pragma omp parallel for for (int i = 0; i < omp_num_threads; ++i) { SetThreadAffinity(mask); diff --git a/mace/core/runtime/cpu/cpu_runtime.h b/mace/core/runtime/cpu/cpu_runtime.h index dbe19c4d6abe91e896df8de6dab251659b984a06..4687319bff338dc2c287977e2506e854bafa5320 100644 --- a/mace/core/runtime/cpu/cpu_runtime.h +++ b/mace/core/runtime/cpu/cpu_runtime.h @@ -10,8 +10,9 @@ namespace mace { -void SetOmpThreadsAndAffinity(int omp_num_threads, - CPUPowerOption power_option); +void SetOmpThreads(int omp_num_threads); + +void SetThreadsAffinity(CPUPowerOption power_option); } diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc index 079a79e79afb35ed3319380409ae6155ec4112d9..70090887a87c8da264049ba7ae14f9c6bd7f25f9 100644 --- a/mace/core/testing/test_benchmark_main.cc +++ b/mace/core/testing/test_benchmark_main.cc @@ -21,12 +21,12 @@ int main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); // config runtime + mace::ConfigOmpThreads(FLAGS_omp_num_threads); + mace::ConfigCPUPowerOption( + static_cast(FLAGS_cpu_power_option)); mace::ConfigOpenCLRuntime( static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); - mace::ConfigOmpThreadsAndAffinity( - FLAGS_omp_num_threads, - static_cast(FLAGS_cpu_power_option)); mace::testing::Benchmark::Run(FLAGS_pattern.c_str()); return 0; diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index a05782baf44a7caac0c95ec0542083670f7622e8..f71ca08d34467c58a5e8d7cffb1269f16f4622f6 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -196,14 +196,13 @@ bool RunModel(const std::vector &input_names, LOG(INFO) << "Runing with device type: " << device_type; // config runtime + mace::ConfigOmpThreads(FLAGS_omp_num_threads); + mace::ConfigCPUPowerOption( + static_cast(FLAGS_cpu_power_option)); if (device_type == DeviceType::OPENCL) { mace::ConfigOpenCLRuntime( static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); - } else if (device_type == DeviceType::CPU) { - mace::ConfigOmpThreadsAndAffinity( - FLAGS_omp_num_threads, - static_cast(FLAGS_cpu_power_option)); } // Init model diff --git a/mace/public/mace_runtime.h b/mace/public/mace_runtime.h index 1887f00993301198d33f6a32e5d601c14b8a998b..63ca972324a9ff538d088dc407460e5853e67a59 100644 --- a/mace/public/mace_runtime.h +++ b/mace/public/mace_runtime.h @@ -27,8 +27,8 @@ enum GPUPriorityHint { enum CPUPowerOption { DEFAULT = 0, HIGH_PERFORMANCE = 1, BATTERY_SAVE = 2 }; void ConfigOpenCLRuntime(GPUPerfHint, GPUPriorityHint); -void ConfigOmpThreadsAndAffinity(int omp_num_threads, - CPUPowerOption power_option); +void ConfigOmpThreads(int omp_num_threads); +void ConfigCPUPowerOption(CPUPowerOption power_option); } // namespace mace