From b0d79f5d256c5e02150a3b78af921ec8c154bdbb Mon Sep 17 00:00:00 2001 From: wuchenghui Date: Fri, 16 Mar 2018 16:56:21 +0800 Subject: [PATCH] fix runtime config --- .gitlab-ci.yml | 2 +- mace/benchmark/benchmark_model.cc | 6 +-- mace/core/BUILD | 1 + mace/core/mace.cc | 5 ++- mace/core/runtime/cpu/cpu_runtime.cc | 52 +++++++++++++--------- mace/core/runtime/cpu/cpu_runtime.h | 3 +- mace/core/runtime/opencl/opencl_runtime.cc | 18 +++++--- mace/core/runtime/opencl/opencl_runtime.h | 4 +- mace/core/testing/test_benchmark_main.cc | 40 +++++++++++++---- mace/examples/mace_run.cc | 8 ++-- mace/public/mace.h | 12 +++-- 11 files changed, 98 insertions(+), 53 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 133864d9..2427a817 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -23,4 +23,4 @@ ops_benchmark: only: - master script: - - FAILURE_PATTERN="Aborted" tools/bazel-adb-run.sh //mace/ops:ops_benchmark .*CONV.* + - FAILURE_PATTERN="Aborted" tools/bazel-adb-run.sh //mace/ops:ops_benchmark --pattern=.*CONV.* diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc index cf33586a..95b093c1 100644 --- a/mace/benchmark/benchmark_model.cc +++ b/mace/benchmark/benchmark_model.cc @@ -205,8 +205,8 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI"); -DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH"); -DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH"); +DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); +DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, 8, "num of openmp threads"); DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"); @@ -266,7 +266,7 @@ int Main(int argc, char **argv) { static_cast(FLAGS_gpu_priority_hint)); } else if (device_type == CPU) { - mace::ConfigCPURuntime( + mace::ConfigOmpThreadsAndAffinity( FLAGS_omp_num_threads, static_cast(FLAGS_cpu_power_option)); } diff --git a/mace/core/BUILD b/mace/core/BUILD index f0eae294..9a957ad1 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -85,6 +85,7 @@ cc_library( ], deps = [ ":core", + "//external:gflags_nothreads", "//mace/utils", ], alwayslink = 1, diff --git a/mace/core/mace.cc b/mace/core/mace.cc index f1f0d59a..48d29343 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -360,10 +360,11 @@ void ConfigOpenCLRuntime(GPUType gpu_type, OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint); } -void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option) { +void ConfigOmpThreadsAndAffinity(int omp_num_threads, + CPUPowerOption power_option) { LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads << ", cpu_power_option: " << power_option; - SetCPURuntime(omp_num_threads, power_option); + SetOmpThreadsAndAffinity(omp_num_threads, power_option); } // Mace Engine diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index 555372ab..20791870 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -20,7 +20,7 @@ int GetCPUMaxFreq(int cpu_id) { "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu_id); FILE *fp = fopen(path, "rb"); - if (!fp) return 0; + MACE_CHECK(fp, "File: ", path, " not exists"); int freq = 0; fscanf(fp, "%d", &freq); @@ -28,7 +28,7 @@ int GetCPUMaxFreq(int cpu_id) { return freq; } -void SortCPUIdsByMaxFreqAsc(std::vector *cpu_ids) { +void SortCPUIdsByMaxFreqAsc(std::vector *cpu_ids, int *big_core_offset) { MACE_CHECK_NOTNULL(cpu_ids); int cpu_count = cpu_ids->size(); std::vector cpu_max_freq; @@ -54,51 +54,61 @@ void SortCPUIdsByMaxFreqAsc(std::vector *cpu_ids) { } } } + + *big_core_offset = 0; + for (int i = 1; i < cpu_count; ++i) { + if (cpu_max_freq[i] > cpu_max_freq[i - 1]) { + *big_core_offset = i; + break; + } + } } void SetThreadAffinity(cpu_set_t mask) { int sys_call_res; pid_t pid = gettid(); - - // TODO(chenghui): when set omp num threads to 1, - // sometiomes return EINVAL(22) error. - // https://linux.die.net/man/2/sched_setaffinity - sys_call_res = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask); - if (sys_call_res != 0) { - LOG(FATAL) << "syscall setaffinity error: " << sys_call_res << ' ' << errno; - } + int err = sched_setaffinity(pid, sizeof(mask), &mask); + MACE_CHECK(err == 0, "set affinity error: ", errno); } } // namespace -void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) { +void SetOmpThreadsAndAffinity(int omp_num_threads, + CPUPowerOption power_option) { int cpu_count = omp_get_num_procs(); - LOG(INFO) << "cpu_count: " << cpu_count; std::vector sorted_cpu_ids; sorted_cpu_ids.resize(cpu_count); - SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids); + int big_core_offset; + SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids, &big_core_offset); std::vector use_cpu_ids; - if (power_option == CPUPowerOption::DEFAULT || omp_num_threads >= cpu_count) { + if (power_option == CPUPowerOption::DEFAULT) { use_cpu_ids = sorted_cpu_ids; - omp_num_threads = cpu_count; } else if (power_option == CPUPowerOption::HIGH_PERFORMANCE) { - use_cpu_ids = - std::vector(sorted_cpu_ids.begin() + cpu_count - omp_num_threads, - sorted_cpu_ids.end()); + use_cpu_ids = std::vector(sorted_cpu_ids.begin() + big_core_offset, + sorted_cpu_ids.end()); } else { - use_cpu_ids = std::vector(sorted_cpu_ids.begin(), - sorted_cpu_ids.begin() + omp_num_threads); + if (big_core_offset > 0) { + use_cpu_ids = std::vector(sorted_cpu_ids.begin(), + sorted_cpu_ids.begin() + big_core_offset); + } else { + use_cpu_ids = sorted_cpu_ids; + } } + if (omp_num_threads > use_cpu_ids.size()) { + LOG(WARNING) << "set omp num threads greater than num of cpus can use: " + << use_cpu_ids.size(); + } omp_set_num_threads(omp_num_threads); + // compute mask cpu_set_t mask; CPU_ZERO(&mask); for (auto cpu_id : use_cpu_ids) { CPU_SET(cpu_id, &mask); } - LOG(INFO) << "use cpus mask: " << mask.__bits[0]; + VLOG(3) << "Set cpu affinity with mask: " << mask.__bits[0]; #pragma omp parallel for for (int i = 0; i < omp_num_threads; ++i) { diff --git a/mace/core/runtime/cpu/cpu_runtime.h b/mace/core/runtime/cpu/cpu_runtime.h index f80ca1b8..082673a6 100644 --- a/mace/core/runtime/cpu/cpu_runtime.h +++ b/mace/core/runtime/cpu/cpu_runtime.h @@ -10,7 +10,8 @@ namespace mace { -void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option); +void SetOmpThreadsAndAffinity(int omp_num_threads, + CPUPowerOption power_option); } diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 5a4ca0ea..37b4c59c 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -65,20 +65,24 @@ void OpenCLProfilingTimer::ClearTiming() { accumulated_micros_ = 0; } +std::unique_ptr OpenCLRuntime::runtime_instance_ = nullptr; + OpenCLRuntime *OpenCLRuntime::Global() { - if (opencl_runtime_instance == nullptr) { - return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_NORMAL, - GPUPriorityHint::PRIORITY_LOW); + // FIXME: not thread safe + if (runtime_instance_ == nullptr) { + return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_DEFAULT, + GPUPriorityHint::PRIORITY_DEFAULT); } - return opencl_runtime_instance; + return runtime_instance_.get(); } OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type, GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { - opencl_runtime_instance = new OpenCLRuntime(gpu_type, gpu_perf_hint, - gpu_priority_hint); - return opencl_runtime_instance; + runtime_instance_ = + std::unique_ptr(new OpenCLRuntime(gpu_type, gpu_perf_hint, + gpu_priority_hint)); + return runtime_instance_.get(); } void ParseOpenCLRuntimeConfig(std::vector *properties, diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 58c6cdab..69ea4233 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -52,10 +52,10 @@ class OpenCLRuntime { cl::Kernel BuildKernel(const std::string &program_name, const std::string &kernel_name, const std::set &build_options); + ~OpenCLRuntime(); private: OpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint); - ~OpenCLRuntime(); OpenCLRuntime(const OpenCLRuntime &) = delete; OpenCLRuntime &operator=(const OpenCLRuntime &) = delete; @@ -74,9 +74,9 @@ class OpenCLRuntime { std::map built_program_map_; std::mutex program_build_mutex_; std::string kernel_path_; + static std::unique_ptr runtime_instance_; }; -static OpenCLRuntime *opencl_runtime_instance = nullptr; } // namespace mace #endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_ diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc index 76b7d15f..143b5ef6 100644 --- a/mace/core/testing/test_benchmark_main.cc +++ b/mace/core/testing/test_benchmark_main.cc @@ -4,20 +4,42 @@ #include +#include "gflags/gflags.h" #include "mace/core/testing/test_benchmark.h" #include "mace/public/mace.h" -int main(int argc, char **argv) { - std::cout << "Running main() from test_main.cc\n"; - - mace::ConfigCPURuntime(4, mace::CPUPowerOption::HIGH_PERFORMANCE); - mace::ConfigOpenCLRuntime(mace::GPUType::ADRENO, mace::GPUPerfHint::PERF_HIGH, - mace::GPUPriorityHint::PRIORITY_HIGH); +DEFINE_string(pattern, "all", "op benchmark pattern, eg:.*CONV.*"); +DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI"); +DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); +DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); +DEFINE_int32(omp_num_threads, 1, "num of openmp threads"); +DEFINE_int32(cpu_power_option, 1, + "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"); - if (argc == 2) { - mace::testing::Benchmark::Run(argv[1]); +mace::GPUType ParseGPUType(const std::string &gpu_type_str) { + if (gpu_type_str.compare("ADRENO") == 0) { + return mace::GPUType::ADRENO; + } else if (gpu_type_str.compare("MALI") == 0) { + return mace::GPUType::MALI; } else { - mace::testing::Benchmark::Run("all"); + return mace::GPUType::ADRENO; } +} + +int main(int argc, char **argv) { + gflags::SetUsageMessage("some usage message"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + + // config runtime + mace::GPUType gpu_type = ParseGPUType(FLAGS_gpu_type); + mace::ConfigOpenCLRuntime( + gpu_type, + static_cast(FLAGS_gpu_perf_hint), + static_cast(FLAGS_gpu_priority_hint)); + mace::ConfigOmpThreadsAndAffinity( + FLAGS_omp_num_threads, + static_cast(FLAGS_cpu_power_option)); + + mace::testing::Benchmark::Run(FLAGS_pattern.c_str()); return 0; } diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index db4378bf..68e986fb 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -171,8 +171,8 @@ DEFINE_int32(round, 1, "round"); DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI"); -DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH"); -DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH"); +DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); +DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, 8, "num of openmp threads"); DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"); @@ -199,7 +199,7 @@ bool SingleInputAndOutput(const std::vector &input_shape, static_cast(FLAGS_gpu_priority_hint)); } else if (device_type == DeviceType::CPU) { - mace::ConfigCPURuntime( + mace::ConfigOmpThreadsAndAffinity( FLAGS_omp_num_threads, static_cast(FLAGS_cpu_power_option)); } @@ -304,7 +304,7 @@ bool MultipleInputOrOutput(const std::vector &input_names, static_cast(FLAGS_gpu_priority_hint)); } else if (device_type == DeviceType::CPU) { - mace::ConfigCPURuntime( + mace::ConfigOmpThreadsAndAffinity( FLAGS_omp_num_threads, static_cast(FLAGS_cpu_power_option)); } diff --git a/mace/public/mace.h b/mace/public/mace.h index ec0873d1..3cf5f297 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -62,9 +62,14 @@ enum DataType { }; enum GPUType { ADRENO = 0, MALI = 1 }; -enum GPUPerfHint { PERF_NA = 0, PERF_LOW = 1, PERF_NORMAL = 2, PERF_HIGH = 3 }; +enum GPUPerfHint { + PERF_DEFAULT = 0, + PERF_LOW = 1, + PERF_NORMAL = 2, + PERF_HIGH = 3 +}; enum GPUPriorityHint { - PRIORITY_NA = 0, + PRIORITY_DEFAULT = 0, PRIORITY_LOW = 1, PRIORITY_NORMAL = 2, PRIORITY_HIGH = 3 @@ -381,7 +386,8 @@ struct MaceInputInfo { }; void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint); -void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option); +void ConfigOmpThreadsAndAffinity(int omp_num_threads, + CPUPowerOption power_option); class MaceEngine { public: -- GitLab