提交 6fcf7b6e 编写于 作者: L Liangliang He

Merge branch 'perf_config' into 'master'

fix cpu config

See merge request !341
......@@ -214,7 +214,7 @@ DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(omp_num_threads, 4, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
......@@ -269,14 +269,13 @@ int Main(int argc, char **argv) {
}
// config runtime
mace::ConfigOmpThreads(FLAGS_omp_num_threads);
mace::ConfigCPUPowerOption(
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
if (device_type == OPENCL) {
mace::ConfigOpenCLRuntime(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} else if (device_type == CPU || device_type == NEON) {
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
std::vector<std::string> input_names =
......
......@@ -15,11 +15,14 @@ void ConfigOpenCLRuntime(GPUPerfHint gpu_perf_hint,
OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
}
void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option) {
VLOG(1) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
<< ", cpu_power_option: " << power_option;
SetOmpThreadsAndAffinity(omp_num_threads, power_option);
// Runtime configuration hook for the OpenMP thread count.
// Emits the requested value at verbose level 1 and then delegates to
// SetOmpThreads, which performs the actual OpenMP call.
void ConfigOmpThreads(int omp_num_threads) {
  const int requested_threads = omp_num_threads;
  VLOG(1) << "Config CPU omp_num_threads: " << requested_threads;
  SetOmpThreads(requested_threads);
}
// Applies the requested CPU power option to the runtime.
// power_option: the CPUPowerOption to use; it is logged at verbose level 1
// and then handed to SetThreadsAffinity.
void ConfigCPUPowerOption(CPUPowerOption power_option) {
  // The ": " separator keeps the streamed value from running into the label,
  // matching the format used by the omp_num_threads log message.
  VLOG(1) << "Config CPU power option: " << power_option;
  SetThreadsAffinity(power_option);
}
}; // namespace mace
......@@ -20,7 +20,10 @@ int GetCPUMaxFreq(int cpu_id) {
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
cpu_id);
FILE *fp = fopen(path, "rb");
MACE_CHECK(fp, "File: ", path, " not exists");
if (!fp) {
LOG(WARNING) << "File: " << path << " not exists.";
return 0;
}
int freq = 0;
fscanf(fp, "%d", &freq);
......@@ -73,8 +76,21 @@ void SetThreadAffinity(cpu_set_t mask) {
} // namespace
void SetOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option) {
// Sets the OpenMP thread count to omp_num_threads.
// A warning is logged when the request exceeds the processor count reported
// by omp_get_num_procs(); the requested value is applied regardless.
void SetOmpThreads(int omp_num_threads) {
  const int available_procs = omp_get_num_procs();
  const bool exceeds_available = omp_num_threads > available_procs;
  if (exceeds_available) {
    LOG(WARNING) << "set omp num threads greater than num of cpus can use: "
                 << available_procs;
  }
  omp_set_num_threads(omp_num_threads);
}
void SetThreadsAffinity(CPUPowerOption power_option) {
// There is no need to set affinity in default mode
if (power_option == CPUPowerOption::DEFAULT) {
return;
}
int cpu_count = omp_get_num_procs();
std::vector<int> sorted_cpu_ids;
sorted_cpu_ids.resize(cpu_count);
......@@ -82,9 +98,7 @@ void SetOmpThreadsAndAffinity(int omp_num_threads,
SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids, &big_core_offset);
std::vector<int> use_cpu_ids;
if (power_option == CPUPowerOption::DEFAULT) {
use_cpu_ids = sorted_cpu_ids;
} else if (power_option == CPUPowerOption::HIGH_PERFORMANCE) {
if (power_option == CPUPowerOption::HIGH_PERFORMANCE) {
use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin() + big_core_offset,
sorted_cpu_ids.end());
} else {
......@@ -96,12 +110,6 @@ void SetOmpThreadsAndAffinity(int omp_num_threads,
}
}
if (omp_num_threads > use_cpu_ids.size()) {
LOG(WARNING) << "set omp num threads greater than num of cpus can use: "
<< use_cpu_ids.size();
}
omp_set_num_threads(omp_num_threads);
// compute mask
cpu_set_t mask;
CPU_ZERO(&mask);
......@@ -110,6 +118,7 @@ void SetOmpThreadsAndAffinity(int omp_num_threads,
}
VLOG(3) << "Set cpu affinity with mask: " << mask.__bits[0];
int omp_num_threads = omp_get_max_threads();
#pragma omp parallel for
for (int i = 0; i < omp_num_threads; ++i) {
SetThreadAffinity(mask);
......
......@@ -10,8 +10,9 @@
namespace mace {
// Combined form taking both the OpenMP thread count and the CPU power option.
// NOTE(review): this view interleaves pre- and post-change lines; this
// declaration appears to be the one superseded by the two below - confirm.
void SetOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option);
// Calls omp_set_num_threads(omp_num_threads); logs a warning first when the
// value exceeds omp_get_num_procs().
void SetOmpThreads(int omp_num_threads);
// Returns without doing anything for CPUPowerOption::DEFAULT. For the other
// options, as far as the visible part of the definition shows, it builds a
// CPU mask and applies it from inside an OpenMP parallel loop.
void SetThreadsAffinity(CPUPowerOption power_option);
}
......
......@@ -21,12 +21,12 @@ int main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
// config runtime
mace::ConfigOmpThreads(FLAGS_omp_num_threads);
mace::ConfigCPUPowerOption(
static_cast<mace::CPUPowerOption>(FLAGS_cpu_power_option));
mace::ConfigOpenCLRuntime(
static_cast<mace::GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<mace::GPUPriorityHint>(FLAGS_gpu_priority_hint));
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<mace::CPUPowerOption>(FLAGS_cpu_power_option));
mace::testing::Benchmark::Run(FLAGS_pattern.c_str());
return 0;
......
......@@ -196,14 +196,13 @@ bool RunModel(const std::vector<std::string> &input_names,
LOG(INFO) << "Runing with device type: " << device_type;
// config runtime
mace::ConfigOmpThreads(FLAGS_omp_num_threads);
mace::ConfigCPUPowerOption(
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
if (device_type == DeviceType::OPENCL) {
mace::ConfigOpenCLRuntime(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} else if (device_type == DeviceType::CPU) {
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
// Init model
......
......@@ -27,8 +27,8 @@ enum GPUPriorityHint {
// CPU power option selected by the caller. The numeric values match the
// cpu_power_option command-line flag (0:DEFAULT/1:HIGH_PERFORMANCE/
// 2:BATTERY_SAVE).
enum CPUPowerOption { DEFAULT = 0, HIGH_PERFORMANCE = 1, BATTERY_SAVE = 2 };
// Forwards both hints to OpenCLRuntime::Configure.
void ConfigOpenCLRuntime(GPUPerfHint, GPUPriorityHint);
// Logs both values and forwards them to SetOmpThreadsAndAffinity.
// NOTE(review): this view interleaves pre- and post-change lines; this
// declaration appears to be the one superseded by the two below - confirm.
void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option);
// Logs the requested thread count and forwards it to SetOmpThreads.
void ConfigOmpThreads(int omp_num_threads);
// Logs the power option and forwards it to SetThreadsAffinity.
void ConfigCPUPowerOption(CPUPowerOption power_option);
} // namespace mace
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册