提交 fd0a439c 编写于 作者: L Liangliang He

Merge branch 'perf_config' into 'master'

fix runtime config

See merge request !298
......@@ -23,4 +23,4 @@ ops_benchmark:
only:
- master
script:
- FAILURE_PATTERN="Aborted" tools/bazel-adb-run.sh //mace/ops:ops_benchmark .*CONV.*
- FAILURE_PATTERN="Aborted" tools/bazel-adb-run.sh //mace/ops:ops_benchmark --pattern=.*CONV.*
......@@ -205,8 +205,8 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
......@@ -266,7 +266,7 @@ int Main(int argc, char **argv) {
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == CPU) {
mace::ConfigCPURuntime(
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
......
......@@ -85,6 +85,7 @@ cc_library(
],
deps = [
":core",
"//external:gflags_nothreads",
"//mace/utils",
],
alwayslink = 1,
......
......@@ -360,10 +360,11 @@ void ConfigOpenCLRuntime(GPUType gpu_type,
OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint);
}
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
// Public configuration hook for the CPU runtime.
// Logs the requested settings at INFO level and then forwards both arguments,
// unchanged, to the internal implementation.
//   omp_num_threads: requested number of OpenMP threads.
//   power_option:    CPU power policy selector (see CPUPowerOption).
void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option) {
LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
<< ", cpu_power_option: " << power_option;
// NOTE(review): SetCPURuntime is the pre-rename name of the call on the next
// line; this looks like the removed side of the rendered diff — confirm only
// one of the two calls is meant to remain.
SetCPURuntime(omp_num_threads, power_option);
SetOmpThreadsAndAffinity(omp_num_threads, power_option);
}
// Mace Engine
......
......@@ -20,7 +20,7 @@ int GetCPUMaxFreq(int cpu_id) {
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
cpu_id);
FILE *fp = fopen(path, "rb");
if (!fp) return 0;
MACE_CHECK(fp, "File: ", path, " not exists");
int freq = 0;
fscanf(fp, "%d", &freq);
......@@ -28,7 +28,7 @@ int GetCPUMaxFreq(int cpu_id) {
return freq;
}
void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids) {
void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids, int *big_core_offset) {
MACE_CHECK_NOTNULL(cpu_ids);
int cpu_count = cpu_ids->size();
std::vector<int> cpu_max_freq;
......@@ -54,51 +54,61 @@ void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids) {
}
}
}
*big_core_offset = 0;
for (int i = 1; i < cpu_count; ++i) {
if (cpu_max_freq[i] > cpu_max_freq[i - 1]) {
*big_core_offset = i;
break;
}
}
}
// Applies the CPU affinity `mask` to the calling thread, identified by its
// kernel thread id from gettid(). A non-zero result from the affinity call is
// reported together with errno via LOG(FATAL) / MACE_CHECK.
// NOTE(review): this span contains two affinity calls (raw syscall and the
// sched_setaffinity wrapper); it looks like the old and new sides of a diff
// rendered together — confirm which one is live.
void SetThreadAffinity(cpu_set_t mask) {
int sys_call_res;
pid_t pid = gettid();
// TODO(chenghui): when omp num threads is set to 1, this
// sometimes returns an EINVAL(22) error.
// https://linux.die.net/man/2/sched_setaffinity
sys_call_res = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
if (sys_call_res != 0) {
LOG(FATAL) << "syscall setaffinity error: " << sys_call_res << ' ' << errno;
}
int err = sched_setaffinity(pid, sizeof(mask), &mask);
// presumably MACE_CHECK terminates the process when the condition is false —
// verify against its definition.
MACE_CHECK(err == 0, "set affinity error: ", errno);
}
} // namespace
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
void SetOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option) {
int cpu_count = omp_get_num_procs();
LOG(INFO) << "cpu_count: " << cpu_count;
std::vector<int> sorted_cpu_ids;
sorted_cpu_ids.resize(cpu_count);
SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids);
int big_core_offset;
SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids, &big_core_offset);
std::vector<int> use_cpu_ids;
if (power_option == CPUPowerOption::DEFAULT || omp_num_threads >= cpu_count) {
if (power_option == CPUPowerOption::DEFAULT) {
use_cpu_ids = sorted_cpu_ids;
omp_num_threads = cpu_count;
} else if (power_option == CPUPowerOption::HIGH_PERFORMANCE) {
use_cpu_ids =
std::vector<int>(sorted_cpu_ids.begin() + cpu_count - omp_num_threads,
sorted_cpu_ids.end());
use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin() + big_core_offset,
sorted_cpu_ids.end());
} else {
use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin(),
sorted_cpu_ids.begin() + omp_num_threads);
if (big_core_offset > 0) {
use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin(),
sorted_cpu_ids.begin() + big_core_offset);
} else {
use_cpu_ids = sorted_cpu_ids;
}
}
if (omp_num_threads > use_cpu_ids.size()) {
LOG(WARNING) << "set omp num threads greater than num of cpus can use: "
<< use_cpu_ids.size();
}
omp_set_num_threads(omp_num_threads);
// compute mask
cpu_set_t mask;
CPU_ZERO(&mask);
for (auto cpu_id : use_cpu_ids) {
CPU_SET(cpu_id, &mask);
}
LOG(INFO) << "use cpus mask: " << mask.__bits[0];
VLOG(3) << "Set cpu affinity with mask: " << mask.__bits[0];
#pragma omp parallel for
for (int i = 0; i < omp_num_threads; ++i) {
......
......@@ -10,7 +10,8 @@
namespace mace {
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option);
void SetOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option);
}
......
......@@ -65,20 +65,24 @@ void OpenCLProfilingTimer::ClearTiming() {
accumulated_micros_ = 0;
}
std::unique_ptr<OpenCLRuntime> OpenCLRuntime::runtime_instance_ = nullptr;
OpenCLRuntime *OpenCLRuntime::Global() {
if (opencl_runtime_instance == nullptr) {
return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_NORMAL,
GPUPriorityHint::PRIORITY_LOW);
// FIXME: not thread safe
if (runtime_instance_ == nullptr) {
return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_DEFAULT,
GPUPriorityHint::PRIORITY_DEFAULT);
}
return opencl_runtime_instance;
return runtime_instance_.get();
}
// Builds a new OpenCLRuntime from the given GPU type and hints, stores it as
// the global instance, and returns a non-owning pointer to it.
// Callers must not delete the returned pointer when it comes from
// runtime_instance_, which is a std::unique_ptr and keeps ownership.
OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type,
GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
// NOTE(review): the next three lines use the former raw-pointer global
// (opencl_runtime_instance) and return before the unique_ptr path below is
// reached; they look like the removed side of the rendered diff — confirm.
opencl_runtime_instance = new OpenCLRuntime(gpu_type, gpu_perf_hint,
gpu_priority_hint);
return opencl_runtime_instance;
// Assigning a fresh unique_ptr destroys any instance held previously.
// NOTE(review): pointers handed out by earlier Global()/CreateGlobal() calls
// would then dangle — confirm callers never cache them across a reconfigure.
runtime_instance_ =
std::unique_ptr<OpenCLRuntime>(new OpenCLRuntime(gpu_type, gpu_perf_hint,
gpu_priority_hint));
return runtime_instance_.get();
}
void ParseOpenCLRuntimeConfig(std::vector<cl_context_properties> *properties,
......
......@@ -52,10 +52,10 @@ class OpenCLRuntime {
cl::Kernel BuildKernel(const std::string &program_name,
const std::string &kernel_name,
const std::set<std::string> &build_options);
~OpenCLRuntime();
private:
OpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
~OpenCLRuntime();
OpenCLRuntime(const OpenCLRuntime &) = delete;
OpenCLRuntime &operator=(const OpenCLRuntime &) = delete;
......@@ -74,9 +74,9 @@ class OpenCLRuntime {
std::map<std::string, cl::Program> built_program_map_;
std::mutex program_build_mutex_;
std::string kernel_path_;
static std::unique_ptr<OpenCLRuntime> runtime_instance_;
};
static OpenCLRuntime *opencl_runtime_instance = nullptr;
} // namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
......@@ -4,20 +4,42 @@
#include <iostream>
#include "gflags/gflags.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/public/mace.h"
int main(int argc, char **argv) {
std::cout << "Running main() from test_main.cc\n";
mace::ConfigCPURuntime(4, mace::CPUPowerOption::HIGH_PERFORMANCE);
mace::ConfigOpenCLRuntime(mace::GPUType::ADRENO, mace::GPUPerfHint::PERF_HIGH,
mace::GPUPriorityHint::PRIORITY_HIGH);
// Command-line flags for the benchmark binary (gflags). The integer hint and
// option flags use the numeric encodings spelled out in their help strings and
// are cast to the corresponding enums in main().
DEFINE_string(pattern, "all", "op benchmark pattern, eg:.*CONV.*");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 1, "num of openmp threads");
DEFINE_int32(cpu_power_option, 1,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
if (argc == 2) {
mace::testing::Benchmark::Run(argv[1]);
// Converts the textual GPU type into mace::GPUType.
// The match is an exact, case-sensitive string comparison: "ADRENO" and "MALI"
// map to their enum values; any other string falls back to ADRENO without
// reporting an error.
mace::GPUType ParseGPUType(const std::string &gpu_type_str) {
if (gpu_type_str.compare("ADRENO") == 0) {
return mace::GPUType::ADRENO;
} else if (gpu_type_str.compare("MALI") == 0) {
return mace::GPUType::MALI;
} else {
// NOTE(review): this Benchmark::Run("all") call looks like a leftover line
// from the previous main() in the rendered diff, not part of the parser —
// confirm before relying on it.
mace::testing::Benchmark::Run("all");
return mace::GPUType::ADRENO;
}
}
// Entry point of the op benchmark runner.
// Parses the command line with gflags, configures the OpenCL runtime and the
// OpenMP thread/affinity settings from the parsed flags, then runs the
// benchmarks selected by --pattern. Returns 0 on the normal path.
int main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message");
// NOTE(review): the third argument (true) should make gflags strip the flags
// it recognizes from argc/argv — confirm against the gflags documentation.
gflags::ParseCommandLineFlags(&argc, &argv, true);
// config runtime
mace::GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
// The integer flags are cast straight to their enums; no range check is done
// here, so out-of-range values are passed through as-is.
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<mace::GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<mace::GPUPriorityHint>(FLAGS_gpu_priority_hint));
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<mace::CPUPowerOption>(FLAGS_cpu_power_option));
mace::testing::Benchmark::Run(FLAGS_pattern.c_str());
return 0;
}
......@@ -171,8 +171,8 @@ DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
......@@ -199,7 +199,7 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == DeviceType::CPU) {
mace::ConfigCPURuntime(
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
......@@ -304,7 +304,7 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names,
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == DeviceType::CPU) {
mace::ConfigCPURuntime(
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
......
......@@ -62,9 +62,14 @@ enum DataType {
};
enum GPUType { ADRENO = 0, MALI = 1 };
enum GPUPerfHint { PERF_NA = 0, PERF_LOW = 1, PERF_NORMAL = 2, PERF_HIGH = 3 };
// GPU performance hint accepted by ConfigOpenCLRuntime().
// The numeric values are part of the command-line interface: the
// gpu_perf_hint flag is an integer ("0:DEFAULT/1:LOW/2:NORMAL/3:HIGH") that
// callers static_cast to this enum, so the values must stay in sync with it.
enum GPUPerfHint {
PERF_DEFAULT = 0,
PERF_LOW = 1,
PERF_NORMAL = 2,
PERF_HIGH = 3
};
enum GPUPriorityHint {
PRIORITY_NA = 0,
PRIORITY_DEFAULT = 0,
PRIORITY_LOW = 1,
PRIORITY_NORMAL = 2,
PRIORITY_HIGH = 3
......@@ -381,7 +386,8 @@ struct MaceInputInfo {
};
void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option);
void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option);
class MaceEngine {
public:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册