提交 fd0a439c 作者: Liangliang He

Merge branch 'perf_config' into 'master'

fix runtime config

See merge request !298
...@@ -23,4 +23,4 @@ ops_benchmark: ...@@ -23,4 +23,4 @@ ops_benchmark:
only: only:
- master - master
script: script:
- FAILURE_PATTERN="Aborted" tools/bazel-adb-run.sh //mace/ops:ops_benchmark .*CONV.* - FAILURE_PATTERN="Aborted" tools/bazel-adb-run.sh //mace/ops:ops_benchmark --pattern=.*CONV.*
...@@ -205,8 +205,8 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs"); ...@@ -205,8 +205,8 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI"); DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads"); DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"); DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
...@@ -266,7 +266,7 @@ int Main(int argc, char **argv) { ...@@ -266,7 +266,7 @@ int Main(int argc, char **argv) {
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint)); static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} }
else if (device_type == CPU) { else if (device_type == CPU) {
mace::ConfigCPURuntime( mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads, FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option)); static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
} }
......
...@@ -85,6 +85,7 @@ cc_library( ...@@ -85,6 +85,7 @@ cc_library(
], ],
deps = [ deps = [
":core", ":core",
"//external:gflags_nothreads",
"//mace/utils", "//mace/utils",
], ],
alwayslink = 1, alwayslink = 1,
......
...@@ -360,10 +360,11 @@ void ConfigOpenCLRuntime(GPUType gpu_type, ...@@ -360,10 +360,11 @@ void ConfigOpenCLRuntime(GPUType gpu_type,
OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint); OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint);
} }
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option) { void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option) {
LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
<< ", cpu_power_option: " << power_option; << ", cpu_power_option: " << power_option;
SetCPURuntime(omp_num_threads, power_option); SetOmpThreadsAndAffinity(omp_num_threads, power_option);
} }
// Mace Engine // Mace Engine
......
...@@ -20,7 +20,7 @@ int GetCPUMaxFreq(int cpu_id) { ...@@ -20,7 +20,7 @@ int GetCPUMaxFreq(int cpu_id) {
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
cpu_id); cpu_id);
FILE *fp = fopen(path, "rb"); FILE *fp = fopen(path, "rb");
if (!fp) return 0; MACE_CHECK(fp, "File: ", path, " not exists");
int freq = 0; int freq = 0;
fscanf(fp, "%d", &freq); fscanf(fp, "%d", &freq);
...@@ -28,7 +28,7 @@ int GetCPUMaxFreq(int cpu_id) { ...@@ -28,7 +28,7 @@ int GetCPUMaxFreq(int cpu_id) {
return freq; return freq;
} }
void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids) { void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids, int *big_core_offset) {
MACE_CHECK_NOTNULL(cpu_ids); MACE_CHECK_NOTNULL(cpu_ids);
int cpu_count = cpu_ids->size(); int cpu_count = cpu_ids->size();
std::vector<int> cpu_max_freq; std::vector<int> cpu_max_freq;
...@@ -54,51 +54,61 @@ void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids) { ...@@ -54,51 +54,61 @@ void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids) {
} }
} }
} }
*big_core_offset = 0;
for (int i = 1; i < cpu_count; ++i) {
if (cpu_max_freq[i] > cpu_max_freq[i - 1]) {
*big_core_offset = i;
break;
}
}
} }
void SetThreadAffinity(cpu_set_t mask) { void SetThreadAffinity(cpu_set_t mask) {
int sys_call_res; int sys_call_res;
pid_t pid = gettid(); pid_t pid = gettid();
int err = sched_setaffinity(pid, sizeof(mask), &mask);
// TODO(chenghui): when set omp num threads to 1, MACE_CHECK(err == 0, "set affinity error: ", errno);
// sometimes return EINVAL(22) error.
// https://linux.die.net/man/2/sched_setaffinity
sys_call_res = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
if (sys_call_res != 0) {
LOG(FATAL) << "syscall setaffinity error: " << sys_call_res << ' ' << errno;
}
} }
} // namespace } // namespace
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) { void SetOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option) {
int cpu_count = omp_get_num_procs(); int cpu_count = omp_get_num_procs();
LOG(INFO) << "cpu_count: " << cpu_count;
std::vector<int> sorted_cpu_ids; std::vector<int> sorted_cpu_ids;
sorted_cpu_ids.resize(cpu_count); sorted_cpu_ids.resize(cpu_count);
SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids); int big_core_offset;
SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids, &big_core_offset);
std::vector<int> use_cpu_ids; std::vector<int> use_cpu_ids;
if (power_option == CPUPowerOption::DEFAULT || omp_num_threads >= cpu_count) { if (power_option == CPUPowerOption::DEFAULT) {
use_cpu_ids = sorted_cpu_ids; use_cpu_ids = sorted_cpu_ids;
omp_num_threads = cpu_count;
} else if (power_option == CPUPowerOption::HIGH_PERFORMANCE) { } else if (power_option == CPUPowerOption::HIGH_PERFORMANCE) {
use_cpu_ids = use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin() + big_core_offset,
std::vector<int>(sorted_cpu_ids.begin() + cpu_count - omp_num_threads,
sorted_cpu_ids.end()); sorted_cpu_ids.end());
} else { } else {
if (big_core_offset > 0) {
use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin(), use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin(),
sorted_cpu_ids.begin() + omp_num_threads); sorted_cpu_ids.begin() + big_core_offset);
} else {
use_cpu_ids = sorted_cpu_ids;
}
} }
if (omp_num_threads > use_cpu_ids.size()) {
LOG(WARNING) << "set omp num threads greater than num of cpus can use: "
<< use_cpu_ids.size();
}
omp_set_num_threads(omp_num_threads); omp_set_num_threads(omp_num_threads);
// compute mask // compute mask
cpu_set_t mask; cpu_set_t mask;
CPU_ZERO(&mask); CPU_ZERO(&mask);
for (auto cpu_id : use_cpu_ids) { for (auto cpu_id : use_cpu_ids) {
CPU_SET(cpu_id, &mask); CPU_SET(cpu_id, &mask);
} }
LOG(INFO) << "use cpus mask: " << mask.__bits[0]; VLOG(3) << "Set cpu affinity with mask: " << mask.__bits[0];
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < omp_num_threads; ++i) { for (int i = 0; i < omp_num_threads; ++i) {
......
...@@ -10,7 +10,8 @@ ...@@ -10,7 +10,8 @@
namespace mace { namespace mace {
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option); void SetOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option);
} }
......
...@@ -65,20 +65,24 @@ void OpenCLProfilingTimer::ClearTiming() { ...@@ -65,20 +65,24 @@ void OpenCLProfilingTimer::ClearTiming() {
accumulated_micros_ = 0; accumulated_micros_ = 0;
} }
std::unique_ptr<OpenCLRuntime> OpenCLRuntime::runtime_instance_ = nullptr;
OpenCLRuntime *OpenCLRuntime::Global() { OpenCLRuntime *OpenCLRuntime::Global() {
if (opencl_runtime_instance == nullptr) { // FIXME: not thread safe
return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_NORMAL, if (runtime_instance_ == nullptr) {
GPUPriorityHint::PRIORITY_LOW); return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_DEFAULT,
GPUPriorityHint::PRIORITY_DEFAULT);
} }
return opencl_runtime_instance; return runtime_instance_.get();
} }
OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type, OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type,
GPUPerfHint gpu_perf_hint, GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) { GPUPriorityHint gpu_priority_hint) {
opencl_runtime_instance = new OpenCLRuntime(gpu_type, gpu_perf_hint, runtime_instance_ =
gpu_priority_hint); std::unique_ptr<OpenCLRuntime>(new OpenCLRuntime(gpu_type, gpu_perf_hint,
return opencl_runtime_instance; gpu_priority_hint));
return runtime_instance_.get();
} }
void ParseOpenCLRuntimeConfig(std::vector<cl_context_properties> *properties, void ParseOpenCLRuntimeConfig(std::vector<cl_context_properties> *properties,
......
...@@ -52,10 +52,10 @@ class OpenCLRuntime { ...@@ -52,10 +52,10 @@ class OpenCLRuntime {
cl::Kernel BuildKernel(const std::string &program_name, cl::Kernel BuildKernel(const std::string &program_name,
const std::string &kernel_name, const std::string &kernel_name,
const std::set<std::string> &build_options); const std::set<std::string> &build_options);
~OpenCLRuntime();
private: private:
OpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint); OpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
~OpenCLRuntime();
OpenCLRuntime(const OpenCLRuntime &) = delete; OpenCLRuntime(const OpenCLRuntime &) = delete;
OpenCLRuntime &operator=(const OpenCLRuntime &) = delete; OpenCLRuntime &operator=(const OpenCLRuntime &) = delete;
...@@ -74,9 +74,9 @@ class OpenCLRuntime { ...@@ -74,9 +74,9 @@ class OpenCLRuntime {
std::map<std::string, cl::Program> built_program_map_; std::map<std::string, cl::Program> built_program_map_;
std::mutex program_build_mutex_; std::mutex program_build_mutex_;
std::string kernel_path_; std::string kernel_path_;
static std::unique_ptr<OpenCLRuntime> runtime_instance_;
}; };
static OpenCLRuntime *opencl_runtime_instance = nullptr;
} // namespace mace } // namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_ #endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
...@@ -4,20 +4,42 @@ ...@@ -4,20 +4,42 @@
#include <iostream> #include <iostream>
#include "gflags/gflags.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
int main(int argc, char **argv) { DEFINE_string(pattern, "all", "op benchmark pattern, eg:.*CONV.*");
std::cout << "Running main() from test_main.cc\n"; DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
mace::ConfigCPURuntime(4, mace::CPUPowerOption::HIGH_PERFORMANCE); DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
mace::ConfigOpenCLRuntime(mace::GPUType::ADRENO, mace::GPUPerfHint::PERF_HIGH, DEFINE_int32(omp_num_threads, 1, "num of openmp threads");
mace::GPUPriorityHint::PRIORITY_HIGH); DEFINE_int32(cpu_power_option, 1,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
if (argc == 2) { mace::GPUType ParseGPUType(const std::string &gpu_type_str) {
mace::testing::Benchmark::Run(argv[1]); if (gpu_type_str.compare("ADRENO") == 0) {
return mace::GPUType::ADRENO;
} else if (gpu_type_str.compare("MALI") == 0) {
return mace::GPUType::MALI;
} else { } else {
mace::testing::Benchmark::Run("all"); return mace::GPUType::ADRENO;
} }
}
int main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message");
gflags::ParseCommandLineFlags(&argc, &argv, true);
// config runtime
mace::GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<mace::GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<mace::GPUPriorityHint>(FLAGS_gpu_priority_hint));
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<mace::CPUPowerOption>(FLAGS_cpu_power_option));
mace::testing::Benchmark::Run(FLAGS_pattern.c_str());
return 0; return 0;
} }
...@@ -171,8 +171,8 @@ DEFINE_int32(round, 1, "round"); ...@@ -171,8 +171,8 @@ DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI"); DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads"); DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"); DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
...@@ -199,7 +199,7 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape, ...@@ -199,7 +199,7 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint)); static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} }
else if (device_type == DeviceType::CPU) { else if (device_type == DeviceType::CPU) {
mace::ConfigCPURuntime( mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads, FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option)); static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
} }
...@@ -304,7 +304,7 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names, ...@@ -304,7 +304,7 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names,
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint)); static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} }
else if (device_type == DeviceType::CPU) { else if (device_type == DeviceType::CPU) {
mace::ConfigCPURuntime( mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads, FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option)); static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
} }
......
...@@ -62,9 +62,14 @@ enum DataType { ...@@ -62,9 +62,14 @@ enum DataType {
}; };
enum GPUType { ADRENO = 0, MALI = 1 }; enum GPUType { ADRENO = 0, MALI = 1 };
enum GPUPerfHint { PERF_NA = 0, PERF_LOW = 1, PERF_NORMAL = 2, PERF_HIGH = 3 }; enum GPUPerfHint {
PERF_DEFAULT = 0,
PERF_LOW = 1,
PERF_NORMAL = 2,
PERF_HIGH = 3
};
enum GPUPriorityHint { enum GPUPriorityHint {
PRIORITY_NA = 0, PRIORITY_DEFAULT = 0,
PRIORITY_LOW = 1, PRIORITY_LOW = 1,
PRIORITY_NORMAL = 2, PRIORITY_NORMAL = 2,
PRIORITY_HIGH = 3 PRIORITY_HIGH = 3
...@@ -381,7 +386,8 @@ struct MaceInputInfo { ...@@ -381,7 +386,8 @@ struct MaceInputInfo {
}; };
void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint); void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option); void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option);
class MaceEngine { class MaceEngine {
public: public:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册