提交 b53ddcc9 编写于 作者: W wuchenghui

refactor mace runtime api

上级 7d53c473
......@@ -215,11 +215,11 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_int32(gpu_perf_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 4, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
int Main(int argc, char **argv) {
MACE_CHECK(FLAGS_device != "HEXAGON",
......@@ -232,7 +232,7 @@ int Main(int argc, char **argv) {
LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
LOG(INFO) << "omp_num_threads: [" << FLAGS_omp_num_threads << "]";
LOG(INFO) << "cpu_power_option: [" << FLAGS_cpu_power_option << "]";
LOG(INFO) << "cpu_affinity_policy: [" << FLAGS_cpu_affinity_policy << "]";
LOG(INFO) << "Input node: [" << FLAGS_input_node<< "]";
LOG(INFO) << "Input shapes: [" << FLAGS_input_shape << "]";
LOG(INFO) << "Output node: [" << FLAGS_output_node<< "]";
......@@ -267,11 +267,11 @@ int Main(int argc, char **argv) {
mace::DeviceType device_type = ParseDeviceType(FLAGS_device);
// config runtime
mace::ConfigOmpThreads(FLAGS_omp_num_threads);
mace::ConfigCPUPowerOption(
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
if (device_type == OPENCL) {
mace::ConfigOpenCLRuntime(
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
if (device_type == DeviceType::OPENCL) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
......
......@@ -10,26 +10,31 @@ namespace mace {
std::shared_ptr<KVStorageFactory> kStorageFactory = nullptr;
void ConfigOpenCLRuntime(GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) {
VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint
<< ", gpu_priority_hint: " << gpu_priority_hint;
OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
}
void ConfigKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory) {
void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory) {
VLOG(1) << "Set internal KV Storage Engine";
kStorageFactory = storage_factory;
}
void ConfigOmpThreads(int omp_num_threads) {
VLOG(1) << "Config CPU omp_num_threads: " << omp_num_threads;
SetOmpThreads(omp_num_threads);
MaceStatus SetOpenMPThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy) {
VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint
<< ", affinity policy: " << policy;
return SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy);
}
void ConfigCPUPowerOption(CPUPowerOption power_option) {
VLOG(1) << "Config CPU power option" << power_option;
SetThreadsAffinity(power_option);
void SetOpenMPThreadAffinity(int num_threads, const std::vector<int> &cpu_ids) {
return SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids);
}
MaceStatus GetBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids) {
return GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids);
}
}; // namespace mace
......@@ -2,12 +2,14 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include <omp.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
namespace mace {
......@@ -76,54 +78,98 @@ void SetThreadAffinity(cpu_set_t mask) {
} // namespace
void SetOmpThreads(int omp_num_threads) {
MaceStatus GetCPUBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids) {
MACE_CHECK_NOTNULL(big_core_ids);
MACE_CHECK_NOTNULL(little_core_ids);
int cpu_count = omp_get_num_procs();
if (omp_num_threads > cpu_count) {
LOG(WARNING) << "set omp num threads greater than num of cpus can use: "
<< cpu_count;
}
omp_set_num_threads(omp_num_threads);
}
std::vector<int> cpu_max_freq(cpu_count);
std::vector<int> cpu_ids(cpu_count);
void SetThreadsAffinity(CPUPowerOption power_option) {
// There is no need to set affinity in default mode
if (power_option == CPUPowerOption::DEFAULT) {
return;
// set cpu max frequency
for (int i = 0; i < cpu_count; ++i) {
cpu_max_freq[i] = GetCPUMaxFreq(i);
if (cpu_max_freq[i] == 0) {
LOG(WARNING) << "Cannot get cpu" << i
<< "'s max frequency info, maybe it is offline.";
return MACE_INVALID_ARGS;
}
cpu_ids[i] = i;
}
int cpu_count = omp_get_num_procs();
std::vector<int> sorted_cpu_ids;
sorted_cpu_ids.resize(cpu_count);
int big_core_offset;
SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids, &big_core_offset);
// sort cpu ids by max frequency asc
std::sort(cpu_ids.begin(), cpu_ids.end(),
[&cpu_max_freq](int a, int b) {
return cpu_max_freq[a] < cpu_max_freq[b];
});
std::vector<int> use_cpu_ids;
if (power_option == CPUPowerOption::HIGH_PERFORMANCE) {
use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin() + big_core_offset,
sorted_cpu_ids.end());
} else {
if (big_core_offset > 0) {
use_cpu_ids = std::vector<int>(sorted_cpu_ids.begin(),
sorted_cpu_ids.begin() + big_core_offset);
} else {
use_cpu_ids = sorted_cpu_ids;
big_core_ids->reserve(cpu_count);
little_core_ids->reserve(cpu_count);
int little_core_freq = cpu_max_freq.front();
int big_core_freq = cpu_max_freq.back();
for (int i = 0; i < cpu_count; ++i) {
if (cpu_max_freq[i] == little_core_freq) {
little_core_ids->push_back(cpu_ids[i]);
}
if (cpu_max_freq[i] == big_core_freq) {
big_core_ids->push_back(cpu_ids[i]);
}
}
return MACE_SUCCESS;
}
void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
const std::vector<int> &cpu_ids) {
std::ostringstream oss;
for (auto cpu_id : cpu_ids) oss << cpu_id << ' ';
VLOG(1) << "Set CPU openmp num_threads: " << omp_num_threads
<< ", cpu_ids: " << oss.str();
omp_set_num_threads(omp_num_threads);
// compute mask
cpu_set_t mask;
CPU_ZERO(&mask);
for (auto cpu_id : use_cpu_ids) {
for (auto cpu_id : cpu_ids) {
CPU_SET(cpu_id, &mask);
}
VLOG(3) << "Set cpu affinity with mask: " << mask.__bits[0];
int omp_num_threads = omp_get_max_threads();
#pragma omp parallel for
for (int i = 0; i < omp_num_threads; ++i) {
SetThreadAffinity(mask);
}
}
MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
CPUAffinityPolicy policy) {
// There is no need to set affinity in default mode
if (policy == CPUAffinityPolicy::AFFINITY_DEFAULT) {
if (omp_num_threads_hint > 0) omp_set_num_threads(omp_num_threads_hint);
return MACE_SUCCESS;
}
std::vector<int> big_core_ids;
std::vector<int> little_core_ids;
MaceStatus res = GetCPUBigLittleCoreIDs(&big_core_ids, &little_core_ids);
if (res != MACE_SUCCESS) {
return res;
}
std::vector<int> use_cpu_ids;
if (policy == CPUAffinityPolicy::AFFINITY_BIG_ONLY) {
use_cpu_ids = std::move(big_core_ids);
} else {
use_cpu_ids = std::move(little_core_ids);
}
if (omp_num_threads_hint < 0) {
omp_num_threads_hint = use_cpu_ids.size();
}
SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids);
return MACE_SUCCESS;
}
} // namespace mace
......@@ -6,14 +6,22 @@
#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
#include <vector>
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
namespace mace {
void SetOmpThreads(int omp_num_threads);
MaceStatus GetCPUBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids);
void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
const std::vector<int> &cpu_ids);
void SetThreadsAffinity(CPUPowerOption power_option);
MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
CPUAffinityPolicy policy);
}
} // namespace mace
#endif // MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
......@@ -12,19 +12,19 @@
DEFINE_string(filter, "all", "op benchmark regex filter, eg:.*CONV.*");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 1, "num of openmp threads");
DEFINE_int32(cpu_power_option, 1,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
int main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message");
gflags::ParseCommandLineFlags(&argc, &argv, true);
// config runtime
mace::ConfigOmpThreads(FLAGS_omp_num_threads);
mace::ConfigCPUPowerOption(
static_cast<mace::CPUPowerOption>(FLAGS_cpu_power_option));
mace::ConfigOpenCLRuntime(
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<mace::CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
mace::SetGPUHints(
static_cast<mace::GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<mace::GPUPriorityHint>(FLAGS_gpu_priority_hint));
......
......@@ -126,12 +126,11 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option,
0,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
bool RunModel(const std::vector<std::string> &input_names,
const std::vector<std::vector<int64_t>> &input_shapes,
......@@ -145,11 +144,11 @@ bool RunModel(const std::vector<std::string> &input_names,
DeviceType device_type = ParseDeviceType(FLAGS_device);
// config runtime
mace::ConfigOmpThreads(FLAGS_omp_num_threads);
mace::ConfigCPUPowerOption(
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
MaceStatus res = mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
if (device_type == DeviceType::OPENCL) {
mace::ConfigOpenCLRuntime(
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
......@@ -160,7 +159,7 @@ bool RunModel(const std::vector<std::string> &input_names,
// Config internal kv storage factory.
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(kernel_file_path));
ConfigKVStorageFactory(storage_factory);
SetKVStorageFactory(storage_factory);
// Init model
mace::MaceEngine engine(&net_def, device_type, input_names,
output_names);
......@@ -249,7 +248,7 @@ int Main(int argc, char **argv) {
LOG(INFO) << "gpu_perf_hint: " << FLAGS_gpu_perf_hint;
LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint;
LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads;
LOG(INFO) << "cpu_power_option: " << FLAGS_cpu_power_option;
LOG(INFO) << "cpu_affinity_policy: " << FLAGS_cpu_affinity_policy;
std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ',');
std::vector<std::string> output_names =
......
......@@ -13,6 +13,8 @@
#include <string>
#include <vector>
#include "mace/public/mace.h"
namespace mace {
enum GPUPerfHint {
......@@ -29,7 +31,11 @@ enum GPUPriorityHint {
PRIORITY_HIGH = 3
};
enum CPUPowerOption { DEFAULT = 0, HIGH_PERFORMANCE = 1, BATTERY_SAVE = 2 };
enum CPUAffinityPolicy {
AFFINITY_DEFAULT = 0,
AFFINITY_BIG_ONLY = 1,
AFFINITY_LITTLE_ONLY = 2,
};
class KVStorage {
public:
......@@ -60,12 +66,41 @@ class FileStorageFactory : public KVStorageFactory {
std::unique_ptr<Impl> impl_;
};
void ConfigKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory);
// Set KV store factory used as OpenCL cache
void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory);
void ConfigOpenCLRuntime(GPUPerfHint, GPUPriorityHint);
void ConfigOmpThreads(int omp_num_threads);
void ConfigCPUPowerOption(CPUPowerOption power_option);
// Set GPU hints, currently only supports Adreno GPU
void SetGPUHints(GPUPerfHint perf_hint, GPUPriorityHint priority_hint);
// Set OpenMP threads number and affinity policy.
//
// num_threads_hint is only a hint, the function can change it when it's larger
// than 0. When num_threads_hint is not positive, the function will set the
// threads number equaling to the number of big + little, big or little cores
// according to the policy.
//
// This function may not work well on some ships (e.g. MTK), and in such
// cases (when it returns error MACE_INVALID_ARGS) you may try to use
// SetOpenMPThreadAffinity to set affinity manually, or just set default policy.
MaceStatus SetOpenMPThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy);
// Set OpenMP threads number and processor affinity
// This function may not work well on some chips (e.g. MTK). Set thread affinity
// to offline cores may fail or run unexpectedly. In such cases, please use
// SetOpenMPThreadPolicy with default policy instead.
void SetOpenMPThreadAffinity(int num_threads, const std::vector<int> &cpu_ids);
// Get ARM big.LITTLE configuration.
//
// This function may not work well on some chips (e.g. MTK) and miss the
// offline cores, and the user should detect the configurations manually
// in such case(when it returns error MACE_INVALID_ARGS).
//
// If all cpu's frequencies are equal(i.e. all cores are the same),
// big_core_ids and little_core_ids will be set to all cpu ids.
MaceStatus GetBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids);
} // namespace mace
......
......@@ -188,12 +188,11 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_int32(gpu_perf_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 4, "num of openmp threads");
DEFINE_int32(cpu_power_option,
0,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
bool RunModel(const std::vector<std::string> &input_names,
const std::vector<std::vector<int64_t>> &input_shapes,
......@@ -212,11 +211,11 @@ bool RunModel(const std::vector<std::string> &input_names,
LOG(INFO) << "Runing with device type: " << device_type;
// config runtime
mace::ConfigOmpThreads(FLAGS_omp_num_threads);
mace::ConfigCPUPowerOption(
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
if (device_type == DeviceType::OPENCL) {
mace::ConfigOpenCLRuntime(
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
......@@ -230,7 +229,7 @@ bool RunModel(const std::vector<std::string> &input_names,
LOG(INFO) << "Run init";
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(kernel_file_path));
ConfigKVStorageFactory(storage_factory);
SetKVStorageFactory(storage_factory);
mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
......@@ -350,7 +349,7 @@ int Main(int argc, char **argv) {
LOG(INFO) << "gpu_perf_hint: " << FLAGS_gpu_perf_hint;
LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint;
LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads;
LOG(INFO) << "cpu_power_option: " << FLAGS_cpu_power_option;
LOG(INFO) << "cpu_affinity_policy: " << FLAGS_cpu_affinity_policy;
std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ',');
std::vector<std::string> output_names =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册