提交 f617e03d 编写于 作者: 吴承辉

Merge branch 'mace_runtime_doc' into 'master'

Update comments for mace_runtime.h

See merge request !382
......@@ -219,7 +219,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
int Main(int argc, char **argv) {
MACE_CHECK(FLAGS_device != "HEXAGON",
......
......@@ -121,10 +121,8 @@ MaceStatus GetCPUBigLittleCoreIDs(std::vector<int> *big_core_ids,
void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
const std::vector<int> &cpu_ids) {
std::ostringstream oss;
for (auto cpu_id : cpu_ids) oss << cpu_id << ' ';
VLOG(1) << "Set CPU openmp num_threads: " << omp_num_threads
<< ", cpu_ids: " << oss.str();
VLOG(1) << "Set OpenMP threads number: " << omp_num_threads
<< ", CPU core IDs: " << MakeString(cpu_ids);
omp_set_num_threads(omp_num_threads);
......@@ -134,7 +132,6 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
for (auto cpu_id : cpu_ids) {
CPU_SET(cpu_id, &mask);
}
VLOG(3) << "Set cpu affinity with mask: " << mask.__bits[0];
#pragma omp parallel for
for (int i = 0; i < omp_num_threads; ++i) {
......@@ -144,9 +141,10 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
CPUAffinityPolicy policy) {
// There is no need to set affinity in default mode
if (policy == CPUAffinityPolicy::AFFINITY_DEFAULT) {
if (omp_num_threads_hint > 0) omp_set_num_threads(omp_num_threads_hint);
if (policy == CPUAffinityPolicy::AFFINITY_NONE) {
if (omp_num_threads_hint > 0) {
omp_set_num_threads(std::min(omp_num_threads_hint, omp_get_num_procs()));
}
return MACE_SUCCESS;
}
......@@ -164,7 +162,8 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
use_cpu_ids = std::move(little_core_ids);
}
if (omp_num_threads_hint < 0) {
if (omp_num_threads_hint <= 0 ||
omp_num_threads_hint > use_cpu_ids.size()) {
omp_num_threads_hint = use_cpu_ids.size();
}
SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids);
......
......@@ -14,7 +14,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
int main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message");
......
......@@ -130,7 +130,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
bool RunModel(const std::vector<std::string> &input_names,
const std::vector<std::vector<int64_t>> &input_shapes,
......
......@@ -32,7 +32,7 @@ enum GPUPriorityHint {
};
// CPU affinity policy passed to SetOpenMPThreadPolicy.
// NOTE(review): this listing is a merge diff rendered without +/- markers, so
// the replaced enumerator (AFFINITY_DEFAULT) and its new name (AFFINITY_NONE)
// both appear below -- confirm against the real header before relying on it.
enum CPUAffinityPolicy {
// Presumably the pre-rename spelling of AFFINITY_NONE (same value 0).
AFFINITY_DEFAULT = 0,
// No affinity is set; only the OpenMP thread count is adjusted.
AFFINITY_NONE = 0,
// OpenMP threads are bound to the big cores.
AFFINITY_BIG_ONLY = 1,
// OpenMP threads are bound to the little cores.
AFFINITY_LITTLE_ONLY = 2,
};
......@@ -66,39 +66,52 @@ class FileStorageFactory : public KVStorageFactory {
std::unique_ptr<Impl> impl_;
};
// Set KV store factory used as OpenCL cache
// Set KV store factory used as OpenCL cache.
void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory);
// Set GPU hints, currently only supports Adreno GPU
// Set GPU hints, currently only supports Adreno GPU.
//
// Caution: this function may hurt performance if improper parameters provided.
void SetGPUHints(GPUPerfHint perf_hint, GPUPriorityHint priority_hint);
// Set OpenMP threads number and affinity policy.
//
// num_threads_hint is only a hint, the function can change it when it's larger
// than 0. When num_threads_hint is not positive, the function will set the
// threads number equaling to the number of big + little, big or little cores
// according to the policy.
// Caution: this function may hurt performance if improper parameters provided.
//
// num_threads_hint is only a hint. When num_threads_hint is zero or negative,
// the function will set the number of threads equal to the number of
// big (AFFINITY_BIG_ONLY), little (AFFINITY_LITTLE_ONLY) or all
// (AFFINITY_NONE) cores according to the policy. The number of threads will
// also be truncated to the corresponding number of cores when
// num_threads_hint is larger than it.
//
// The OpenMP threads will be bound (via sched_setaffinity) to big cores
// (AFFINITY_BIG_ONLY) or little cores (AFFINITY_LITTLE_ONLY).
//
// This function may not work well on some chips (e.g. MTK), and in such
// cases (when it returns error MACE_INVALID_ARGS) you may try to use
// SetOpenMPThreadAffinity to set affinity manually, or just set default policy.
// It returns MACE_SUCCESS on success, or an error if it can't reliably
// detect big-LITTLE cores (see GetBigLittleCoreIDs). In such cases, it's
// suggested to use AFFINITY_NONE to use all cores.
MaceStatus SetOpenMPThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy);
// Set OpenMP threads number and processor affinity
// This function may not work well on some chips (e.g. MTK). Set thread affinity
// to offline cores may fail or run unexpectedly. In such cases, please use
// SetOpenMPThreadPolicy with default policy instead.
// Set OpenMP threads number and processor affinity.
//
// Caution: this function may hurt performance if improper parameters provided.
//
// This function may not work well on some chips (e.g. MTK). Setting thread
// affinity to offline cores may run very slowly or behave unexpectedly. In
// such cases, please use SetOpenMPThreadPolicy with AFFINITY_NONE instead.
void SetOpenMPThreadAffinity(int num_threads, const std::vector<int> &cpu_ids);
// Get ARM big.LITTLE configuration.
//
// This function may not work well on some chips (e.g. MTK) and miss the
// offline cores, and the user should detect the configurations manually
// in such case(when it returns error MACE_INVALID_ARGS).
// This function will detect the max frequencies of all CPU cores, and treat
// the cores with the largest max frequency as big cores, and all the remaining
// cores as little. If all CPU cores' max frequencies are equal, big_core_ids
// and little_core_ids will both be filled with all CPU core ids.
//
// If all cpu's frequencies are equal(i.e. all cores are the same),
// big_core_ids and little_core_ids will be set to all cpu ids.
// It returns MACE_SUCCESS on success, or an error if it can't reliably
// detect the frequency of big-LITTLE cores (e.g. MTK).
MaceStatus GetBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids);
......
......@@ -193,7 +193,7 @@ DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_DEFAULT/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
bool RunModel(const std::vector<std::string> &input_names,
const std::vector<std::vector<int64_t>> &input_shapes,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册