提交 c9490fd3 编写于 作者: L Liangliang He

Merge branch 'runtime' into 'master'

remove gpu/dsp openmp dependency

See merge request !446
......@@ -42,7 +42,10 @@ cc_library(
"runtime/opencl/*.h",
],
)) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])),
copts = if_openmp_enabled(["-fopenmp"]) + if_android([
copts = if_openmp_enabled([
"-fopenmp",
"-DMACE_ENABLE_OPENMP",
]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-ldl"] + if_android([
......
......@@ -14,7 +14,10 @@
#include "mace/core/runtime/cpu/cpu_runtime.h"
#ifdef MACE_ENABLE_OPENMP
#include <omp.h>
#endif
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
......@@ -30,6 +33,24 @@ namespace mace {
namespace {
int GetCPUCount() {
char path[32];
int cpu_count = 0;
int result = 0;
while (true) {
snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d", cpu_count);
result = access(path, F_OK);
if (result != 0) {
if (errno != ENOENT) {
LOG(ERROR) << "Access " << path << " failed, errno: " << errno;
}
return cpu_count;
}
cpu_count++;
}
}
int GetCPUMaxFreq(int cpu_id) {
char path[64];
snprintf(path, sizeof(path),
......@@ -99,7 +120,11 @@ MaceStatus GetCPUBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids) {
MACE_CHECK_NOTNULL(big_core_ids);
MACE_CHECK_NOTNULL(little_core_ids);
#ifdef MACE_ENABLE_OPENMP
int cpu_count = omp_get_num_procs();
#else
int cpu_count = GetCPUCount();
#endif
std::vector<int> cpu_max_freq(cpu_count);
std::vector<int> cpu_ids(cpu_count);
......@@ -141,7 +166,11 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
VLOG(1) << "Set OpenMP threads number: " << omp_num_threads
<< ", CPU core IDs: " << MakeString(cpu_ids);
#ifdef MACE_ENABLE_OPENMP
omp_set_num_threads(omp_num_threads);
#else
LOG(WARNING) << "OpenMP not enabled. Set OpenMP threads number failed.";
#endif
// compute mask
cpu_set_t mask;
......@@ -150,18 +179,27 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
CPU_SET(cpu_id, &mask);
}
#ifdef MACE_ENABLE_OPENMP
#pragma omp parallel for
for (int i = 0; i < omp_num_threads; ++i) {
SetThreadAffinity(mask);
}
#else
SetThreadAffinity(mask);
LOG(INFO) << "SetThreadAffinity: " << mask.__bits[0];
#endif
}
MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
CPUAffinityPolicy policy) {
if (policy == CPUAffinityPolicy::AFFINITY_NONE) {
#ifdef MACE_ENABLE_OPENMP
if (omp_num_threads_hint > 0) {
omp_set_num_threads(std::min(omp_num_threads_hint, omp_get_num_procs()));
}
#else
LOG(WARNING) << "OpenMP not enabled. Set OpenMP threads number failed.";
#endif
return MACE_SUCCESS;
}
......
......@@ -170,26 +170,33 @@ def tuning_run(runtime,
phone_data_dir,
tuning=False,
limit_opencl_kernel_time=0,
option_args=""):
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3):
stdout = sh_commands.tuning_run(
target_abi,
serialno,
vlog_level,
embed_model_data,
model_output_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
model_name,
device_type,
running_round,
restart_round,
limit_opencl_kernel_time,
tuning,
out_of_range_check,
phone_data_dir,
option_args)
target_abi,
serialno,
vlog_level,
embed_model_data,
model_output_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
model_name,
device_type,
running_round,
restart_round,
limit_opencl_kernel_time,
tuning,
out_of_range_check,
phone_data_dir,
omp_num_threads,
cpu_affinity_policy,
gpu_perf_hint,
gpu_priority_hint
)
if running_round > 0 and FLAGS.collect_report:
model_benchmark_stdout_processor(
......@@ -201,16 +208,19 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
model_output_dir, input_nodes, output_nodes,
input_shapes, output_shapes, model_name, device_type,
running_round, restart_round, tuning,
limit_opencl_kernel_time, phone_data_dir):
limit_opencl_kernel_time, phone_data_dir,
enable_openmp):
mace_run_target = "//mace/tools/validation:mace_run"
if runtime == "gpu":
gen_opencl_and_tuning_code(target_abi, serialno, [], False)
sh_commands.bazel_build(
mace_run_target,
abi=target_abi,
model_tag=model_name,
production_mode=False,
hexagon_mode=hexagon_mode)
mace_run_target,
abi=target_abi,
model_tag=model_name,
production_mode=False,
hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp
)
sh_commands.update_mace_run_lib(model_output_dir, target_abi,
model_name, embed_model_data)
......@@ -230,21 +240,25 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
gen_opencl_and_tuning_code(target_abi, serialno, [model_output_dir],
True)
sh_commands.bazel_build(
mace_run_target,
abi=target_abi,
model_tag=model_name,
production_mode=True,
hexagon_mode=hexagon_mode)
mace_run_target,
abi=target_abi,
model_tag=model_name,
production_mode=True,
hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp
)
sh_commands.update_mace_run_lib(model_output_dir, target_abi,
model_name, embed_model_data)
else:
gen_opencl_and_tuning_code(target_abi, serialno, [], False)
sh_commands.bazel_build(
mace_run_target,
abi=target_abi,
model_tag=model_name,
production_mode=True,
hexagon_mode=hexagon_mode)
mace_run_target,
abi=target_abi,
model_tag=model_name,
production_mode=True,
hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp
)
sh_commands.update_mace_run_lib(model_output_dir, target_abi,
model_name, embed_model_data)
......@@ -344,6 +358,31 @@ def parse_args():
type="bool",
default="false",
help="Enable out of range check for opencl.")
parser.add_argument(
"--enable_openmp",
type="bool",
default="true",
help="Enable openmp.")
parser.add_argument(
"--omp_num_threads",
type=int,
default=-1,
help="num of openmp threads")
parser.add_argument(
"--cpu_affinity_policy",
type=int,
default=1,
help="0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY")
parser.add_argument(
"--gpu_perf_hint",
type=int,
default=3,
help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
parser.add_argument(
"--gpu_priority_hint",
type=int,
default=3,
help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
parser.add_argument(
"--collect_report",
type="bool",
......@@ -358,8 +397,7 @@ def parse_args():
def process_models(project_name, configs, embed_model_data, vlog_level,
target_abi, phone_data_dir, option_args,
target_soc="", serialno=""):
target_abi, phone_data_dir, target_soc="", serialno=""):
hexagon_mode = get_hexagon_mode(configs)
model_output_dirs = []
for model_name in configs["models"]:
......@@ -450,7 +488,8 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
FLAGS.restart_round,
FLAGS.tuning,
model_config["limit_opencl_kernel_time"],
phone_data_dir)
phone_data_dir,
FLAGS.enable_openmp)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or \
FLAGS.mode == "all":
......@@ -469,7 +508,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
FLAGS.round,
FLAGS.restart_round,
FLAGS.out_of_range_check,
phone_data_dir)
phone_data_dir,
omp_num_threads=FLAGS.omp_num_threads,
cpu_affinity_policy=FLAGS.cpu_affinity_policy,
gpu_perf_hint=FLAGS.gpu_perf_hint,
gpu_priority_hint=FLAGS.gpu_priority_hint)
if FLAGS.mode == "benchmark":
gen_opencl_and_tuning_code(
......@@ -487,7 +530,10 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
device_type,
hexagon_mode,
phone_data_dir,
option_args)
FLAGS.omp_num_threads,
FLAGS.cpu_affinity_policy,
FLAGS.gpu_perf_hint,
FLAGS.gpu_priority_hint)
if FLAGS.mode == "validate" or FLAGS.mode == "all":
sh_commands.validate_model(target_abi,
......@@ -573,9 +619,6 @@ def main(unused_args):
sh_commands.gen_mace_version()
sh_commands.gen_encrypted_opencl_source()
option_args = ' '.join(
[arg for arg in unused_args if arg.startswith('--')])
target_socs = get_target_socs(configs)
embed_model_data = configs.get("embed_model_data", 1)
......@@ -597,13 +640,12 @@ def main(unused_args):
props["ro.product.model"]))
process_models(project_name, configs, embed_model_data,
vlog_level, target_abi, phone_data_dir,
option_args, target_soc, serialno)
target_soc, serialno)
else:
print("====================================================")
print("Run on host")
process_models(project_name, configs, embed_model_data,
vlog_level, target_abi, phone_data_dir,
option_args)
vlog_level, target_abi, phone_data_dir)
if FLAGS.mode == "build" or FLAGS.mode == "all":
sh_commands.packaging_lib(FLAGS.output_dir, project_name)
......
......@@ -273,7 +273,8 @@ def bazel_build(target,
production_mode=False,
hexagon_mode=False,
disable_no_tuning_warning=False,
debug=False):
debug=False,
enable_openmp=True):
print("* Build %s with ABI %s" % (target, abi))
stdout_buff = []
process_output = make_output_processor(stdout_buff)
......@@ -292,7 +293,7 @@ def bazel_build(target,
"--copt=-DMACE_MODEL_TAG=%s" % model_tag,
"--copt=-O3",
"--define",
"openmp=true",
"openmp=%s" % str(enable_openmp).lower(),
"--define",
"production=%s" % str(production_mode).lower(),
_out=process_output,
......@@ -320,7 +321,7 @@ def bazel_build(target,
"--define",
"neon=true",
"--define",
"openmp=true",
"openmp=%s" % str(enable_openmp).lower(),
"--define",
"production=%s" % str(production_mode).lower(),
"--define",
......@@ -576,15 +577,21 @@ def tuning_run(abi,
tuning,
out_of_range_check,
phone_data_dir,
option_args="",
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name="model_input",
output_file_name="model_out"):
print("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
"out_of_range_check=%s" %
"out_of_range_check=%s, omp_num_threads=%s, cpu_affinity_policy=%s, "
"gpu_perf_hint=%s, gpu_priority_hint=%s" %
(model_tag, running_round, restart_round, str(tuning),
str(out_of_range_check)))
str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
gpu_perf_hint, gpu_priority_hint))
if abi == "host":
p = subprocess.Popen([
p = subprocess.Popen(
[
"env",
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"%s/mace_run" % model_output_dir,
......@@ -598,9 +605,13 @@ def tuning_run(abi,
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"%s" % option_args],
stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
],
stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
out, err = p.communicate()
stdout = err + out
print stdout
......@@ -627,33 +638,36 @@ def tuning_run(abi,
stdout_buff = []
process_output = make_output_processor(stdout_buff)
p = sh.adb(
"-s",
serialno,
"shell",
"LD_LIBRARY_PATH=%s" % phone_data_dir,
"MACE_TUNING=%s" % int(tuning),
"MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
phone_data_dir,
"MACE_CL_PROGRAM_PATH=%s/cl_program" % phone_data_dir,
"MACE_LIMIT_OPENCL_KERNEL_TIME=%s" %
limit_opencl_kernel_time,
"%s/mace_run" % phone_data_dir,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (phone_data_dir, input_file_name),
"--output_file=%s/%s" % (phone_data_dir, output_file_name),
"--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"%s" % option_args,
_out=process_output,
_bg=True,
_err_to_out=True)
"-s",
serialno,
"shell",
"LD_LIBRARY_PATH=%s" % phone_data_dir,
"MACE_TUNING=%s" % int(tuning),
"MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
phone_data_dir,
"MACE_CL_PROGRAM_PATH=%s/cl_program" % phone_data_dir,
"MACE_LIMIT_OPENCL_KERNEL_TIME=%s" %
limit_opencl_kernel_time,
"%s/mace_run" % phone_data_dir,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (phone_data_dir, input_file_name),
"--output_file=%s/%s" % (phone_data_dir, output_file_name),
"--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait()
print("Running finished!\n")
return "".join(stdout_buff)
......@@ -900,7 +914,10 @@ def benchmark_model(abi,
device_type,
hexagon_mode,
phone_data_dir,
option_args="",
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name="model_input",
output_file_name="model_out"):
print("* Benchmark for %s" % model_tag)
......@@ -924,7 +941,8 @@ def benchmark_model(abi,
stdout_buff = []
process_output = make_output_processor(stdout_buff)
if abi == "host":
p = subprocess.Popen([
p = subprocess.Popen(
[
"env",
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"%s/benchmark_model" % model_output_dir,
......@@ -935,7 +953,11 @@ def benchmark_model(abi,
"--input_file=%s/%s" % (model_output_dir, input_file_name),
"--model_data_file=%s/%s.data" % (model_output_dir, model_tag),
"--device=%s" % device_type,
"%s" % option_args])
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
])
p.wait()
else:
sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
......@@ -951,26 +973,29 @@ def benchmark_model(abi,
adb_push("%s/%s.data" % (model_output_dir, model_tag),
phone_data_dir, serialno)
p = sh.adb(
"-s",
serialno,
"shell",
"LD_LIBRARY_PATH=%s" % phone_data_dir,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
phone_data_dir,
"MACE_OPENCL_PROFILING=1",
"%s/benchmark_model" % phone_data_dir,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (phone_data_dir, input_file_name),
"--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
"--device=%s" % device_type,
"%s" % option_args,
_out=process_output,
_bg=True,
_err_to_out=True)
"-s",
serialno,
"shell",
"LD_LIBRARY_PATH=%s" % phone_data_dir,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
phone_data_dir,
"MACE_OPENCL_PROFILING=1",
"%s/benchmark_model" % phone_data_dir,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (phone_data_dir, input_file_name),
"--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
"--device=%s" % device_type,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait()
print("Benchmark done!\n")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册