提交 ab4048ed 编写于 作者: W wuchenghui

make CPU&GPU Runtime configurable

上级 eb442e35
...@@ -7,3 +7,4 @@ mace/codegen/models/ ...@@ -7,3 +7,4 @@ mace/codegen/models/
mace/codegen/opencl/ mace/codegen/opencl/
mace/codegen/opencl_bin/ mace/codegen/opencl_bin/
mace/codegen/version/ mace/codegen/version/
build/
# Benchmark # Benchmark
# Examples # Examples
load( load(
"//:mace.bzl", "//mace:mace.bzl",
"if_production_mode", "if_production_mode",
"if_not_production_mode", "if_not_production_mode",
"if_hexagon_enabled", "if_hexagon_enabled",
...@@ -16,7 +16,7 @@ cc_library( ...@@ -16,7 +16,7 @@ cc_library(
hdrs = ["stat_summarizer.h"], hdrs = ["stat_summarizer.h"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
"@mace//:mace_headers", "//mace/core",
], ],
) )
...@@ -29,17 +29,9 @@ cc_binary( ...@@ -29,17 +29,9 @@ cc_binary(
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":stat_summarizer", ":stat_summarizer",
"//mace/codegen:generated_models",
"//external:gflags_nothreads", "//external:gflags_nothreads",
] + if_hexagon_enabled([ "//mace/codegen:generated_models",
"//lib/hexagon:hexagon", ],
]) + if_production_mode([
"@mace//:mace_prod",
"//codegen:generated_opencl_prod",
"//codegen:generated_tuning_params",
]) + if_not_production_mode([
"@mace//:mace_dev",
]),
) )
cc_library( cc_library(
...@@ -58,9 +50,6 @@ cc_binary( ...@@ -58,9 +50,6 @@ cc_binary(
deps = [ deps = [
":libmace_merged", ":libmace_merged",
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//lib/hexagon", "//mace/core",
"@mace//:mace",
"@mace//:mace_headers",
"@mace//:mace_prod",
], ],
) )
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "benchmark/stat_summarizer.h" #include "mace/benchmark/stat_summarizer.h"
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
...@@ -204,6 +204,11 @@ DEFINE_bool(show_summary, true, "whether to show a summary of the stats"); ...@@ -204,6 +204,11 @@ DEFINE_bool(show_summary, true, "whether to show a summary of the stats");
DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs"); DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
int Main(int argc, char **argv) { int Main(int argc, char **argv) {
MACE_CHECK(FLAGS_device != "HEXAGON", "Model benchmark tool do not support DSP."); MACE_CHECK(FLAGS_device != "HEXAGON", "Model benchmark tool do not support DSP.");
...@@ -212,6 +217,11 @@ int Main(int argc, char **argv) { ...@@ -212,6 +217,11 @@ int Main(int argc, char **argv) {
LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]"; LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]";
LOG(INFO) << "Device: [" << FLAGS_device << "]"; LOG(INFO) << "Device: [" << FLAGS_device << "]";
LOG(INFO) << "gpu_type: [" << FLAGS_gpu_type << "]";
LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
LOG(INFO) << "omp_num_threads: [" << FLAGS_omp_num_threads << "]";
LOG(INFO) << "cpu_power_option: [" << FLAGS_cpu_power_option << "]";
LOG(INFO) << "Input node: [" << FLAGS_input_node<< "]"; LOG(INFO) << "Input node: [" << FLAGS_input_node<< "]";
LOG(INFO) << "Input shapes: [" << FLAGS_input_shape << "]"; LOG(INFO) << "Input shapes: [" << FLAGS_input_shape << "]";
LOG(INFO) << "Output node: [" << FLAGS_output_node<< "]"; LOG(INFO) << "Output node: [" << FLAGS_output_node<< "]";
...@@ -246,6 +256,21 @@ int Main(int argc, char **argv) { ...@@ -246,6 +256,21 @@ int Main(int argc, char **argv) {
device_type = OPENCL; device_type = OPENCL;
} }
// config runtime
if (device_type == OPENCL) {
GPUType gpu_type = ADRENO;
if (FLAGS_gpu_type == "MALI") gpu_type = MALI;
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == CPU) {
mace::ConfigCPURuntime(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ','); std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ',');
std::vector<std::string> output_names = str_util::Split(FLAGS_output_node, ','); std::vector<std::string> output_names = str_util::Split(FLAGS_output_node, ',');
std::vector<std::string> input_shapes = str_util::Split(FLAGS_input_shape, ':'); std::vector<std::string> input_shapes = str_util::Split(FLAGS_input_shape, ':');
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#include "benchmark/stat_summarizer.h" #include "mace/benchmark/stat_summarizer.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
......
...@@ -14,6 +14,7 @@ load( ...@@ -14,6 +14,7 @@ load(
"if_not_hexagon_enabled", "if_not_hexagon_enabled",
"if_production_mode", "if_production_mode",
"if_not_production_mode", "if_not_production_mode",
"if_openmp_enabled",
) )
cc_library( cc_library(
...@@ -21,6 +22,7 @@ cc_library( ...@@ -21,6 +22,7 @@ cc_library(
srcs = glob( srcs = glob(
[ [
"*.cc", "*.cc",
"runtime/cpu/*.cc",
"runtime/opencl/*.cc", "runtime/opencl/*.cc",
"runtime/hexagon/*.cc", "runtime/hexagon/*.cc",
], ],
...@@ -37,9 +39,11 @@ cc_library( ...@@ -37,9 +39,11 @@ cc_library(
]), ]),
hdrs = glob([ hdrs = glob([
"*.h", "*.h",
"runtime/cpu/*.h",
"runtime/opencl/*.h", "runtime/opencl/*.h",
"runtime/hexagon/*.h", "runtime/hexagon/*.h",
]), ]),
copts = if_openmp_enabled(["-fopenmp"]),
linkopts = ["-ldl"] + if_android([ linkopts = ["-ldl"] + if_android([
"-pie", "-pie",
"-lm", "-lm",
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/core/types.h" #include "mace/core/types.h"
namespace mace { namespace mace {
...@@ -349,6 +351,21 @@ const OperatorDef &NetDef::op(const int idx) const { ...@@ -349,6 +351,21 @@ const OperatorDef &NetDef::op(const int idx) const {
return op_[idx]; return op_[idx];
} }
// Configures the process-wide OpenCL runtime.
//
// Logs the three requested settings and then forwards them unchanged to
// OpenCLRuntime::CreateGlobal(). The pointer returned by CreateGlobal() is
// discarded here.
//
// gpu_type:          GPU family the context is created for.
// gpu_perf_hint:     requested performance hint.
// gpu_priority_hint: requested priority hint.
void ConfigOpenCLRuntime(GPUType gpu_type,
GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
LOG(INFO) << "Config OpenCL Runtime: gpu_type: " << gpu_type
<< ", gpu_perf_hint: " << gpu_perf_hint
<< ", gpu_priority_hint: " << gpu_priority_hint;
OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint);
}
// Configures the CPU runtime.
//
// Logs the requested thread count and power option, then forwards both
// unchanged to SetCPURuntime().
//
// omp_num_threads: requested number of OpenMP threads.
// power_option:    CPU selection policy passed through to SetCPURuntime().
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
<< ", cpu_power_option: " << power_option;
SetCPURuntime(omp_num_threads, power_option);
}
// Mace Engine // Mace Engine
MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type)
: op_registry_(new OperatorRegistry()), : op_registry_(new OperatorRegistry()),
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <omp.h>
#include <sched.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <numeric>
#include <vector>

#include "mace/public/mace.h"
#include "mace/utils/logging.h"
namespace mace {
namespace {
// Reads the maximum frequency the kernel reports for one CPU core.
//
// The value is parsed as a decimal integer from
//   /sys/devices/system/cpu/cpu<cpu_id>/cpufreq/cpuinfo_max_freq
//
// cpu_id: index of the CPU core to query.
//
// Returns the parsed value, or 0 when the path cannot be formatted, the file
// cannot be opened, or no integer can be parsed from it.
static int GetCPUMaxFreq(int cpu_id) {
  // The fixed part of the path plus an 11-character "%d" expansion already
  // needs 64 bytes including the terminator, so keep some headroom and let
  // snprintf enforce the bound.
  char path[128];
  const int written = snprintf(
      path, sizeof(path),
      "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu_id);
  if (written < 0 || static_cast<size_t>(written) >= sizeof(path)) {
    return 0;
  }

  FILE *fp = fopen(path, "rb");
  if (fp == nullptr) {
    return 0;
  }

  int freq = 0;
  if (fscanf(fp, "%d", &freq) != 1) {
    // Nothing parseable in the file: report "unknown" explicitly.
    freq = 0;
  }
  fclose(fp);
  return freq;
}
// Fills `cpu_ids` with the CPU indices 0 .. cpu_ids.size() - 1, ordered by
// ascending maximum frequency as reported by GetCPUMaxFreq().
//
// cpu_ids: in/out. Only its size is read on entry (it determines how many
//          CPUs are queried); its previous contents are overwritten.
//
// CPUs with equal maximum frequency keep ascending index order, so the result
// is deterministic for a given set of frequencies.
static void SortCPUIdsByMaxFreqAsc(std::vector<int> &cpu_ids) {
  const int cpu_count = static_cast<int>(cpu_ids.size());

  // Query every core once up front; the comparator below only indexes this
  // table instead of re-reading sysfs.
  std::vector<int> cpu_max_freq(cpu_count);
  for (int cpu = 0; cpu < cpu_count; ++cpu) {
    cpu_max_freq[cpu] = GetCPUMaxFreq(cpu);
  }

  std::iota(cpu_ids.begin(), cpu_ids.end(), 0);
  std::stable_sort(cpu_ids.begin(), cpu_ids.end(),
                   [&cpu_max_freq](int lhs, int rhs) {
                     return cpu_max_freq[lhs] < cpu_max_freq[rhs];
                   });
}
// Restricts the calling thread to the CPUs contained in `mask`.
//
// mask: set of CPUs the calling thread may run on.
//
// Issues the raw sched_setaffinity syscall for the calling thread's id.
// A non-zero syscall result is reported through LOG(FATAL) together with
// errno.
static void SetThreadAffinity(cpu_set_t mask) {
  // Use the raw syscall for the thread id: glibc only ships a gettid()
  // wrapper from version 2.30 on, so calling gettid() directly can break
  // host builds.
  const pid_t tid = static_cast<pid_t>(syscall(__NR_gettid));

  // TODO: when omp num threads is set to 1, this sometimes returns an
  // EINVAL(22) error
  // https://linux.die.net/man/2/sched_setaffinity
  const int sys_call_res = static_cast<int>(
      syscall(__NR_sched_setaffinity, tid, sizeof(mask), &mask));
  if (sys_call_res != 0) {
    LOG(FATAL) << "syscall setaffinity error: " << sys_call_res << ' ' << errno;
  }
}
} // namespace
// Sets the OpenMP thread count and pins the OpenMP threads to a set of CPUs.
//
// omp_num_threads: requested number of OpenMP threads.
// power_option:    CPU selection policy.
//
// CPU selection:
//   * DEFAULT, or omp_num_threads >= number of processors: every CPU is used
//     and the thread count is set to the number of processors.
//   * HIGH_PERFORMANCE: the omp_num_threads CPUs with the highest maximum
//     frequency are used.
//   * any other option: the omp_num_threads CPUs with the lowest maximum
//     frequency are used.
//
// For the two selective policies a thread count below 1 is raised to 1;
// without that, the slice below would be empty (0) or out of range
// (negative) and the resulting affinity mask would contain no CPU.
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
  const int cpu_count = omp_get_num_procs();
  LOG(INFO) << "cpu_count: " << cpu_count;

  std::vector<int> sorted_cpu_ids(cpu_count);
  SortCPUIdsByMaxFreqAsc(sorted_cpu_ids);

  std::vector<int> use_cpu_ids;
  if (power_option == CPUPowerOption::DEFAULT || omp_num_threads >= cpu_count) {
    use_cpu_ids = sorted_cpu_ids;
    omp_num_threads = cpu_count;
  } else {
    if (omp_num_threads < 1) {
      LOG(INFO) << "omp_num_threads " << omp_num_threads
                << " is invalid, using 1 instead";
      omp_num_threads = 1;
    }
    if (power_option == CPUPowerOption::HIGH_PERFORMANCE) {
      // Ids are sorted by ascending frequency: the fastest CPUs are the tail.
      use_cpu_ids.assign(sorted_cpu_ids.end() - omp_num_threads,
                         sorted_cpu_ids.end());
    } else {
      use_cpu_ids.assign(sorted_cpu_ids.begin(),
                         sorted_cpu_ids.begin() + omp_num_threads);
    }
  }

  omp_set_num_threads(omp_num_threads);

  // compute mask
  cpu_set_t mask;
  CPU_ZERO(&mask);
  for (auto cpu_id : use_cpu_ids) {
    CPU_SET(cpu_id, &mask);
  }
  LOG(INFO) << "use cpus mask: " << mask.__bits[0];

  // One iteration per requested thread. schedule(static, 1) hands the
  // iterations out round-robin by thread number instead of relying on the
  // implementation-defined default schedule, so each thread of the team
  // applies the mask to itself.
#pragma omp parallel for schedule(static, 1)
  for (int i = 0; i < omp_num_threads; ++i) {
    SetThreadAffinity(mask);
  }
}
} // namespace mace
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
#include "mace/public/mace.h"
namespace mace {
// Sets the number of OpenMP threads and pins those threads to a subset of the
// CPUs chosen according to `power_option`.
//
// omp_num_threads: requested number of OpenMP threads.
// power_option:    CPU selection policy (see CPUPowerOption in
//                  mace/public/mace.h).
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option);
}
#endif //MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
...@@ -63,11 +63,67 @@ void OpenCLProfilingTimer::ClearTiming() { ...@@ -63,11 +63,67 @@ void OpenCLProfilingTimer::ClearTiming() {
} }
OpenCLRuntime *OpenCLRuntime::Global() { OpenCLRuntime *OpenCLRuntime::Global() {
static OpenCLRuntime instance; if (opencl_runtime_instance == nullptr) {
return &instance; return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_NORMAL,
GPUPriorityHint::PRIORITY_LOW);
}
return opencl_runtime_instance;
}
// Creates a new OpenCLRuntime with the given settings, stores it in
// `opencl_runtime_instance` and returns it.
//
// NOTE(review): the previous value of `opencl_runtime_instance` is overwritten
// without being released in this function, so calling CreateGlobal() more
// than once appears to leak the earlier runtime — confirm whether repeated
// calls are expected.
OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type,
GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
opencl_runtime_instance = new OpenCLRuntime(gpu_type, gpu_perf_hint,
gpu_priority_hint);
return opencl_runtime_instance;
}
// Translates the runtime configuration into a zero-terminated OpenCL context
// property list.
//
// properties:        output array. At most five entries are written (two
//                    key/value pairs plus the terminating 0), so the caller
//                    must provide room for that many.
// gpu_type:          only GPUType::ADRENO produces any properties.
// gpu_perf_hint:     LOW / NORMAL / HIGH add a CL_CONTEXT_PERF_HINT_QCOM
//                    pair; any other value adds nothing.
// gpu_priority_hint: LOW / NORMAL / HIGH add a CL_CONTEXT_PRIORITY_HINT_QCOM
//                    pair; any other value adds nothing.
void ParseOpenCLRuntimeConfig(cl_context_properties *properties,
                              GPUType gpu_type,
                              GPUPerfHint gpu_perf_hint,
                              GPUPriorityHint gpu_priority_hint) {
  int cursor = 0;

  if (gpu_type == GPUType::ADRENO) {
    // Pick the perf hint value first, then emit the key/value pair once.
    bool has_perf_hint = true;
    cl_context_properties perf_value = 0;
    switch (gpu_perf_hint) {
      case GPUPerfHint::PERF_LOW:
        perf_value = CL_PERF_HINT_LOW_QCOM;
        break;
      case GPUPerfHint::PERF_NORMAL:
        perf_value = CL_PERF_HINT_NORMAL_QCOM;
        break;
      case GPUPerfHint::PERF_HIGH:
        perf_value = CL_PERF_HINT_HIGH_QCOM;
        break;
      default:
        has_perf_hint = false;
        break;
    }
    if (has_perf_hint) {
      properties[cursor++] = CL_CONTEXT_PERF_HINT_QCOM;
      properties[cursor++] = perf_value;
    }

    // Same scheme for the priority hint.
    bool has_priority_hint = true;
    cl_context_properties priority_value = 0;
    switch (gpu_priority_hint) {
      case GPUPriorityHint::PRIORITY_LOW:
        priority_value = CL_PRIORITY_HINT_LOW_QCOM;
        break;
      case GPUPriorityHint::PRIORITY_NORMAL:
        priority_value = CL_PRIORITY_HINT_NORMAL_QCOM;
        break;
      case GPUPriorityHint::PRIORITY_HIGH:
        priority_value = CL_PRIORITY_HINT_HIGH_QCOM;
        break;
      default:
        has_priority_hint = false;
        break;
    }
    if (has_priority_hint) {
      properties[cursor++] = CL_CONTEXT_PRIORITY_HINT_QCOM;
      properties[cursor++] = priority_value;
    }
  } else {
    // TODO: support Mali GPU context properties
  }

  // The properties list should be terminated with 0
  properties[cursor] = 0;
}
OpenCLRuntime::OpenCLRuntime() { OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
LoadOpenCLLibrary(); LoadOpenCLLibrary();
std::vector<cl::Platform> all_platforms; std::vector<cl::Platform> all_platforms;
...@@ -109,15 +165,12 @@ OpenCLRuntime::OpenCLRuntime() { ...@@ -109,15 +165,12 @@ OpenCLRuntime::OpenCLRuntime() {
properties |= CL_QUEUE_PROFILING_ENABLE; properties |= CL_QUEUE_PROFILING_ENABLE;
} }
// TODO (heliangliang) Make this configurable (e.g.HIGH for benchmark, std::unique_ptr<cl_context_properties[]> context_properties(
// disabled for Mali) new cl_context_properties[5]);
cl_context_properties context_properties[] = { ParseOpenCLRuntimeConfig(context_properties.get(), gpu_type, gpu_perf_hint,
// Set context perf hint to normal gpu_priority_hint);
CL_CONTEXT_PERF_HINT_QCOM, CL_PERF_HINT_NORMAL_QCOM,
// Set context priority hint to low
CL_CONTEXT_PRIORITY_HINT_QCOM, CL_PRIORITY_HINT_LOW_QCOM, 0};
cl::Context context({gpu_device}, context_properties); cl::Context context({gpu_device}, context_properties.get());
cl::CommandQueue command_queue(context, gpu_device, properties); cl::CommandQueue command_queue(context, gpu_device, properties);
const char *kernel_path = getenv("MACE_KERNEL_PATH"); const char *kernel_path = getenv("MACE_KERNEL_PATH");
......
...@@ -20,7 +20,7 @@ namespace mace { ...@@ -20,7 +20,7 @@ namespace mace {
class OpenCLProfilingTimer : public Timer { class OpenCLProfilingTimer : public Timer {
public: public:
explicit OpenCLProfilingTimer(const cl::Event *event) explicit OpenCLProfilingTimer(const cl::Event *event)
: event_(event), accumulated_micros_(0){}; : event_(event), accumulated_micros_(0) {};
void StartTiming() override; void StartTiming() override;
void StopTiming() override; void StopTiming() override;
void AccumulateTiming() override; void AccumulateTiming() override;
...@@ -38,6 +38,7 @@ class OpenCLProfilingTimer : public Timer { ...@@ -38,6 +38,7 @@ class OpenCLProfilingTimer : public Timer {
class OpenCLRuntime { class OpenCLRuntime {
public: public:
static OpenCLRuntime *Global(); static OpenCLRuntime *Global();
static OpenCLRuntime *CreateGlobal(GPUType, GPUPerfHint, GPUPriorityHint);
cl::Context &context(); cl::Context &context();
cl::Device &device(); cl::Device &device();
...@@ -51,7 +52,7 @@ class OpenCLRuntime { ...@@ -51,7 +52,7 @@ class OpenCLRuntime {
const std::set<std::string> &build_options); const std::set<std::string> &build_options);
private: private:
OpenCLRuntime(); OpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
~OpenCLRuntime(); ~OpenCLRuntime();
OpenCLRuntime(const OpenCLRuntime &) = delete; OpenCLRuntime(const OpenCLRuntime &) = delete;
OpenCLRuntime &operator=(const OpenCLRuntime &) = delete; OpenCLRuntime &operator=(const OpenCLRuntime &) = delete;
...@@ -73,6 +74,7 @@ class OpenCLRuntime { ...@@ -73,6 +74,7 @@ class OpenCLRuntime {
std::string kernel_path_; std::string kernel_path_;
}; };
// Pointer to the runtime created by OpenCLRuntime::CreateGlobal(); nullptr
// until a runtime has been created.
// NOTE(review): a namespace-scope `static` variable in a header gives every
// translation unit that includes this header its own separate copy. Only the
// copy in the file defining Global()/CreateGlobal() is written by them —
// consider a single definition in the .cc file instead.
static OpenCLRuntime *opencl_runtime_instance = nullptr;
} // namespace mace } // namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_ #endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
...@@ -5,10 +5,15 @@ ...@@ -5,10 +5,15 @@
#include <iostream> #include <iostream>
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/public/mace.h"
int main(int argc, char **argv) { int main(int argc, char **argv) {
std::cout << "Running main() from test_main.cc\n"; std::cout << "Running main() from test_main.cc\n";
mace::ConfigCPURuntime(4, mace::CPUPowerOption::HIGH_PERFORMANCE);
mace::ConfigOpenCLRuntime(mace::GPUType::ADRENO, mace::GPUPerfHint::PERF_HIGH,
mace::GPUPriorityHint::PRIORITY_HIGH);
// TODO Use gflags // TODO Use gflags
if (argc == 2) { if (argc == 2) {
mace::testing::Benchmark::Run(argv[1]); mace::testing::Benchmark::Run(argv[1]);
......
...@@ -101,6 +101,16 @@ DeviceType ParseDeviceType(const string &device_str) { ...@@ -101,6 +101,16 @@ DeviceType ParseDeviceType(const string &device_str) {
} }
} }
// Maps the textual GPU type to its enum value.
//
// Returns GPUType::MALI for exactly "MALI". Every other string, including
// "ADRENO" and unrecognised values, maps to GPUType::ADRENO.
GPUType ParseGPUType(const string &gpu_type_str) {
  if (gpu_type_str == "MALI") {
    return GPUType::MALI;
  }
  return GPUType::ADRENO;
}
struct mallinfo LogMallinfoChange(struct mallinfo prev) { struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo(); struct mallinfo curr = mallinfo();
if (prev.arena != curr.arena) { if (prev.arena != curr.arena) {
...@@ -160,6 +170,11 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON"); ...@@ -160,6 +170,11 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32(round, 1, "round"); DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
bool SingleInputAndOutput(const std::vector<int64_t> &input_shape, bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
const std::vector<int64_t> &output_shape) { const std::vector<int64_t> &output_shape) {
...@@ -175,6 +190,20 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape, ...@@ -175,6 +190,20 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
DeviceType device_type = ParseDeviceType(FLAGS_device); DeviceType device_type = ParseDeviceType(FLAGS_device);
LOG(INFO) << "Runing with device type: " << device_type; LOG(INFO) << "Runing with device type: " << device_type;
// config runtime
if (device_type == DeviceType::OPENCL) {
GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == DeviceType::CPU) {
mace::ConfigCPURuntime(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
// Init model // Init model
LOG(INFO) << "Run init"; LOG(INFO) << "Run init";
t0 = NowMicros(); t0 = NowMicros();
...@@ -266,6 +295,20 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names, ...@@ -266,6 +295,20 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names,
DeviceType device_type = ParseDeviceType(FLAGS_device); DeviceType device_type = ParseDeviceType(FLAGS_device);
LOG(INFO) << "Runing with device type: " << device_type; LOG(INFO) << "Runing with device type: " << device_type;
// config runtime
if (device_type == DeviceType::OPENCL) {
GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == DeviceType::CPU) {
mace::ConfigCPURuntime(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
// Init model // Init model
LOG(INFO) << "Run init"; LOG(INFO) << "Run init";
t0 = NowMicros(); t0 = NowMicros();
...@@ -367,8 +410,13 @@ int main(int argc, char **argv) { ...@@ -367,8 +410,13 @@ int main(int argc, char **argv) {
LOG(INFO) << "output_file: " << FLAGS_output_file; LOG(INFO) << "output_file: " << FLAGS_output_file;
LOG(INFO) << "model_data_file: " << FLAGS_model_data_file; LOG(INFO) << "model_data_file: " << FLAGS_model_data_file;
LOG(INFO) << "device: " << FLAGS_device; LOG(INFO) << "device: " << FLAGS_device;
LOG(INFO) << "round: " << FLAGS_restart_round; LOG(INFO) << "round: " << FLAGS_round;
LOG(INFO) << "restart_round: " << FLAGS_round; LOG(INFO) << "restart_round: " << FLAGS_restart_round;
LOG(INFO) << "gpu_type: " << FLAGS_gpu_type;
LOG(INFO) << "gpu_perf_hint: " << FLAGS_gpu_perf_hint;
LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint;
LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads;
LOG(INFO) << "cpu_power_option: " << FLAGS_cpu_power_option;
std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ','); std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ',');
std::vector<std::string> output_names = str_util::Split(FLAGS_output_node, ','); std::vector<std::string> output_names = str_util::Split(FLAGS_output_node, ',');
......
...@@ -61,6 +61,17 @@ enum DataType { ...@@ -61,6 +61,17 @@ enum DataType {
DT_UINT32 = 22 DT_UINT32 = 22
}; };
// GPU family the OpenCL runtime is configured for.
enum GPUType { ADRENO = 0, MALI = 1 };
// Performance hint for the OpenCL context. The numeric values are what the
// integer command-line flags are cast from. PERF_NA results in no perf hint
// property being added to the context.
enum GPUPerfHint { PERF_NA = 0, PERF_LOW = 1, PERF_NORMAL = 2, PERF_HIGH = 3 };
// Priority hint for the OpenCL context. PRIORITY_NA results in no priority
// hint property being added to the context.
enum GPUPriorityHint {
PRIORITY_NA = 0,
PRIORITY_LOW = 1,
PRIORITY_NORMAL = 2,
PRIORITY_HIGH = 3
};
// CPU selection policy used by the CPU runtime:
//   DEFAULT          - use every CPU.
//   HIGH_PERFORMANCE - prefer the CPUs with the highest maximum frequency.
//   BATTERY_SAVE     - prefer the CPUs with the lowest maximum frequency.
enum CPUPowerOption { DEFAULT = 0, HIGH_PERFORMANCE = 1, BATTERY_SAVE = 2};
class ConstTensor { class ConstTensor {
public: public:
ConstTensor(const std::string &name, ConstTensor(const std::string &name,
...@@ -369,6 +380,9 @@ struct MaceInputInfo { ...@@ -369,6 +380,9 @@ struct MaceInputInfo {
const float *data; const float *data;
}; };
// Creates the global OpenCL runtime with the given GPU type, performance hint
// and priority hint.
void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
// Sets the OpenMP thread count and the CPU selection policy for the CPU
// runtime.
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option);
class MaceEngine { class MaceEngine {
public: public:
// Single input and output // Single input and output
......
...@@ -13,6 +13,9 @@ CURRENT_DIR=`dirname $0` ...@@ -13,6 +13,9 @@ CURRENT_DIR=`dirname $0`
source ${CURRENT_DIR}/env.sh source ${CURRENT_DIR}/env.sh
MODEL_OUTPUT_DIR=$1 MODEL_OUTPUT_DIR=$1
OPTION_ARGS=$2
echo $OPTION_ARGS
if [ -f "$MODEL_OUTPUT_DIR/benchmark_model" ]; then if [ -f "$MODEL_OUTPUT_DIR/benchmark_model" ]; then
rm -rf $MODEL_OUTPUT_DIR/benchmark_model rm -rf $MODEL_OUTPUT_DIR/benchmark_model
...@@ -23,7 +26,7 @@ if [ "$EMBED_MODEL_DATA" = 0 ]; then ...@@ -23,7 +26,7 @@ if [ "$EMBED_MODEL_DATA" = 0 ]; then
fi fi
if [ x"$TARGET_ABI" == x"host" ]; then if [ x"$TARGET_ABI" == x"host" ]; then
bazel build --verbose_failures -c opt --strip always benchmark:benchmark_model \ bazel build --verbose_failures -c opt --strip always //mace/benchmark:benchmark_model \
--copt="-std=c++11" \ --copt="-std=c++11" \
--copt="-D_GLIBCXX_USE_C99_MATH_TR1" \ --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \
--copt="-Werror=return-type" \ --copt="-Werror=return-type" \
...@@ -42,10 +45,11 @@ if [ x"$TARGET_ABI" == x"host" ]; then ...@@ -42,10 +45,11 @@ if [ x"$TARGET_ABI" == x"host" ]; then
--input_shape="${INPUT_SHAPES}"\ --input_shape="${INPUT_SHAPES}"\
--output_node="${OUTPUT_NODES}" \ --output_node="${OUTPUT_NODES}" \
--output_shape="${OUTPUT_SHAPES}"\ --output_shape="${OUTPUT_SHAPES}"\
--input_file=${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME} || exit 1 --input_file=${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME}_${INPUT_NODES} \
$OPTION_ARGS || exit 1
else else
bazel build --verbose_failures -c opt --strip always benchmark:benchmark_model \ bazel build --verbose_failures -c opt --strip always //mace/benchmark:benchmark_model \
--crosstool_top=//external:android/crosstool \ --crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=${TARGET_ABI} \ --cpu=${TARGET_ABI} \
...@@ -57,7 +61,7 @@ else ...@@ -57,7 +61,7 @@ else
--define openmp=true \ --define openmp=true \
--define production=true || exit 1 --define production=true || exit 1
cp bazel-bin/benchmark/benchmark_model $MODEL_OUTPUT_DIR cp bazel-bin/mace/benchmark/benchmark_model $MODEL_OUTPUT_DIR
adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit 1 adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit 1
IFS=',' read -r -a INPUT_NAMES <<< "${INPUT_NODES}" IFS=',' read -r -a INPUT_NAMES <<< "${INPUT_NODES}"
...@@ -83,5 +87,6 @@ else ...@@ -83,5 +87,6 @@ else
--input_shape="${INPUT_SHAPES}"\ --input_shape="${INPUT_SHAPES}"\
--output_node="${OUTPUT_NODES}" \ --output_node="${OUTPUT_NODES}" \
--output_shape="${OUTPUT_SHAPES}"\ --output_shape="${OUTPUT_SHAPES}"\
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} || exit 1 --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
$OPTION_ARGS || exit 1
fi fi
...@@ -29,9 +29,9 @@ if [ "$DSP_MODEL_TAG" != '' ]; then ...@@ -29,9 +29,9 @@ if [ "$DSP_MODEL_TAG" != '' ]; then
DSP_MODEL_TAG_BUILD_FLAGS="--copt=-DMACE_DSP_MODEL_TAG=${DSP_MODEL_TAG}" DSP_MODEL_TAG_BUILD_FLAGS="--copt=-DMACE_DSP_MODEL_TAG=${DSP_MODEL_TAG}"
fi fi
cp $MERGED_LIB_FILE benchmark/libmace_merged.a cp $MERGED_LIB_FILE mace/benchmark/libmace_merged.a
bazel build --verbose_failures -c opt --strip always benchmark:model_throughput_test \ bazel build --verbose_failures -c opt --strip always //mace/benchmark:model_throughput_test \
--crosstool_top=//external:android/crosstool \ --crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=${TARGET_ABI} \ --cpu=${TARGET_ABI} \
...@@ -44,18 +44,18 @@ bazel build --verbose_failures -c opt --strip always benchmark:model_throughput_ ...@@ -44,18 +44,18 @@ bazel build --verbose_failures -c opt --strip always benchmark:model_throughput_
--define openmp=true \ --define openmp=true \
--copt="-O3" || exit 1 --copt="-O3" || exit 1
rm benchmark/libmace_merged.a rm mace/benchmark/libmace_merged.a
adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit 1 adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit 1
adb push ${MODEL_INPUT_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} || exit 1 adb push ${MODEL_INPUT_DIR}/${INPUT_FILE_NAME}_${INPUT_NODES} ${PHONE_DATA_DIR} || exit 1
adb push bazel-bin/benchmark/model_throughput_test ${PHONE_DATA_DIR} || exit 1 adb push bazel-bin/mace/benchmark/model_throughput_test ${PHONE_DATA_DIR} || exit 1
if [ "$EMBED_MODEL_DATA" = 0 ]; then if [ "$EMBED_MODEL_DATA" = 0 ]; then
adb push codegen/models/${CPU_MODEL_TAG}/${CPU_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1 adb push codegen/models/${CPU_MODEL_TAG}/${CPU_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1
adb push codegen/models/${GPU_MODEL_TAG}/${GPU_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1 adb push codegen/models/${GPU_MODEL_TAG}/${GPU_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1
adb push codegen/models/${DSP_MODEL_TAG}/${DSP_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1 adb push codegen/models/${DSP_MODEL_TAG}/${DSP_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1
fi fi
adb push lib/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} || exit 1 adb push mace/core/runtime/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} || exit 1
adb </dev/null shell \ adb </dev/null shell \
LD_LIBRARY_PATH=${PHONE_DATA_DIR} \ LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
...@@ -64,9 +64,9 @@ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \ ...@@ -64,9 +64,9 @@ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \ MACE_KERNEL_PATH=$KERNEL_DIR \
MACE_LIMIT_OPENCL_KERNEL_TIME=${LIMIT_OPENCL_KERNEL_TIME} \ MACE_LIMIT_OPENCL_KERNEL_TIME=${LIMIT_OPENCL_KERNEL_TIME} \
${PHONE_DATA_DIR}/model_throughput_test \ ${PHONE_DATA_DIR}/model_throughput_test \
--input_shape="${INPUT_SHAPE}" \ --input_shape="${INPUT_SHAPES}" \
--output_shape="${OUTPUT_SHAPE}" \ --output_shape="${OUTPUT_SHAPES}" \
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \ --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME}_${INPUT_NODES} \
--cpu_model_data_file=${PHONE_DATA_DIR}/${CPU_MODEL_TAG}.data \ --cpu_model_data_file=${PHONE_DATA_DIR}/${CPU_MODEL_TAG}.data \
--gpu_model_data_file=${PHONE_DATA_DIR}/${GPU_MODEL_TAG}.data \ --gpu_model_data_file=${PHONE_DATA_DIR}/${GPU_MODEL_TAG}.data \
--dsp_model_data_file=${PHONE_DATA_DIR}/${DSP_MODEL_TAG}.data \ --dsp_model_data_file=${PHONE_DATA_DIR}/${DSP_MODEL_TAG}.data \
......
...@@ -80,19 +80,22 @@ def build_mace_run(production_mode, model_output_dir, hexagon_mode): ...@@ -80,19 +80,22 @@ def build_mace_run(production_mode, model_output_dir, hexagon_mode):
run_command(command) run_command(command)
def tuning_run(model_output_dir, running_round, tuning, production_mode, restart_round): def tuning_run(model_output_dir, running_round, tuning, production_mode,
command = "bash tools/tuning_run.sh {} {} {} {} {}".format( restart_round, option_args=''):
model_output_dir, running_round, int(tuning), int(production_mode), restart_round) command = "bash tools/tuning_run.sh {} {} {} {} {} \"{}\"".format(
model_output_dir, running_round, int(tuning), int(production_mode),
restart_round, option_args)
run_command(command) run_command(command)
def benchmark_model(model_output_dir): def benchmark_model(model_output_dir, option_args=''):
command = "bash tools/benchmark.sh {}".format(model_output_dir) command = "bash tools/benchmark.sh {} \"{}\"".format(model_output_dir, option_args)
run_command(command) run_command(command)
def run_model(model_output_dir, running_round, restart_round): def run_model(model_output_dir, running_round, restart_round, option_args):
tuning_run(model_output_dir, running_round, False, False, restart_round) tuning_run(model_output_dir, running_round, False, False, restart_round,
option_args)
def generate_production_code(model_output_dirs, pull_or_not): def generate_production_code(model_output_dirs, pull_or_not):
...@@ -204,6 +207,7 @@ def main(unused_args): ...@@ -204,6 +207,7 @@ def main(unused_args):
os.environ["PROJECT_NAME"] = os.path.splitext(os.path.basename(FLAGS.config))[0] os.environ["PROJECT_NAME"] = os.path.splitext(os.path.basename(FLAGS.config))[0]
generate_opencl_and_version_code() generate_opencl_and_version_code()
option_args = ' '.join([arg for arg in unused_args if arg.startswith('--')])
for target_abi in configs["target_abis"]: for target_abi in configs["target_abis"]:
global_runtime = get_global_runtime(configs) global_runtime = get_global_runtime(configs)
...@@ -255,10 +259,10 @@ def main(unused_args): ...@@ -255,10 +259,10 @@ def main(unused_args):
build_mace_run_prod(model_output_dir, FLAGS.tuning, global_runtime) build_mace_run_prod(model_output_dir, FLAGS.tuning, global_runtime)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all": if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
run_model(model_output_dir, FLAGS.round, FLAGS.restart_round) run_model(model_output_dir, FLAGS.round, FLAGS.restart_round, option_args)
if FLAGS.mode == "benchmark": if FLAGS.mode == "benchmark":
benchmark_model(model_output_dir) benchmark_model(model_output_dir, option_args)
if FLAGS.mode == "validate" or FLAGS.mode == "all": if FLAGS.mode == "validate" or FLAGS.mode == "all":
validate_model(model_output_dir) validate_model(model_output_dir)
......
...@@ -17,6 +17,9 @@ ROUND=$2 ...@@ -17,6 +17,9 @@ ROUND=$2
TUNING_OR_NOT=$3 TUNING_OR_NOT=$3
PRODUCTION_MODE=$4 PRODUCTION_MODE=$4
RESTART_ROUND=$5 RESTART_ROUND=$5
OPTION_ARGS=$6
echo $OPTION_ARGS
if [ x"$TARGET_ABI" = x"host" ]; then if [ x"$TARGET_ABI" = x"host" ]; then
MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \ MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
...@@ -30,7 +33,8 @@ if [ x"$TARGET_ABI" = x"host" ]; then ...@@ -30,7 +33,8 @@ if [ x"$TARGET_ABI" = x"host" ]; then
--model_data_file=${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data \ --model_data_file=${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data \
--device=${DEVICE_TYPE} \ --device=${DEVICE_TYPE} \
--round=1 \ --round=1 \
--restart_round=1 || exit 1 --restart_round=1 \
$OPTION_ARGS || exit 1
else else
if [[ "${TUNING_OR_NOT}" != "0" && "$PRODUCTION_MODE" != 1 ]];then if [[ "${TUNING_OR_NOT}" != "0" && "$PRODUCTION_MODE" != 1 ]];then
tuning_flag=1 tuning_flag=1
...@@ -54,9 +58,8 @@ else ...@@ -54,9 +58,8 @@ else
adb push ${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data ${PHONE_DATA_DIR} > /dev/null || exit 1 adb push ${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data ${PHONE_DATA_DIR} > /dev/null || exit 1
fi fi
adb push mace/core/runtime/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} > /dev/null || exit 1 adb push mace/core/runtime/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} > /dev/null || exit 1
mace_adb_output=`adb </dev/null shell \ ADB_CMD_STR="LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
"LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
MACE_TUNING=${tuning_flag} \ MACE_TUNING=${tuning_flag} \
MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \ MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
...@@ -72,7 +75,10 @@ else ...@@ -72,7 +75,10 @@ else
--model_data_file=${PHONE_DATA_DIR}/${MODEL_TAG}.data \ --model_data_file=${PHONE_DATA_DIR}/${MODEL_TAG}.data \
--device=${DEVICE_TYPE} \ --device=${DEVICE_TYPE} \
--round=$ROUND \ --round=$ROUND \
--restart_round=$RESTART_ROUND; echo \\$?"` || exit 1 --restart_round=$RESTART_ROUND \
$OPTION_ARGS; echo \\$?"
echo $ADB_CMD_STR
mace_adb_output=`adb </dev/null shell "$ADB_CMD_STR"` || exit 1
echo "$mace_adb_output" | head -n -1 echo "$mace_adb_output" | head -n -1
mace_adb_return_code=`echo "$mace_adb_output" | tail -1` mace_adb_return_code=`echo "$mace_adb_output" | tail -1`
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册