提交 cedb6e8d 编写于 作者: 李寅

Merge branch 'perf_config' into 'master'

make CPU&GPU Runtime configurable

See merge request !289
......@@ -7,3 +7,4 @@ mace/codegen/models/
mace/codegen/opencl/
mace/codegen/opencl_bin/
mace/codegen/version/
build/
# Benchmark
# Examples
load(
"//:mace.bzl",
"//mace:mace.bzl",
"if_production_mode",
"if_not_production_mode",
"if_hexagon_enabled",
......@@ -16,7 +16,7 @@ cc_library(
hdrs = ["stat_summarizer.h"],
linkstatic = 1,
deps = [
"@mace//:mace_headers",
"//mace/core",
],
)
......@@ -29,17 +29,9 @@ cc_binary(
linkstatic = 1,
deps = [
":stat_summarizer",
"//mace/codegen:generated_models",
"//external:gflags_nothreads",
] + if_hexagon_enabled([
"//lib/hexagon:hexagon",
]) + if_production_mode([
"@mace//:mace_prod",
"//codegen:generated_opencl_prod",
"//codegen:generated_tuning_params",
]) + if_not_production_mode([
"@mace//:mace_dev",
]),
"//mace/codegen:generated_models",
],
)
cc_library(
......@@ -58,9 +50,6 @@ cc_binary(
deps = [
":libmace_merged",
"//external:gflags_nothreads",
"//lib/hexagon",
"@mace//:mace",
"@mace//:mace_headers",
"@mace//:mace_prod",
"//mace/core",
],
)
......@@ -5,7 +5,7 @@
#include "gflags/gflags.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
#include "benchmark/stat_summarizer.h"
#include "mace/benchmark/stat_summarizer.h"
#include <cstdlib>
#include <fstream>
......@@ -204,6 +204,11 @@ DEFINE_bool(show_summary, true, "whether to show a summary of the stats");
DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
int Main(int argc, char **argv) {
MACE_CHECK(FLAGS_device != "HEXAGON", "Model benchmark tool do not support DSP.");
......@@ -212,6 +217,11 @@ int Main(int argc, char **argv) {
LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]";
LOG(INFO) << "Device: [" << FLAGS_device << "]";
LOG(INFO) << "gpu_type: [" << FLAGS_gpu_type << "]";
LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
LOG(INFO) << "omp_num_threads: [" << FLAGS_omp_num_threads << "]";
LOG(INFO) << "cpu_power_option: [" << FLAGS_cpu_power_option << "]";
LOG(INFO) << "Input node: [" << FLAGS_input_node<< "]";
LOG(INFO) << "Input shapes: [" << FLAGS_input_shape << "]";
LOG(INFO) << "Output node: [" << FLAGS_output_node<< "]";
......@@ -246,6 +256,21 @@ int Main(int argc, char **argv) {
device_type = OPENCL;
}
// config runtime
if (device_type == OPENCL) {
GPUType gpu_type = ADRENO;
if (FLAGS_gpu_type == "MALI") gpu_type = MALI;
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == CPU) {
mace::ConfigCPURuntime(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ',');
std::vector<std::string> output_names = str_util::Split(FLAGS_output_node, ',');
std::vector<std::string> input_shapes = str_util::Split(FLAGS_input_shape, ':');
......
......@@ -2,7 +2,7 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "benchmark/stat_summarizer.h"
#include "mace/benchmark/stat_summarizer.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
......
......@@ -14,6 +14,7 @@ load(
"if_not_hexagon_enabled",
"if_production_mode",
"if_not_production_mode",
"if_openmp_enabled",
)
cc_library(
......@@ -21,6 +22,7 @@ cc_library(
srcs = glob(
[
"*.cc",
"runtime/cpu/*.cc",
"runtime/opencl/*.cc",
"runtime/hexagon/*.cc",
],
......@@ -37,9 +39,11 @@ cc_library(
]),
hdrs = glob([
"*.h",
"runtime/cpu/*.h",
"runtime/opencl/*.h",
"runtime/hexagon/*.h",
]),
copts = if_openmp_enabled(["-fopenmp"]),
linkopts = ["-ldl"] + if_android([
"-pie",
"-lm",
......
......@@ -5,6 +5,8 @@
#include "mace/public/mace.h"
#include "mace/core/net.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/core/types.h"
namespace mace {
......@@ -349,6 +351,21 @@ const OperatorDef &NetDef::op(const int idx) const {
return op_[idx];
}
// Public entry point for configuring the OpenCL runtime.
//
// Logs the requested settings and then creates the global OpenCLRuntime
// instance with them via OpenCLRuntime::CreateGlobal.
//
//   gpu_type          - GPU family the context properties are chosen for.
//   gpu_perf_hint     - requested performance hint.
//   gpu_priority_hint - requested priority hint.
void ConfigOpenCLRuntime(GPUType gpu_type,
                         GPUPerfHint gpu_perf_hint,
                         GPUPriorityHint gpu_priority_hint) {
  // Enum values are streamed as-is, so the log shows their numeric values.
  LOG(INFO) << "Config OpenCL Runtime: gpu_type: " << gpu_type
            << ", gpu_perf_hint: " << gpu_perf_hint
            << ", gpu_priority_hint: " << gpu_priority_hint;

  OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint);
}
// Public entry point for configuring the CPU runtime.
//
// Logs the requested settings and forwards them unchanged to SetCPURuntime.
//
//   omp_num_threads - requested number of OpenMP threads.
//   power_option    - CPU selection policy.
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
  LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
            << ", cpu_power_option: " << power_option;

  SetCPURuntime(omp_num_threads, power_option);
}
// Mace Engine
MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type)
: op_registry_(new OperatorRegistry()),
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
#include <omp.h>
#include <sys/syscall.h>
#include <unistd.h>
namespace mace {
namespace {
// Reads the maximum frequency the kernel reports for CPU `cpu_id` from
// /sys/devices/system/cpu/cpu<cpu_id>/cpufreq/cpuinfo_max_freq.
//
// Returns the integer stored in that file, or 0 when the path cannot be
// built, the file cannot be opened, or it does not start with an integer.
static int GetCPUMaxFreq(int cpu_id) {
  char path[64];
  // snprintf never writes past the buffer; a truncated path is treated the
  // same way as a missing file instead of being opened by accident.
  const int written =
      snprintf(path, sizeof(path),
               "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
               cpu_id);
  if (written < 0 || written >= static_cast<int>(sizeof(path))) return 0;

  FILE *fp = fopen(path, "rb");
  if (!fp) return 0;

  int freq = 0;
  // Make the "unparsable content yields 0" contract explicit.
  if (fscanf(fp, "%d", &freq) != 1) {
    freq = 0;
  }
  fclose(fp);
  return freq;
}
// Overwrites `cpu_ids` with the ids 0..size-1 ordered by ascending maximum
// frequency (as reported by GetCPUMaxFreq).
//
// Only the size of the incoming vector is used; its previous contents are
// discarded.
static void SortCPUIdsByMaxFreqAsc(std::vector<int> &cpu_ids) {
  const int num_cpus = cpu_ids.size();
  std::vector<int> max_freqs(num_cpus);

  // Start from the identity ordering and look up each CPU's max frequency.
  for (int id = 0; id < num_cpus; ++id) {
    max_freqs[id] = GetCPUMaxFreq(id);
    cpu_ids[id] = id;
  }

  // Exchange sort: slot `lo` is compared with every later slot `hi` and the
  // two are swapped whenever the earlier frequency is larger. The id and
  // frequency arrays are permuted together so they stay in step.
  for (int lo = 0; lo + 1 < num_cpus; ++lo) {
    for (int hi = lo + 1; hi < num_cpus; ++hi) {
      if (max_freqs[lo] <= max_freqs[hi]) continue;

      const int id_at_lo = cpu_ids[lo];
      const int freq_at_lo = max_freqs[lo];
      cpu_ids[lo] = cpu_ids[hi];
      max_freqs[lo] = max_freqs[hi];
      cpu_ids[hi] = id_at_lo;
      max_freqs[hi] = freq_at_lo;
    }
  }
}
// Applies the CPU affinity `mask` to the thread identified by gettid() using
// the raw sched_setaffinity syscall. A non-zero syscall result is reported
// through LOG(FATAL) together with errno.
static void SetThreadAffinity(cpu_set_t mask) {
  // TODO: when omp num threads is set to 1, this sometimes returns an
  // EINVAL(22) error
  // https://linux.die.net/man/2/sched_setaffinity
  const pid_t tid = gettid();
  const int sys_call_res =
      syscall(__NR_sched_setaffinity, tid, sizeof(mask), &mask);
  if (sys_call_res == 0) return;

  LOG(FATAL) << "syscall setaffinity error: " << sys_call_res << ' ' << errno;
}
} // namespace
// Configures the OpenMP thread count and pins the OpenMP threads to a set of
// CPUs chosen according to `power_option`.
//
//   omp_num_threads - requested number of OpenMP threads.
//   power_option    - DEFAULT: use every CPU (the requested thread count is
//                     replaced by the CPU count);
//                     HIGH_PERFORMANCE: use the `omp_num_threads` CPUs with
//                     the highest max frequency;
//                     any other value (BATTERY_SAVE): use the
//                     `omp_num_threads` CPUs with the lowest max frequency.
//
// A request for at least as many threads as there are CPUs is treated like
// DEFAULT. A non-positive request with a non-DEFAULT option is clamped to 1
// so that the CPU selection below never indexes outside the sorted id list.
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) {
  const int cpu_count = omp_get_num_procs();
  LOG(INFO) << "cpu_count: " << cpu_count;

  // CPU ids ordered from lowest to highest max frequency.
  std::vector<int> sorted_cpu_ids;
  sorted_cpu_ids.resize(cpu_count);
  SortCPUIdsByMaxFreqAsc(sorted_cpu_ids);

  std::vector<int> use_cpu_ids;
  if (power_option == CPUPowerOption::DEFAULT || omp_num_threads >= cpu_count) {
    use_cpu_ids = sorted_cpu_ids;
    omp_num_threads = cpu_count;
  } else {
    if (omp_num_threads < 1) {
      // Zero or negative counts would produce an empty affinity mask and
      // out-of-range iterators; fall back to a single thread instead.
      LOG(INFO) << "invalid omp_num_threads: " << omp_num_threads
                << ", use 1 instead";
      omp_num_threads = 1;
    }
    // Here 1 <= omp_num_threads < cpu_count, so both ranges are valid.
    if (power_option == CPUPowerOption::HIGH_PERFORMANCE) {
      use_cpu_ids.assign(sorted_cpu_ids.end() - omp_num_threads,
                         sorted_cpu_ids.end());
    } else {
      use_cpu_ids.assign(sorted_cpu_ids.begin(),
                         sorted_cpu_ids.begin() + omp_num_threads);
    }
  }

  omp_set_num_threads(omp_num_threads);

  // compute mask
  cpu_set_t mask;
  CPU_ZERO(&mask);
  for (auto cpu_id : use_cpu_ids) {
    CPU_SET(cpu_id, &mask);
  }
  LOG(INFO) << "use cpus mask: " << mask.__bits[0];

  // Every thread of the team executes the region body exactly once, so each
  // one applies the mask to itself. A work-shared loop would leave the
  // iteration-to-thread mapping up to the schedule.
#pragma omp parallel num_threads(omp_num_threads)
  {
    SetThreadAffinity(mask);
  }
}
} // namespace mace
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
#include "mace/public/mace.h"
namespace mace {
// Sets the OpenMP thread count and pins the OpenMP threads to a subset of
// CPUs selected by `power_option` (CPUs are ranked by their maximum
// frequency). With CPUPowerOption::DEFAULT, or when `omp_num_threads` is at
// least the number of CPUs, all CPUs are used and the thread count becomes
// the CPU count.
void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option);
}
#endif //MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H
......@@ -63,11 +63,67 @@ void OpenCLProfilingTimer::ClearTiming() {
}
OpenCLRuntime *OpenCLRuntime::Global() {
static OpenCLRuntime instance;
return &instance;
if (opencl_runtime_instance == nullptr) {
return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_NORMAL,
GPUPriorityHint::PRIORITY_LOW);
}
return opencl_runtime_instance;
}
// Constructs a new OpenCLRuntime with the given configuration, stores it in
// `opencl_runtime_instance` and returns it.
//
// NOTE(review): a previously stored instance is neither reused nor released
// here, so calling this more than once looks like it leaks the earlier
// runtime - confirm callers invoke it at most once.
OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type,
                                           GPUPerfHint gpu_perf_hint,
                                           GPUPriorityHint gpu_priority_hint) {
  OpenCLRuntime *created =
      new OpenCLRuntime(gpu_type, gpu_perf_hint, gpu_priority_hint);
  opencl_runtime_instance = created;
  return created;
}
// Translates the runtime configuration into a zero-terminated OpenCL context
// property list written to `properties`.
//
// For GPUType::ADRENO, a perf hint of LOW/NORMAL/HIGH adds a
// CL_CONTEXT_PERF_HINT_QCOM pair and a priority hint of LOW/NORMAL/HIGH adds
// a CL_CONTEXT_PRIORITY_HINT_QCOM pair; any other hint value adds nothing.
// For other GPU types only the terminator is written.
//
// `properties` must have room for up to five entries (two key/value pairs
// plus the terminating 0).
void ParseOpenCLRuntimeConfig(cl_context_properties *properties,
                              GPUType gpu_type,
                              GPUPerfHint gpu_perf_hint,
                              GPUPriorityHint gpu_priority_hint) {
  int index = 0;

  if (gpu_type == GPUType::ADRENO) {
    // Pick the perf hint value first, then emit the key/value pair once.
    bool has_perf_hint = true;
    cl_context_properties perf_value = 0;
    switch (gpu_perf_hint) {
      case GPUPerfHint::PERF_LOW:
        perf_value = CL_PERF_HINT_LOW_QCOM;
        break;
      case GPUPerfHint::PERF_NORMAL:
        perf_value = CL_PERF_HINT_NORMAL_QCOM;
        break;
      case GPUPerfHint::PERF_HIGH:
        perf_value = CL_PERF_HINT_HIGH_QCOM;
        break;
      default:
        has_perf_hint = false;
        break;
    }
    if (has_perf_hint) {
      properties[index++] = CL_CONTEXT_PERF_HINT_QCOM;
      properties[index++] = perf_value;
    }

    // Same scheme for the priority hint.
    bool has_priority_hint = true;
    cl_context_properties priority_value = 0;
    switch (gpu_priority_hint) {
      case GPUPriorityHint::PRIORITY_LOW:
        priority_value = CL_PRIORITY_HINT_LOW_QCOM;
        break;
      case GPUPriorityHint::PRIORITY_NORMAL:
        priority_value = CL_PRIORITY_HINT_NORMAL_QCOM;
        break;
      case GPUPriorityHint::PRIORITY_HIGH:
        priority_value = CL_PRIORITY_HINT_HIGH_QCOM;
        break;
      default:
        has_priority_hint = false;
        break;
    }
    if (has_priority_hint) {
      properties[index++] = CL_CONTEXT_PRIORITY_HINT_QCOM;
      properties[index++] = priority_value;
    }
  } else {
    // TODO: support Mali GPU context properties
  }

  // The properties list should be terminated with 0
  properties[index] = 0;
}
OpenCLRuntime::OpenCLRuntime() {
OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
LoadOpenCLLibrary();
std::vector<cl::Platform> all_platforms;
......@@ -109,15 +165,12 @@ OpenCLRuntime::OpenCLRuntime() {
properties |= CL_QUEUE_PROFILING_ENABLE;
}
// TODO (heliangliang) Make this configurable (e.g.HIGH for benchmark,
// disabled for Mali)
cl_context_properties context_properties[] = {
// Set context perf hint to normal
CL_CONTEXT_PERF_HINT_QCOM, CL_PERF_HINT_NORMAL_QCOM,
// Set context priority hint to low
CL_CONTEXT_PRIORITY_HINT_QCOM, CL_PRIORITY_HINT_LOW_QCOM, 0};
std::unique_ptr<cl_context_properties[]> context_properties(
new cl_context_properties[5]);
ParseOpenCLRuntimeConfig(context_properties.get(), gpu_type, gpu_perf_hint,
gpu_priority_hint);
cl::Context context({gpu_device}, context_properties);
cl::Context context({gpu_device}, context_properties.get());
cl::CommandQueue command_queue(context, gpu_device, properties);
const char *kernel_path = getenv("MACE_KERNEL_PATH");
......
......@@ -20,7 +20,7 @@ namespace mace {
class OpenCLProfilingTimer : public Timer {
public:
explicit OpenCLProfilingTimer(const cl::Event *event)
: event_(event), accumulated_micros_(0){};
: event_(event), accumulated_micros_(0) {};
void StartTiming() override;
void StopTiming() override;
void AccumulateTiming() override;
......@@ -38,6 +38,7 @@ class OpenCLProfilingTimer : public Timer {
class OpenCLRuntime {
public:
static OpenCLRuntime *Global();
static OpenCLRuntime *CreateGlobal(GPUType, GPUPerfHint, GPUPriorityHint);
cl::Context &context();
cl::Device &device();
......@@ -51,7 +52,7 @@ class OpenCLRuntime {
const std::set<std::string> &build_options);
private:
OpenCLRuntime();
OpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
~OpenCLRuntime();
OpenCLRuntime(const OpenCLRuntime &) = delete;
OpenCLRuntime &operator=(const OpenCLRuntime &) = delete;
......@@ -73,6 +74,7 @@ class OpenCLRuntime {
std::string kernel_path_;
};
static OpenCLRuntime *opencl_runtime_instance = nullptr;
} // namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
......@@ -5,10 +5,15 @@
#include <iostream>
#include "mace/core/testing/test_benchmark.h"
#include "mace/public/mace.h"
int main(int argc, char **argv) {
std::cout << "Running main() from test_main.cc\n";
mace::ConfigCPURuntime(4, mace::CPUPowerOption::HIGH_PERFORMANCE);
mace::ConfigOpenCLRuntime(mace::GPUType::ADRENO, mace::GPUPerfHint::PERF_HIGH,
mace::GPUPriorityHint::PRIORITY_HIGH);
// TODO Use gflags
if (argc == 2) {
mace::testing::Benchmark::Run(argv[1]);
......
......@@ -101,6 +101,16 @@ DeviceType ParseDeviceType(const string &device_str) {
}
}
// Maps the textual GPU type flag to a GPUType value.
//
// Exactly "MALI" yields GPUType::MALI; every other string (including
// "ADRENO" and unrecognized values) yields GPUType::ADRENO.
GPUType ParseGPUType(const string &gpu_type_str) {
  if (gpu_type_str == "MALI") {
    return GPUType::MALI;
  }
  return GPUType::ADRENO;
}
struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo();
if (prev.arena != curr.arena) {
......@@ -160,6 +170,11 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:NA/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
const std::vector<int64_t> &output_shape) {
......@@ -175,6 +190,20 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
DeviceType device_type = ParseDeviceType(FLAGS_device);
LOG(INFO) << "Runing with device type: " << device_type;
// config runtime
if (device_type == DeviceType::OPENCL) {
GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == DeviceType::CPU) {
mace::ConfigCPURuntime(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
// Init model
LOG(INFO) << "Run init";
t0 = NowMicros();
......@@ -266,6 +295,20 @@ bool MultipleInputOrOutput(const std::vector<std::string> &input_names,
DeviceType device_type = ParseDeviceType(FLAGS_device);
LOG(INFO) << "Runing with device type: " << device_type;
// config runtime
if (device_type == DeviceType::OPENCL) {
GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
else if (device_type == DeviceType::CPU) {
mace::ConfigCPURuntime(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
// Init model
LOG(INFO) << "Run init";
t0 = NowMicros();
......@@ -367,8 +410,13 @@ int main(int argc, char **argv) {
LOG(INFO) << "output_file: " << FLAGS_output_file;
LOG(INFO) << "model_data_file: " << FLAGS_model_data_file;
LOG(INFO) << "device: " << FLAGS_device;
LOG(INFO) << "round: " << FLAGS_restart_round;
LOG(INFO) << "restart_round: " << FLAGS_round;
LOG(INFO) << "round: " << FLAGS_round;
LOG(INFO) << "restart_round: " << FLAGS_restart_round;
LOG(INFO) << "gpu_type: " << FLAGS_gpu_type;
LOG(INFO) << "gpu_perf_hint: " << FLAGS_gpu_perf_hint;
LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint;
LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads;
LOG(INFO) << "cpu_power_option: " << FLAGS_cpu_power_option;
std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ',');
std::vector<std::string> output_names = str_util::Split(FLAGS_output_node, ',');
......
......@@ -61,6 +61,17 @@ enum DataType {
DT_UINT32 = 22
};
// GPU family the OpenCL runtime is configured for. Context hints are only
// generated for ADRENO.
enum GPUType { ADRENO = 0, MALI = 1 };
// Performance hint for the OpenCL context. On ADRENO, LOW/NORMAL/HIGH select
// the matching CL_PERF_HINT_*_QCOM value; PERF_NA adds no perf hint.
enum GPUPerfHint { PERF_NA = 0, PERF_LOW = 1, PERF_NORMAL = 2, PERF_HIGH = 3 };
// Priority hint for the OpenCL context. On ADRENO, LOW/NORMAL/HIGH select the
// matching CL_PRIORITY_HINT_*_QCOM value; PRIORITY_NA adds no priority hint.
enum GPUPriorityHint {
PRIORITY_NA = 0,
PRIORITY_LOW = 1,
PRIORITY_NORMAL = 2,
PRIORITY_HIGH = 3
};
// CPU selection policy used when configuring the CPU runtime:
//   DEFAULT          - use all CPUs.
//   HIGH_PERFORMANCE - prefer the CPUs with the highest max frequency.
//   BATTERY_SAVE     - prefer the CPUs with the lowest max frequency.
enum CPUPowerOption { DEFAULT = 0, HIGH_PERFORMANCE = 1, BATTERY_SAVE = 2};
class ConstTensor {
public:
ConstTensor(const std::string &name,
......@@ -369,6 +380,9 @@ struct MaceInputInfo {
const float *data;
};
// Creates the global OpenCL runtime with the given GPU type, performance
// hint and priority hint.
void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
// Configures the CPU runtime: the number of OpenMP threads and the policy
// used to choose which CPUs those threads are pinned to.
void ConfigCPURuntime(int omp_num_threads, CPUPowerOption power_option);
class MaceEngine {
public:
// Single input and output
......
......@@ -13,6 +13,9 @@ CURRENT_DIR=`dirname $0`
source ${CURRENT_DIR}/env.sh
MODEL_OUTPUT_DIR=$1
OPTION_ARGS=$2
echo $OPTION_ARGS
if [ -f "$MODEL_OUTPUT_DIR/benchmark_model" ]; then
rm -rf $MODEL_OUTPUT_DIR/benchmark_model
......@@ -23,7 +26,7 @@ if [ "$EMBED_MODEL_DATA" = 0 ]; then
fi
if [ x"$TARGET_ABI" == x"host" ]; then
bazel build --verbose_failures -c opt --strip always benchmark:benchmark_model \
bazel build --verbose_failures -c opt --strip always //mace/benchmark:benchmark_model \
--copt="-std=c++11" \
--copt="-D_GLIBCXX_USE_C99_MATH_TR1" \
--copt="-Werror=return-type" \
......@@ -42,10 +45,11 @@ if [ x"$TARGET_ABI" == x"host" ]; then
--input_shape="${INPUT_SHAPES}"\
--output_node="${OUTPUT_NODES}" \
--output_shape="${OUTPUT_SHAPES}"\
--input_file=${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME} || exit 1
--input_file=${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME}_${INPUT_NODES} \
$OPTION_ARGS || exit 1
else
bazel build --verbose_failures -c opt --strip always benchmark:benchmark_model \
bazel build --verbose_failures -c opt --strip always //mace/benchmark:benchmark_model \
--crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=${TARGET_ABI} \
......@@ -57,7 +61,7 @@ else
--define openmp=true \
--define production=true || exit 1
cp bazel-bin/benchmark/benchmark_model $MODEL_OUTPUT_DIR
cp bazel-bin/mace/benchmark/benchmark_model $MODEL_OUTPUT_DIR
adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit 1
IFS=',' read -r -a INPUT_NAMES <<< "${INPUT_NODES}"
......@@ -83,5 +87,6 @@ else
--input_shape="${INPUT_SHAPES}"\
--output_node="${OUTPUT_NODES}" \
--output_shape="${OUTPUT_SHAPES}"\
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} || exit 1
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
$OPTION_ARGS || exit 1
fi
......@@ -29,9 +29,9 @@ if [ "$DSP_MODEL_TAG" != '' ]; then
DSP_MODEL_TAG_BUILD_FLAGS="--copt=-DMACE_DSP_MODEL_TAG=${DSP_MODEL_TAG}"
fi
cp $MERGED_LIB_FILE benchmark/libmace_merged.a
cp $MERGED_LIB_FILE mace/benchmark/libmace_merged.a
bazel build --verbose_failures -c opt --strip always benchmark:model_throughput_test \
bazel build --verbose_failures -c opt --strip always //mace/benchmark:model_throughput_test \
--crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=${TARGET_ABI} \
......@@ -44,18 +44,18 @@ bazel build --verbose_failures -c opt --strip always benchmark:model_throughput_
--define openmp=true \
--copt="-O3" || exit 1
rm benchmark/libmace_merged.a
rm mace/benchmark/libmace_merged.a
adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit 1
adb push ${MODEL_INPUT_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} || exit 1
adb push bazel-bin/benchmark/model_throughput_test ${PHONE_DATA_DIR} || exit 1
adb push ${MODEL_INPUT_DIR}/${INPUT_FILE_NAME}_${INPUT_NODES} ${PHONE_DATA_DIR} || exit 1
adb push bazel-bin/mace/benchmark/model_throughput_test ${PHONE_DATA_DIR} || exit 1
if [ "$EMBED_MODEL_DATA" = 0 ]; then
adb push codegen/models/${CPU_MODEL_TAG}/${CPU_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1
adb push codegen/models/${GPU_MODEL_TAG}/${GPU_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1
adb push codegen/models/${DSP_MODEL_TAG}/${DSP_MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1
fi
adb push lib/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} || exit 1
adb push mace/core/runtime/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} || exit 1
adb </dev/null shell \
LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
......@@ -64,9 +64,9 @@ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \
MACE_LIMIT_OPENCL_KERNEL_TIME=${LIMIT_OPENCL_KERNEL_TIME} \
${PHONE_DATA_DIR}/model_throughput_test \
--input_shape="${INPUT_SHAPE}" \
--output_shape="${OUTPUT_SHAPE}" \
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
--input_shape="${INPUT_SHAPES}" \
--output_shape="${OUTPUT_SHAPES}" \
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME}_${INPUT_NODES} \
--cpu_model_data_file=${PHONE_DATA_DIR}/${CPU_MODEL_TAG}.data \
--gpu_model_data_file=${PHONE_DATA_DIR}/${GPU_MODEL_TAG}.data \
--dsp_model_data_file=${PHONE_DATA_DIR}/${DSP_MODEL_TAG}.data \
......
......@@ -80,19 +80,22 @@ def build_mace_run(production_mode, model_output_dir, hexagon_mode):
run_command(command)
def tuning_run(model_output_dir, running_round, tuning, production_mode, restart_round):
command = "bash tools/tuning_run.sh {} {} {} {} {}".format(
model_output_dir, running_round, int(tuning), int(production_mode), restart_round)
def tuning_run(model_output_dir, running_round, tuning, production_mode,
restart_round, option_args=''):
command = "bash tools/tuning_run.sh {} {} {} {} {} \"{}\"".format(
model_output_dir, running_round, int(tuning), int(production_mode),
restart_round, option_args)
run_command(command)
def benchmark_model(model_output_dir):
command = "bash tools/benchmark.sh {}".format(model_output_dir)
def benchmark_model(model_output_dir, option_args=''):
command = "bash tools/benchmark.sh {} \"{}\"".format(model_output_dir, option_args)
run_command(command)
def run_model(model_output_dir, running_round, restart_round):
tuning_run(model_output_dir, running_round, False, False, restart_round)
def run_model(model_output_dir, running_round, restart_round, option_args):
tuning_run(model_output_dir, running_round, False, False, restart_round,
option_args)
def generate_production_code(model_output_dirs, pull_or_not):
......@@ -204,6 +207,7 @@ def main(unused_args):
os.environ["PROJECT_NAME"] = os.path.splitext(os.path.basename(FLAGS.config))[0]
generate_opencl_and_version_code()
option_args = ' '.join([arg for arg in unused_args if arg.startswith('--')])
for target_abi in configs["target_abis"]:
global_runtime = get_global_runtime(configs)
......@@ -255,10 +259,10 @@ def main(unused_args):
build_mace_run_prod(model_output_dir, FLAGS.tuning, global_runtime)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
run_model(model_output_dir, FLAGS.round, FLAGS.restart_round)
run_model(model_output_dir, FLAGS.round, FLAGS.restart_round, option_args)
if FLAGS.mode == "benchmark":
benchmark_model(model_output_dir)
benchmark_model(model_output_dir, option_args)
if FLAGS.mode == "validate" or FLAGS.mode == "all":
validate_model(model_output_dir)
......
......@@ -17,6 +17,9 @@ ROUND=$2
TUNING_OR_NOT=$3
PRODUCTION_MODE=$4
RESTART_ROUND=$5
OPTION_ARGS=$6
echo $OPTION_ARGS
if [ x"$TARGET_ABI" = x"host" ]; then
MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
......@@ -30,7 +33,8 @@ if [ x"$TARGET_ABI" = x"host" ]; then
--model_data_file=${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data \
--device=${DEVICE_TYPE} \
--round=1 \
--restart_round=1 || exit 1
--restart_round=1 \
$OPTION_ARGS || exit 1
else
if [[ "${TUNING_OR_NOT}" != "0" && "$PRODUCTION_MODE" != 1 ]];then
tuning_flag=1
......@@ -54,9 +58,8 @@ else
adb push ${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data ${PHONE_DATA_DIR} > /dev/null || exit 1
fi
adb push mace/core/runtime/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} > /dev/null || exit 1
mace_adb_output=`adb </dev/null shell \
"LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
ADB_CMD_STR="LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
MACE_TUNING=${tuning_flag} \
MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
......@@ -72,7 +75,10 @@ else
--model_data_file=${PHONE_DATA_DIR}/${MODEL_TAG}.data \
--device=${DEVICE_TYPE} \
--round=$ROUND \
--restart_round=$RESTART_ROUND; echo \\$?"` || exit 1
--restart_round=$RESTART_ROUND \
$OPTION_ARGS; echo \\$?"
echo $ADB_CMD_STR
mace_adb_output=`adb </dev/null shell "$ADB_CMD_STR"` || exit 1
echo "$mace_adb_output" | head -n -1
mace_adb_return_code=`echo "$mace_adb_output" | tail -1`
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册