提交 dbf67ad9 编写于 作者: L liuqi

Add CreateMaceEngine API and speed up build logic.

上级 5c239b6e
......@@ -8,6 +8,7 @@ mace/codegen/opencl/
mace/codegen/opencl_bin/
mace/codegen/tuning/
mace/codegen/version/
mace/codegen/engine/
build/
docs/_build/
......
......@@ -6,6 +6,7 @@ load(
"if_not_production_mode",
"if_hexagon_enabled",
"if_openmp_enabled",
"if_android",
)
licenses(["notice"]) # Apache 2.0
......@@ -26,12 +27,14 @@ cc_binary(
srcs = [
"benchmark_model.cc",
],
copts = if_android(["-DMACE_ENABLE_OPENCL"]),
linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1,
deps = [
":statistics",
"//external:gflags_nothreads",
"//mace/codegen:generated_models",
"//mace/codegen:generated_mace_engine_creator",
],
)
......
......@@ -26,20 +26,6 @@
#include "mace/utils/logging.h"
#include "mace/benchmark/statistics.h"
namespace mace {
namespace MACE_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
} // namespace MACE_MODEL_TAG
} // namespace mace
namespace mace {
namespace benchmark {
namespace str_util {
......@@ -188,6 +174,7 @@ bool Run(const std::string &title,
return true;
}
DEFINE_string(model_tag, "", "model tag");
DEFINE_string(device, "CPU", "Device [CPU|GPU|DSP]");
DEFINE_string(input_node, "input_node0,input_node1",
"input nodes, separated by comma");
......@@ -198,7 +185,6 @@ DEFINE_string(output_shape, "", "output shape, separated by colon and comma");
DEFINE_string(input_file, "", "input file name");
DEFINE_int32(max_num_runs, 100, "number of runs max");
DEFINE_string(max_time, "10.0", "length to run max");
DEFINE_string(benchmark_name, "", "benchmark name");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0");
......@@ -214,7 +200,7 @@ int Main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message");
gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]";
LOG(INFO) << "Model tag: [" << FLAGS_model_tag << "]";
LOG(INFO) << "Device: [" << FLAGS_device << "]";
LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
......@@ -233,17 +219,6 @@ int Main(int argc, char **argv) {
std::unique_ptr<OpStat> statistician(new OpStat());
mace::DeviceType device_type = ParseDeviceType(FLAGS_device);
// config runtime
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
if (device_type == DeviceType::GPU) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
std::vector<std::string> input_names =
str_util::Split(FLAGS_input_node, ',');
......@@ -265,9 +240,36 @@ int Main(int argc, char **argv) {
ParseShape(output_shapes[i], &output_shape_vec[i]);
}
const unsigned char *model_data =
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
mace::DeviceType device_type = ParseDeviceType(FLAGS_device);
// config runtime
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
#endif // MACE_ENABLE_OPENCL
const char *kernel_path = getenv("MACE_CL_PROGRAM_PATH");
const std::string kernel_file_path =
std::string(kernel_path == nullptr ?
"/data/local/tmp/mace_run/cl_program" : kernel_path);
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory);
// Create Engine
std::unique_ptr<mace::MaceEngine> engine_ptr =
CreateMaceEngine(FLAGS_model_tag,
input_names,
output_names,
FLAGS_model_data_file.c_str(),
device_type);
std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs;
......@@ -303,14 +305,6 @@ int Main(int argc, char **argv) {
buffer_out);
}
// Init model
LOG(INFO) << "Run init";
std::unique_ptr<mace::MaceEngine> engine_ptr(
new mace::MaceEngine(&net_def, device_type, input_names, output_names));
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
int64_t warmup_time_us = 0;
int64_t num_warmup_runs = 0;
if (FLAGS_warmup_runs > 0) {
......
......@@ -33,3 +33,14 @@ cc_library(
srcs = ["version/version.cc"],
linkstatic = 1,
)
cc_library(
name = "generated_mace_engine_creator",
srcs = ["engine/mace_engine_creator.cc"],
linkstatic = 1,
deps = [
":generated_models",
"//mace/public",
"//mace/utils",
],
)
......@@ -37,24 +37,6 @@
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead
namespace mace {
namespace MACE_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelName();
extern const std::string ModelChecksum();
extern const std::string ModelBuildTime();
extern const std::string ModelBuildOptions();
} // namespace MACE_MODEL_TAG
} // namespace mace
namespace mace {
namespace examples {
......@@ -112,6 +94,9 @@ DeviceType ParseDeviceType(const std::string &device_str) {
}
DEFINE_string(model_tag,
"",
"model tag in yaml file");
DEFINE_string(input_node,
"input_node0,input_node1",
"input nodes, separated by comma");
......@@ -148,37 +133,38 @@ bool RunModel(const std::vector<std::string> &input_names,
const std::vector<std::string> &output_names,
const std::vector<std::vector<int64_t>> &output_shapes) {
// load model
const unsigned char *model_data =
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
DeviceType device_type = ParseDeviceType(FLAGS_device);
// config runtime
MaceStatus res = mace::SetOpenMPThreadPolicy(
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
#endif // MACE_ENABLE_OPENCL
// DO NOT USE tmp directory.
// Please use APP's own directory and make sure the directory exists.
// Just call once
const std::string kernel_file_path =
"/data/local/tmp/mace_run/cl";
"/data/local/tmp/mace_run/cl";
// Config internal kv storage factory.
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory);
// Init model
mace::MaceEngine engine(&net_def, device_type, input_names,
output_names);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
// Create Engine
std::unique_ptr<mace::MaceEngine> engine =
CreateMaceEngine(FLAGS_model_tag,
input_names,
output_names,
FLAGS_model_data_file.c_str(),
device_type);
const size_t input_count = input_names.size();
const size_t output_count = output_names.size();
......@@ -216,12 +202,12 @@ bool RunModel(const std::vector<std::string> &input_names,
}
LOG(INFO) << "Warm up run";
engine.Run(inputs, &outputs);
engine->Run(inputs, &outputs);
if (FLAGS_round > 0) {
LOG(INFO) << "Run model";
for (int i = 0; i < FLAGS_round; ++i) {
engine.Run(inputs, &outputs);
engine->Run(inputs, &outputs);
}
}
......@@ -247,10 +233,6 @@ int Main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "mace version: " << MaceVersion();
LOG(INFO) << "model name: " << mace::MACE_MODEL_TAG::ModelName();
LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum();
LOG(INFO) << "build time: " << mace::MACE_MODEL_TAG::ModelBuildTime();
LOG(INFO) << "build options: " << mace::MACE_MODEL_TAG::ModelBuildOptions();
LOG(INFO) << "input node: " << FLAGS_input_node;
LOG(INFO) << "input shape: " << FLAGS_input_shape;
LOG(INFO) << "output node: " << FLAGS_output_node;
......
......@@ -28,7 +28,7 @@ namespace mace {
const char *MaceVersion();
enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 };
enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3, AUTO = 4 };
enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 };
......@@ -82,6 +82,13 @@ class MaceEngine {
MaceEngine &operator=(const MaceEngine &) = delete;
};
std::unique_ptr<MaceEngine> CreateMaceEngine(
const std::string &model_tag,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes,
const char *model_data_file = nullptr,
const DeviceType device_type = DeviceType::AUTO);
} // namespace mace
#endif // MACE_PUBLIC_MACE_H_
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
#include "mace/utils/logging.h"
namespace mace {
{% for tag in model_tags %}
namespace {{tag}} {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelName();
extern const std::string ModelChecksum();
extern const std::string ModelBuildTime();
extern const std::string ModelBuildOptions();
} // namespace {{tag}}
{% endfor %}
namespace {
std::map<std::string, int> model_tag_map {
{% for i in range(model_tags |length) %}
std::make_pair({{ model_tags[i]|tojson }}, {{ i }}),
{% endfor %}
};
} // namespace
// Creates a MaceEngine for the model identified by |model_tag|.
//
// The jinja2 loop below expands to one switch case per embedded model tag,
// dispatching to that model's generated LoadModelData/CreateNet symbols.
//
// @param model_tag       tag of the model to instantiate (must be one of the
//                        tags this file was generated with).
// @param input_nodes     names of the graph input nodes.
// @param output_nodes    names of the graph output nodes.
// @param model_data_file path of the external weight file; may be unused when
//                        the model data is embedded.
// @param device_type     device to run on (CPU/GPU/HEXAGON/...).
// @return the constructed engine; LOG(FATAL)s on an unknown tag.
std::unique_ptr<MaceEngine> CreateMaceEngine(
    const std::string &model_tag,
    const std::vector<std::string> &input_nodes,
    const std::vector<std::string> &output_nodes,
    const char *model_data_file,
    const DeviceType device_type) {
  std::unique_ptr<MaceEngine> engine;
  // Look the tag up with find() rather than operator[]: operator[] would
  // default-insert 0 for an unknown tag and silently dispatch to the first
  // model instead of reaching the fatal "no model" branch.
  const auto tag_iter = model_tag_map.find(model_tag);
  if (tag_iter == model_tag_map.end()) {
    LOG(FATAL) << "There is no model named " << model_tag;
    return engine;
  }
  const unsigned char *model_data = nullptr;
  NetDef net_def;
  switch (tag_iter->second) {
{% for i in range(model_tags |length) %}
    case {{ i }}:
      model_data =
          mace::{{model_tags[i]}}::LoadModelData(model_data_file);
      net_def = mace::{{model_tags[i]}}::CreateNet(model_data);
      engine.reset(
          new mace::MaceEngine(&net_def, device_type, input_nodes,
                               output_nodes));
      // GPU/HEXAGON engines copy the weights during construction, so the
      // loaded model data can be released right away; CPU keeps referencing
      // it for the lifetime of the engine.
      if (device_type == DeviceType::GPU ||
          device_type == DeviceType::HEXAGON) {
        mace::{{model_tags[i]}}::UnloadModelData(model_data);
      }
      break;
{% endfor %}
    default:
      LOG(FATAL) << "There is no model named " << model_tag;
  }
  return engine;
}
} // namespace mace
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from jinja2 import Environment, FileSystemLoader
FLAGS = None
def gen_mace_engine_creator(model_tags, template_dir, output_dir):
    """Render the CreateMaceEngine dispatcher source from its jinja2 template.

    Args:
        model_tags: list of model tag strings, one per embedded model.
        template_dir: directory containing mace_engine_creator.jinja2.
        output_dir: directory the generated mace_engine_creator.cc is
            written into.
    """
    # trim_blocks keeps the generated C++ free of the blank lines that
    # {% ... %} template tags would otherwise leave behind.
    j2_env = Environment(
        loader=FileSystemLoader(template_dir), trim_blocks=True)
    # NOTE: the stray debug statement `print model_tags` was removed; it was
    # Python-2-only syntax and polluted the build output.
    template_name = 'mace_engine_creator.jinja2'
    source = j2_env.get_template(template_name).render(
        model_tags=model_tags,
    )
    with open(output_dir + '/mace_engine_creator.cc', "wb") as f:
        f.write(source)
def parse_args():
    """Parse command line arguments.

    Returns a (known_args, unknown_args) pair from parse_known_args().
    """
    arg_parser = argparse.ArgumentParser()
    # All three flags are plain strings defaulting to empty.
    for flag_name, flag_help in (("--model_tag", "model tag"),
                                 ("--template_dir", "template path"),
                                 ("--output_dir", "template path")):
        arg_parser.add_argument(
            flag_name, type=str, default="", help=flag_help)
    return arg_parser.parse_known_args()
if __name__ == '__main__':
    # Parse flags into the module-level FLAGS, then generate the source.
    FLAGS, _unused_args = parse_args()
    gen_mace_engine_creator(FLAGS.model_tag,
                            FLAGS.template_dir,
                            FLAGS.output_dir)
......@@ -10,6 +10,7 @@ cc_binary(
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_models",
"//mace/codegen:generated_mace_engine_creator",
"//mace/core:core",
],
)
......@@ -42,24 +42,6 @@
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL
// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead
namespace mace {
namespace MACE_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelName();
extern const std::string ModelChecksum();
extern const std::string ModelBuildTime();
extern const std::string ModelBuildOptions();
} // namespace MACE_MODEL_TAG
} // namespace mace
namespace mace {
namespace tools {
namespace validation {
......@@ -180,6 +162,9 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) {
return curr;
}
DEFINE_string(model_tag,
"",
"model tag in yaml");
DEFINE_string(input_node,
"input_node0,input_node1",
"input nodes, separated by comma");
......@@ -211,22 +196,12 @@ DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
bool RunModel(const std::vector<std::string> &input_names,
bool RunModel(const std::string &model_tag,
const std::vector<std::string> &input_names,
const std::vector<std::vector<int64_t>> &input_shapes,
const std::vector<std::string> &output_names,
const std::vector<std::vector<int64_t>> &output_shapes) {
// load model
int64_t t0 = NowMicros();
const unsigned char *model_data =
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
int64_t t1 = NowMicros();
double create_net_millis = (t1 - t0) / 1000.0;
LOG(INFO) << "CreateNetDef latency: " << create_net_millis << " ms";
DeviceType device_type = ParseDeviceType(FLAGS_device);
LOG(INFO) << "Runing with device type: " << device_type;
// config runtime
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
......@@ -244,20 +219,20 @@ bool RunModel(const std::vector<std::string> &input_names,
std::string(kernel_path == nullptr ?
"/data/local/tmp/mace_run/cl_program" : kernel_path);
// Init model
LOG(INFO) << "Run init";
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory);
mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
int64_t t2 = NowMicros();
double mace_engine_ctor_millis = (t2 - t1) / 1000.0;
double init_millis = (t2 - t0) / 1000.0;
LOG(INFO) << "MaceEngine constructor latency: "
<< mace_engine_ctor_millis << " ms";
// Create Engine
int64_t t0 = NowMicros();
std::unique_ptr<mace::MaceEngine> engine =
CreateMaceEngine(model_tag,
input_names,
output_names,
FLAGS_model_data_file.c_str(),
device_type);
int64_t t1 = NowMicros();
double init_millis = (t1 - t0) / 1000.0;
LOG(INFO) << "Total init latency: " << init_millis << " ms";
const size_t input_count = input_names.size();
......@@ -297,7 +272,7 @@ bool RunModel(const std::vector<std::string> &input_names,
LOG(INFO) << "Warm up run";
int64_t t3 = NowMicros();
engine.Run(inputs, &outputs);
engine->Run(inputs, &outputs);
int64_t t4 = NowMicros();
double warmup_millis = (t4 - t3) / 1000.0;
LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
......@@ -308,7 +283,7 @@ bool RunModel(const std::vector<std::string> &input_names,
int64_t t0 = NowMicros();
struct mallinfo prev = mallinfo();
for (int i = 0; i < FLAGS_round; ++i) {
engine.Run(inputs, &outputs);
engine->Run(inputs, &outputs);
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev);
......@@ -320,11 +295,11 @@ bool RunModel(const std::vector<std::string> &input_names,
}
// Metrics reporting tools depends on the format, keep in consistent
printf("================================================================\n");
printf(" create_net engine_ctor init warmup run_avg\n");
printf("================================================================\n");
printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis,
mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis);
printf("========================================\n");
printf(" init warmup run_avg\n");
printf("========================================\n");
printf("time %11.3f %11.3f %11.3f\n",
init_millis, warmup_millis, model_run_millis);
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) {
......@@ -356,10 +331,6 @@ int Main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "mace version: " << MaceVersion();
LOG(INFO) << "model name: " << mace::MACE_MODEL_TAG::ModelName();
LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum();
LOG(INFO) << "build time: " << mace::MACE_MODEL_TAG::ModelBuildTime();
LOG(INFO) << "build options: " << mace::MACE_MODEL_TAG::ModelBuildOptions();
LOG(INFO) << "input node: " << FLAGS_input_node;
LOG(INFO) << "input shape: " << FLAGS_input_shape;
LOG(INFO) << "output node: " << FLAGS_output_node;
......@@ -399,7 +370,8 @@ int Main(int argc, char **argv) {
for (int i = 0; i < FLAGS_restart_round; ++i) {
VLOG(0) << "restart round " << i;
ret =
RunModel(input_names, input_shape_vec, output_names, output_shape_vec);
RunModel(FLAGS_model_tag, input_names, input_shape_vec,
output_names, output_shape_vec);
}
if (ret) {
return 0;
......
......@@ -95,13 +95,17 @@ def gen_opencl_and_tuning_code(target_abi,
serialno,
model_output_dirs,
pull_or_not):
cl_built_kernel_file_name = "mace_cl_compiled_program.bin"
cl_platform_info_file_name = "mace_cl_platform_info.txt"
if pull_or_not:
sh_commands.pull_binaries(target_abi, serialno, model_output_dirs)
codegen_path = "mace/codegen"
sh_commands.pull_binaries(target_abi, serialno, model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name)
# generate opencl binary code
sh_commands.gen_opencl_binary_code(model_output_dirs)
sh_commands.gen_opencl_binary_code(model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name)
sh_commands.gen_tuning_param_code(model_output_dirs)
......@@ -227,12 +231,11 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
sh_commands.bazel_build(
mace_run_target,
abi=target_abi,
model_tag=model_name,
production_mode=False,
hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp
)
sh_commands.update_mace_run_lib(model_output_dir, target_abi,
sh_commands.update_mace_run_lib(model_output_dir,
model_name, embed_model_data)
tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data,
......@@ -254,13 +257,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
mace_run_target,
strip,
abi=target_abi,
model_tag=model_name,
production_mode=True,
hexagon_mode=hexagon_mode,
debug=debug,
enable_openmp=enable_openmp
)
sh_commands.update_mace_run_lib(model_output_dir, target_abi,
sh_commands.update_mace_run_lib(model_output_dir,
model_name, embed_model_data)
else:
gen_opencl_and_tuning_code(target_abi, serialno, [], False)
......@@ -268,13 +270,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
mace_run_target,
strip,
abi=target_abi,
model_tag=model_name,
production_mode=True,
hexagon_mode=hexagon_mode,
debug=debug,
enable_openmp=enable_openmp
)
sh_commands.update_mace_run_lib(model_output_dir, target_abi,
sh_commands.update_mace_run_lib(model_output_dir,
model_name, embed_model_data)
......@@ -525,6 +526,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
target_abi, phone_data_dir, target_soc="", serialno=""):
hexagon_mode = get_hexagon_mode(configs)
model_output_dirs = []
for model_name in configs["models"]:
print '===================', model_name, '==================='
model_config = configs["models"][model_name]
......@@ -534,16 +536,16 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
# Create model build directory
model_path_digest = md5sum(model_config["model_file_path"])
model_output_base_dir = "%s/%s/%s/%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest)
if target_abi == "host":
model_output_dir = "%s/%s/%s/%s/%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest, target_abi)
model_output_dir = "%s/%s" % (model_output_base_dir, target_abi)
else:
device_name = sh_commands.adb_get_device_name_by_serialno(serialno)
model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest, device_name.replace(' ', ''),
model_output_dir = "%s/%s_%s/%s" % (
model_output_base_dir, device_name.replace(' ', ''),
target_soc, target_abi)
model_output_dirs.append(model_output_dir)
......@@ -552,16 +554,14 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
sh.rm("-rf", model_output_dir)
os.makedirs(model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "benchmark" or \
FLAGS.mode == "all":
sh_commands.clear_mace_run_data(
target_abi, serialno, phone_data_dir)
model_file_path, weight_file_path = get_model_files(
model_config["model_file_path"],
model_output_dir,
model_output_base_dir,
model_config["weight_file_path"])
sh_commands.clear_phone_data_dir(
target_abi, serialno, phone_data_dir)
if FLAGS.mode == "build" or FLAGS.mode == "run" or \
FLAGS.mode == "validate" or \
FLAGS.mode == "benchmark" or FLAGS.mode == "all":
......@@ -570,25 +570,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config["input_shapes"],
input_file_list)
if FLAGS.mode == "build" or FLAGS.mode == "benchmark" or \
FLAGS.mode == "all":
sh_commands.gen_model_code(
"mace/codegen/models/%s" % model_name,
model_config["platform"],
model_file_path,
weight_file_path,
model_config["model_sha256_checksum"],
",".join(model_config["input_nodes"]),
",".join(model_config["output_nodes"]),
data_type,
model_config["runtime"],
model_name,
":".join(model_config["input_shapes"]),
model_config["dsp_mode"],
embed_model_data,
model_config["fast_conv"],
model_config["obfuscate"])
if FLAGS.mode == "build" or FLAGS.mode == "all":
build_mace_run_prod(hexagon_mode,
model_config["runtime"],
......@@ -609,9 +590,14 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config["limit_opencl_kernel_time"],
phone_data_dir,
FLAGS.enable_openmp)
sh_commands.build_benchmark_model(target_abi,
embed_model_data,
model_output_dir,
model_name,
hexagon_mode)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or \
FLAGS.mode == "all":
FLAGS.mode == "all":
tuning_run(model_config["runtime"],
target_abi,
serialno,
......@@ -647,7 +633,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config["output_shapes"],
model_name,
device_type,
hexagon_mode,
phone_data_dir,
FLAGS.omp_num_threads,
FLAGS.cpu_affinity_policy,
......@@ -738,12 +723,56 @@ def main(unused_args):
# generate source
sh_commands.gen_mace_version()
sh_commands.gen_encrypted_opencl_source()
sh_commands.gen_mace_engine_creator_source(configs['models'].keys())
embed_model_data = configs["embed_model_data"]
target_socs = get_target_socs(configs)
embed_model_data = configs["embed_model_data"]
vlog_level = FLAGS.vlog_level
phone_data_dir = "/data/local/tmp/mace_run/"
if FLAGS.mode == "build" or FLAGS.mode == "all":
print '* Model Convert'
sh_commands.clear_model_codegen()
for model_name in configs["models"]:
print '===================', model_name, '==================='
model_config = configs["models"][model_name]
data_type, device_type = get_data_and_device_type(
model_config["runtime"])
# Create model build directory
model_path_digest = md5sum(model_config["model_file_path"])
model_output_base_dir = "%s/%s/%s/%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest)
if os.path.exists(model_output_base_dir):
sh.rm("-rf", model_output_base_dir)
os.makedirs(model_output_base_dir)
model_file_path, weight_file_path = get_model_files(
model_config["model_file_path"],
model_output_base_dir,
model_config["weight_file_path"])
sh_commands.gen_model_code(
"mace/codegen/models/%s" % model_name,
model_config["platform"],
model_file_path,
weight_file_path,
model_config["model_sha256_checksum"],
",".join(model_config["input_nodes"]),
",".join(model_config["output_nodes"]),
data_type,
model_config["runtime"],
model_name,
":".join(model_config["input_shapes"]),
model_config["dsp_mode"],
embed_model_data,
model_config["fast_conv"],
model_config["obfuscate"])
for target_abi in configs["target_abis"]:
for target_soc in target_socs:
if target_abi != 'host':
......
......@@ -33,6 +33,7 @@ try:
from binary_codegen import tuning_param_codegen
from generate_data import generate_input_data
from validate import validate
from mace_engine_generator import gen_mace_engine_creator
except Exception as e:
print("Import error:\n%s" % e)
exit(1)
......@@ -74,15 +75,15 @@ def is_device_locked(serialno):
################################
# clear data
################################
def clear_mace_run_data(abi,
serialno,
phone_data_dir,
model_codegen_dir="mace/codegen/models"):
def clear_phone_data_dir(abi, serialno, phone_data_dir):
    """Remove the working data directory on the target phone.

    No-op for host builds, which have no attached device to clean.
    """
    if abi == "host":
        return
    sh.adb("-s",
           serialno,
           "shell",
           "rm -rf %s" % phone_data_dir)
def clear_model_codegen(model_codegen_dir="mace/codegen/models"):
    """Delete previously generated per-model code, if any exists."""
    if not os.path.exists(model_codegen_dir):
        return
    sh.rm("-rf", model_codegen_dir)
......@@ -268,7 +269,6 @@ def adb_run_valgrind(serialno,
def bazel_build(target,
strip="always",
abi="armeabi-v7a",
model_tag="",
production_mode=False,
hexagon_mode=False,
disable_no_tuning_warning=False,
......@@ -289,7 +289,6 @@ def bazel_build(target,
"--copt=-std=c++11",
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror=return-type",
"--copt=-DMACE_MODEL_TAG=%s" % model_tag,
"--copt=-O3",
"--define",
"openmp=%s" % str(enable_openmp).lower(),
......@@ -315,7 +314,6 @@ def bazel_build(target,
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror=return-type",
"--copt=-DMACE_OBFUSCATE_LITERALS",
"--copt=-DMACE_MODEL_TAG=%s" % model_tag,
"--copt=-O3",
"--define",
"neon=true",
......@@ -371,7 +369,21 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
"mace/codegen/opencl/opencl_encrypt_program.cc")
def pull_binaries(abi, serialno, model_output_dirs):
def gen_mace_engine_creator_source(model_tags, codegen_path="mace/codegen"):
    """Generate mace_engine_creator.cc for the given model tags.

    The generated file dispatches CreateMaceEngine(model_tag, ...) to each
    model's generated LoadModelData/CreateNet symbols.

    Args:
        model_tags: list of model tag strings to embed in the dispatcher.
        codegen_path: root of the codegen output tree.
    """
    # Fixed typo in both log messages: "Genearte" -> "Generate".
    print("* Generate mace engine creator source")
    codegen_tools_dir = "%s/engine" % codegen_path
    # Start from a clean directory so stale generated sources never linger.
    sh.rm("-rf", codegen_tools_dir)
    sh.mkdir("-p", codegen_tools_dir)
    gen_mace_engine_creator(
        model_tags,
        "mace/python/tools",
        codegen_tools_dir)
    print("Generate mace engine creator source done!\n")
def pull_binaries(abi, serialno, model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name):
compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/"
mace_run_param_file = "mace_run.config"
......@@ -385,15 +397,18 @@ def pull_binaries(abi, serialno, model_output_dirs):
sh.rm("-rf", cl_bin_dir)
sh.mkdir("-p", cl_bin_dir)
if abi != "host":
adb_pull(compiled_opencl_dir, cl_bin_dir, serialno)
adb_pull(compiled_opencl_dir + cl_built_kernel_file_name,
cl_bin_dir, serialno)
adb_pull(compiled_opencl_dir + cl_platform_info_file_name,
cl_bin_dir, serialno)
adb_pull("/data/local/tmp/mace_run/%s" % mace_run_param_file,
cl_bin_dir, serialno)
def gen_opencl_binary_code(model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name,
codegen_path="mace/codegen"):
cl_built_kernel_file_name = "mace_cl_compiled_program.bin"
cl_platform_info_file_name = "mace_cl_platform_info.txt"
opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path
cl_bin_dirs = []
......@@ -528,25 +543,8 @@ def gen_random_input(model_output_dir,
def update_mace_run_lib(model_output_dir,
abi,
model_tag,
embed_model_data,
generated_model_lib_dir="bazel-bin/mace/codegen/"):
model_lib_path = model_output_dir + "/libmace_%s.a" % model_tag
if abi == "host":
bazel_build(
"//mace/codegen:generated_models",
abi=abi,
model_tag=model_tag)
generated_model_lib_name = "libgenerated_models.pic.a"
else:
generated_model_lib_name = "libgenerated_models.a"
if os.path.exists(model_lib_path):
sh.rm("-rf", model_lib_path)
sh.cp("-f", generated_model_lib_dir + "/" + generated_model_lib_name,
model_lib_path)
embed_model_data):
mace_run_filepath = model_output_dir + "/mace_run"
if os.path.exists(mace_run_filepath):
sh.rm("-rf", mace_run_filepath)
......@@ -560,6 +558,11 @@ def update_mace_run_lib(model_output_dir,
model_output_dir)
def create_compiled_opencl_dir(serialno):
    """Ensure the on-device directory for compiled OpenCL programs exists."""
    sh.adb("-s", serialno, "shell", "mkdir", "-p",
           "/data/local/tmp/mace_run/cl_program/")
def tuning_run(abi,
serialno,
vlog_level,
......@@ -598,6 +601,7 @@ def tuning_run(abi,
"env",
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"%s/mace_run" % model_output_dir,
"--model_tag=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
......@@ -622,8 +626,7 @@ def tuning_run(abi,
return stdout
else:
sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/"
sh.adb("-s", serialno, "shell", "mkdir", "-p", compiled_opencl_dir)
create_compiled_opencl_dir(serialno)
for input_name in input_nodes:
formatted_name = common.formatted_file_name(input_file_name,
......@@ -657,6 +660,7 @@ def tuning_run(abi,
])
adb_cmd.extend([
"%s/mace_run" % phone_data_dir,
"--model_tag=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
......@@ -846,6 +850,12 @@ def merge_libs(target_soc,
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_models.pic.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_mace_engine_creator.pic.a\n")
else:
mri_stream += "create %s/libmace_%s.%s.a\n" % \
(model_bin_dir, project_name, target_soc)
......@@ -858,6 +868,12 @@ def merge_libs(target_soc,
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_version.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_models.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_mace_engine_creator.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/core/libcore.a\n")
......@@ -875,8 +891,6 @@ def merge_libs(target_soc,
"bazel-bin/mace/ops/libops.lo\n")
for model_output_dir in model_output_dirs:
for lib in sh.ls(glob.glob("%s/*.a" % model_output_dir), "-1"):
mri_stream += "addlib %s\n" % lib
if not embed_model_data:
sh.cp("-f", glob.glob("%s/*.data" % model_output_dir),
model_data_dir)
......@@ -921,6 +935,28 @@ def packaging_lib(libmace_output_dir, project_name):
print("Packaging Done!\n")
def build_benchmark_model(abi,
                          embed_model_data,
                          model_output_dir,
                          model_tag,
                          hexagon_mode):
    """Build //mace/benchmark:benchmark_model and stage the binary (plus the
    model data file, when not embedded) into model_output_dir."""
    target = "//mace/benchmark:benchmark_model"
    staged_binary = "%s/benchmark_model" % model_output_dir
    # Drop any stale binary left over from a previous build.
    if os.path.exists(staged_binary):
        sh.rm("-rf", staged_binary)
    if not embed_model_data:
        model_data_path = "mace/codegen/models/%s/%s.data" % (model_tag,
                                                              model_tag)
        sh.cp("-f", model_data_path, model_output_dir)
    bazel_build(target,
                abi=abi,
                production_mode=True,
                hexagon_mode=hexagon_mode)
    built_binary = "/".join(bazel_target_to_bin(target))
    sh.cp("-f", built_binary, model_output_dir)
def benchmark_model(abi,
serialno,
vlog_level,
......@@ -932,31 +968,13 @@ def benchmark_model(abi,
output_shapes,
model_tag,
device_type,
hexagon_mode,
phone_data_dir,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name="model_input",
output_file_name="model_out"):
input_file_name="model_input"):
print("* Benchmark for %s" % model_tag)
benchmark_binary_file = "%s/benchmark_model" % model_output_dir
if os.path.exists(benchmark_binary_file):
sh.rm("-rf", benchmark_binary_file)
if not embed_model_data:
sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag),
model_output_dir)
benchmark_target = "//mace/benchmark:benchmark_model"
bazel_build(benchmark_target,
abi=abi,
model_tag=model_tag,
production_mode=True,
hexagon_mode=hexagon_mode)
target_bin = "/".join(bazel_target_to_bin(benchmark_target))
sh.cp("-f", target_bin, model_output_dir)
stdout_buff = []
process_output = make_output_processor(stdout_buff)
......@@ -966,6 +984,7 @@ def benchmark_model(abi,
"env",
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"%s/benchmark_model" % model_output_dir,
"--model_tag=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
......@@ -981,6 +1000,7 @@ def benchmark_model(abi,
p.wait()
else:
sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
create_compiled_opencl_dir(serialno)
for input_name in input_nodes:
formatted_name = common.formatted_file_name(input_file_name,
......@@ -1002,6 +1022,7 @@ def benchmark_model(abi,
phone_data_dir,
"MACE_OPENCL_PROFILING=1",
"%s/benchmark_model" % phone_data_dir,
"--model_tag=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册