Commit dbb51228 authored by 叶剑武

Merge branch 'new-api' into 'master'

Add CreateMaceEngine API and speed up build logic.

See merge request !463
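For reference, the CreateMaceEngine factory introduced here replaces the per-model extern declarations (LoadModelData / CreateNet / UnloadModelData) that each tool previously had to declare. Below is a minimal usage sketch of the new call pattern, based on the calls in mace_run.cc and example.cc in this diff; the model tag "mobilenet_v1" and the node names are hypothetical placeholders, and the program only builds against a tree where the factory header has been generated.

#include <memory>
#include <string>
#include <vector>

#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
#include "mace/codegen/engine/mace_engine_factory.h"  // generated by the codegen script in this MR

int main() {
  const std::vector<std::string> input_names{"input_node0"};
  const std::vector<std::string> output_names{"output_node0"};

  std::shared_ptr<mace::MaceEngine> engine;
  // Pass nullptr for model_data_file when the model data is embedded in code.
  mace::MaceStatus status = mace::CreateMaceEngine(
      "mobilenet_v1", nullptr, input_names, output_names,
      mace::DeviceType::CPU, &engine);
  if (status != mace::MaceStatus::MACE_SUCCESS) {
    return 1;  // creation failed: unknown model name or bad arguments
  }
  // engine->Run(inputs, &outputs) afterwards, as in the updated tools below.
  return 0;
}

When EMBED_MODEL_DATA is set to 0, pass the path of the external model data file instead of nullptr, which is exactly the if/else branch the updated tools use.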
...@@ -8,6 +8,7 @@ mace/codegen/opencl/ ...@@ -8,6 +8,7 @@ mace/codegen/opencl/
mace/codegen/opencl_bin/ mace/codegen/opencl_bin/
mace/codegen/tuning/ mace/codegen/tuning/
mace/codegen/version/ mace/codegen/version/
mace/codegen/engine/
build/ build/
docs/_build/ docs/_build/
......
...@@ -6,6 +6,7 @@ load( ...@@ -6,6 +6,7 @@ load(
"if_not_production_mode", "if_not_production_mode",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_openmp_enabled", "if_openmp_enabled",
"if_android",
) )
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
...@@ -26,12 +27,14 @@ cc_binary( ...@@ -26,12 +27,14 @@ cc_binary(
srcs = [ srcs = [
"benchmark_model.cc", "benchmark_model.cc",
], ],
copts = if_android(["-DMACE_ENABLE_OPENCL"]),
linkopts = if_openmp_enabled(["-fopenmp"]), linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":statistics", ":statistics",
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_models", "//mace/codegen:generated_models",
"//mace/codegen:generated_mace_engine_factory",
], ],
) )
......
...@@ -25,20 +25,7 @@ ...@@ -25,20 +25,7 @@
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "mace/benchmark/statistics.h" #include "mace/benchmark/statistics.h"
#include "mace/codegen/engine/mace_engine_factory.h"
namespace mace {
namespace MACE_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
} // namespace MACE_MODEL_TAG
} // namespace mace
namespace mace { namespace mace {
namespace benchmark { namespace benchmark {
...@@ -188,6 +175,7 @@ bool Run(const std::string &title, ...@@ -188,6 +175,7 @@ bool Run(const std::string &title,
return true; return true;
} }
DEFINE_string(model_name, "", "model name in yaml");
DEFINE_string(device, "CPU", "Device [CPU|GPU|DSP]"); DEFINE_string(device, "CPU", "Device [CPU|GPU|DSP]");
DEFINE_string(input_node, "input_node0,input_node1", DEFINE_string(input_node, "input_node0,input_node1",
"input nodes, separated by comma"); "input nodes, separated by comma");
...@@ -198,7 +186,6 @@ DEFINE_string(output_shape, "", "output shape, separated by colon and comma"); ...@@ -198,7 +186,6 @@ DEFINE_string(output_shape, "", "output shape, separated by colon and comma");
DEFINE_string(input_file, "", "input file name"); DEFINE_string(input_file, "", "input file name");
DEFINE_int32(max_num_runs, 100, "number of runs max"); DEFINE_int32(max_num_runs, 100, "number of runs max");
DEFINE_string(max_time, "10.0", "length to run max"); DEFINE_string(max_time, "10.0", "length to run max");
DEFINE_string(benchmark_name, "", "benchmark name");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "", DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0"); "model data file name, used when EMBED_MODEL_DATA set to 0");
...@@ -214,7 +201,7 @@ int Main(int argc, char **argv) { ...@@ -214,7 +201,7 @@ int Main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message"); gflags::SetUsageMessage("some usage message");
gflags::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]"; LOG(INFO) << "Model name: [" << FLAGS_model_name << "]";
LOG(INFO) << "Device: [" << FLAGS_device << "]"; LOG(INFO) << "Device: [" << FLAGS_device << "]";
LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]"; LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]"; LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
...@@ -233,17 +220,6 @@ int Main(int argc, char **argv) { ...@@ -233,17 +220,6 @@ int Main(int argc, char **argv) {
std::unique_ptr<OpStat> statistician(new OpStat()); std::unique_ptr<OpStat> statistician(new OpStat());
mace::DeviceType device_type = ParseDeviceType(FLAGS_device);
// config runtime
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
if (device_type == DeviceType::GPU) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
std::vector<std::string> input_names = std::vector<std::string> input_names =
str_util::Split(FLAGS_input_node, ','); str_util::Split(FLAGS_input_node, ',');
...@@ -265,9 +241,53 @@ int Main(int argc, char **argv) { ...@@ -265,9 +241,53 @@ int Main(int argc, char **argv) {
ParseShape(output_shapes[i], &output_shape_vec[i]); ParseShape(output_shapes[i], &output_shape_vec[i]);
} }
const unsigned char *model_data = mace::DeviceType device_type = ParseDeviceType(FLAGS_device);
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); // config runtime
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
#endif // MACE_ENABLE_OPENCL
const char *kernel_path = getenv("MACE_INTERNAL_STORAGE_PATH");
const std::string kernel_file_path =
std::string(kernel_path == nullptr ?
"/data/local/tmp/mace_run/interior" : kernel_path);
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory);
// Create Engine
std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status;
// Create Engine
if (FLAGS_model_data_file.empty()) {
create_engine_status =
CreateMaceEngine(FLAGS_model_name.c_str(),
nullptr,
input_names,
output_names,
device_type,
&engine);
} else {
create_engine_status =
CreateMaceEngine(FLAGS_model_name.c_str(),
FLAGS_model_data_file.c_str(),
input_names,
output_names,
device_type,
&engine);
}
if (create_engine_status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Create engine error, please check the arguments";
}
std::map<std::string, mace::MaceTensor> inputs; std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs; std::map<std::string, mace::MaceTensor> outputs;
...@@ -303,19 +323,11 @@ int Main(int argc, char **argv) { ...@@ -303,19 +323,11 @@ int Main(int argc, char **argv) {
buffer_out); buffer_out);
} }
// Init model
LOG(INFO) << "Run init";
std::unique_ptr<mace::MaceEngine> engine_ptr(
new mace::MaceEngine(&net_def, device_type, input_names, output_names));
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
int64_t warmup_time_us = 0; int64_t warmup_time_us = 0;
int64_t num_warmup_runs = 0; int64_t num_warmup_runs = 0;
if (FLAGS_warmup_runs > 0) { if (FLAGS_warmup_runs > 0) {
bool status = bool status =
Run("Warm Up", engine_ptr.get(), inputs, &outputs, Run("Warm Up", engine.get(), inputs, &outputs,
FLAGS_warmup_runs, -1.0, FLAGS_warmup_runs, -1.0,
&warmup_time_us, &num_warmup_runs, nullptr); &warmup_time_us, &num_warmup_runs, nullptr);
if (!status) { if (!status) {
...@@ -326,7 +338,7 @@ int Main(int argc, char **argv) { ...@@ -326,7 +338,7 @@ int Main(int argc, char **argv) {
int64_t no_stat_time_us = 0; int64_t no_stat_time_us = 0;
int64_t no_stat_runs = 0; int64_t no_stat_runs = 0;
bool status = bool status =
Run("Run without statistics", engine_ptr.get(), inputs, &outputs, Run("Run without statistics", engine.get(), inputs, &outputs,
FLAGS_max_num_runs, max_benchmark_time_seconds, FLAGS_max_num_runs, max_benchmark_time_seconds,
&no_stat_time_us, &no_stat_runs, nullptr); &no_stat_time_us, &no_stat_runs, nullptr);
if (!status) { if (!status) {
...@@ -335,7 +347,7 @@ int Main(int argc, char **argv) { ...@@ -335,7 +347,7 @@ int Main(int argc, char **argv) {
int64_t stat_time_us = 0; int64_t stat_time_us = 0;
int64_t stat_runs = 0; int64_t stat_runs = 0;
status = Run("Run with statistics", engine_ptr.get(), inputs, &outputs, status = Run("Run with statistics", engine.get(), inputs, &outputs,
FLAGS_max_num_runs, max_benchmark_time_seconds, FLAGS_max_num_runs, max_benchmark_time_seconds,
&stat_time_us, &stat_runs, statistician.get()); &stat_time_us, &stat_runs, statistician.get());
if (!status) { if (!status) {
......
...@@ -33,3 +33,11 @@ cc_library( ...@@ -33,3 +33,11 @@ cc_library(
srcs = ["version/version.cc"], srcs = ["version/version.cc"],
linkstatic = 1, linkstatic = 1,
) )
cc_library(
name = "generated_mace_engine_factory",
hdrs = ["engine/mace_engine_factory.h"],
deps = [
"//mace/public",
],
)
...@@ -9,5 +9,6 @@ cc_binary( ...@@ -9,5 +9,6 @@ cc_binary(
deps = [ deps = [
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_models", "//mace/codegen:generated_models",
"//mace/codegen:generated_mace_engine_factory",
], ],
) )
...@@ -34,27 +34,11 @@ ...@@ -34,27 +34,11 @@
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
// if convert model to code.
#include "mace/codegen/engine/mace_engine_factory.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead
namespace mace {
namespace MACE_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelName();
extern const std::string ModelChecksum();
extern const std::string ModelBuildTime();
extern const std::string ModelBuildOptions();
} // namespace MACE_MODEL_TAG
} // namespace mace
namespace mace { namespace mace {
namespace examples { namespace examples {
...@@ -112,6 +96,9 @@ DeviceType ParseDeviceType(const std::string &device_str) { ...@@ -112,6 +96,9 @@ DeviceType ParseDeviceType(const std::string &device_str) {
} }
DEFINE_string(model_name,
"",
"model name in yaml file");
DEFINE_string(input_node, DEFINE_string(input_node,
"input_node0,input_node1", "input_node0,input_node1",
"input nodes, separated by comma"); "input nodes, separated by comma");
...@@ -148,36 +135,53 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -148,36 +135,53 @@ bool RunModel(const std::vector<std::string> &input_names,
const std::vector<std::string> &output_names, const std::vector<std::string> &output_names,
const std::vector<std::vector<int64_t>> &output_shapes) { const std::vector<std::vector<int64_t>> &output_shapes) {
// load model // load model
const unsigned char *model_data =
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
DeviceType device_type = ParseDeviceType(FLAGS_device); DeviceType device_type = ParseDeviceType(FLAGS_device);
// config runtime // config runtime
MaceStatus res = mace::SetOpenMPThreadPolicy( mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads, FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy)); static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) { if (device_type == DeviceType::GPU) {
mace::SetGPUHints( mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint), static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint)); static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} }
#endif // MACE_ENABLE_OPENCL
// DO NOT USE tmp directory. // DO NOT USE tmp directory.
// Please use APP's own directory and make sure the directory exists. // Please use APP's own directory and make sure the directory exists.
const std::string kernel_file_path = // Just call once
"/data/local/tmp/mace_run/cl"; const std::string internal_storage_path =
"/data/local/tmp/mace_run/interior";
// Config internal kv storage factory. // Config internal kv storage factory.
std::shared_ptr<KVStorageFactory> storage_factory( std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(kernel_file_path)); new FileStorageFactory(internal_storage_path));
SetKVStorageFactory(storage_factory); SetKVStorageFactory(storage_factory);
// Init model
mace::MaceEngine engine(&net_def, device_type, input_names, // Create Engine
output_names); std::shared_ptr<mace::MaceEngine> engine;
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { MaceStatus create_engine_status;
mace::MACE_MODEL_TAG::UnloadModelData(model_data); // Create Engine
if (FLAGS_model_data_file.empty()) {
create_engine_status =
CreateMaceEngine(FLAGS_model_name.c_str(),
nullptr,
input_names,
output_names,
device_type,
&engine);
} else {
create_engine_status =
CreateMaceEngine(FLAGS_model_name.c_str(),
FLAGS_model_data_file.c_str(),
input_names,
output_names,
device_type,
&engine);
}
if (create_engine_status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Create engine error, please check the arguments";
} }
const size_t input_count = input_names.size(); const size_t input_count = input_names.size();
...@@ -216,12 +220,12 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -216,12 +220,12 @@ bool RunModel(const std::vector<std::string> &input_names,
} }
LOG(INFO) << "Warm up run"; LOG(INFO) << "Warm up run";
engine.Run(inputs, &outputs); engine->Run(inputs, &outputs);
if (FLAGS_round > 0) { if (FLAGS_round > 0) {
LOG(INFO) << "Run model"; LOG(INFO) << "Run model";
for (int i = 0; i < FLAGS_round; ++i) { for (int i = 0; i < FLAGS_round; ++i) {
engine.Run(inputs, &outputs); engine->Run(inputs, &outputs);
} }
} }
...@@ -247,10 +251,6 @@ int Main(int argc, char **argv) { ...@@ -247,10 +251,6 @@ int Main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "mace version: " << MaceVersion(); LOG(INFO) << "mace version: " << MaceVersion();
LOG(INFO) << "model name: " << mace::MACE_MODEL_TAG::ModelName();
LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum();
LOG(INFO) << "build time: " << mace::MACE_MODEL_TAG::ModelBuildTime();
LOG(INFO) << "build options: " << mace::MACE_MODEL_TAG::ModelBuildOptions();
LOG(INFO) << "input node: " << FLAGS_input_node; LOG(INFO) << "input node: " << FLAGS_input_node;
LOG(INFO) << "input shape: " << FLAGS_input_shape; LOG(INFO) << "input shape: " << FLAGS_input_shape;
LOG(INFO) << "output node: " << FLAGS_output_node; LOG(INFO) << "output node: " << FLAGS_output_node;
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
namespace mace {
{% for tag in model_tags %}
namespace {{tag}} {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelName();
extern const std::string ModelChecksum();
extern const std::string ModelBuildTime();
extern const std::string ModelBuildOptions();
} // namespace {{tag}}
{% endfor %}
namespace {
std::map<std::string, int> model_name_map {
{% for i in range(model_tags |length) %}
std::make_pair({{ model_tags[i]|tojson }}, {{ i }}),
{% endfor %}
};
} // namespace
MaceStatus CreateMaceEngine(
const char *model_name,
const char *model_data_file,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes,
const DeviceType device_type,
std::shared_ptr<MaceEngine> *engine) {
// load model
if (engine == nullptr) {
return MaceStatus::MACE_INVALID_ARGS;
}
const unsigned char * model_data = nullptr;
NetDef net_def;
// Reject unknown model names instead of letting operator[] insert 0 and
// silently dispatch to the first model.
if (model_name_map.find(model_name) == model_name_map.end()) {
return MaceStatus::MACE_INVALID_ARGS;
}
switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %}
case {{ i }}:
model_data =
mace::{{model_tags[i]}}::LoadModelData(model_data_file);
net_def = mace::{{model_tags[i]}}::CreateNet(model_data);
engine->reset(
new mace::MaceEngine(&net_def, device_type, input_nodes, output_nodes));
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::{{model_tags[i]}}::UnloadModelData(model_data);
}
break;
{% endfor %}
default:
return MaceStatus::MACE_INVALID_ARGS;
}
return MaceStatus::MACE_SUCCESS;
}
} // namespace mace
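The dispatch the template generates boils down to a name-to-index map plus a switch over the expanded model tags. A standalone sketch of that pattern with hypothetical tags "foo" and "bar" follows; the real generated file additionally emits the extern declarations and the engine construction for each tag, which are stubbed out here.

#include <iostream>
#include <map>
#include <string>

namespace {
const std::map<std::string, int> model_name_map{
    {"foo", 0},
    {"bar", 1},
};
}  // namespace

// Stand-in for CreateMaceEngine: returns 0 on success, -1 for an unknown name.
int CreateEngineStub(const std::string &model_name) {
  auto it = model_name_map.find(model_name);
  if (it == model_name_map.end()) {
    return -1;  // corresponds to MaceStatus::MACE_INVALID_ARGS
  }
  switch (it->second) {
    case 0:
      // would call mace::foo::LoadModelData / CreateNet and build the engine
      std::cout << "dispatch to model tag foo\n";
      break;
    case 1:
      // would call mace::bar::LoadModelData / CreateNet and build the engine
      std::cout << "dispatch to model tag bar\n";
      break;
  }
  return 0;  // corresponds to MaceStatus::MACE_SUCCESS
}

int main() {
  CreateEngineStub("foo");      // dispatches to the foo branch
  CreateEngineStub("unknown");  // rejected
  return 0;
}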
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from jinja2 import Environment, FileSystemLoader
FLAGS = None
def gen_mace_engine_factory(model_tags, template_dir, output_dir):
# Create the jinja2 environment.
j2_env = Environment(
loader=FileSystemLoader(template_dir), trim_blocks=True)
# generate mace engine factory header
print model_tags
template_name = 'mace_engine_factory.h.jinja2'
source = j2_env.get_template(template_name).render(
model_tags=model_tags,
)
with open(output_dir + '/mace_engine_factory.h', "wb") as f:
f.write(source)
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_tag",
type=str,
default="",
help="model tag")
parser.add_argument(
"--template_dir", type=str, default="", help="template path")
parser.add_argument(
"--output_dir", type=str, default="", help="template path")
return parser.parse_known_args()
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
gen_mace_engine_factory(FLAGS.model_tag, FLAGS.template_dir,
FLAGS.output_dir)
...@@ -10,6 +10,7 @@ cc_binary( ...@@ -10,6 +10,7 @@ cc_binary(
deps = [ deps = [
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_models", "//mace/codegen:generated_models",
"//mace/codegen:generated_mace_engine_factory",
"//mace/core:core", "//mace/core:core",
], ],
) )
...@@ -41,24 +41,7 @@ ...@@ -41,24 +41,7 @@
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
#include "mace/codegen/engine/mace_engine_factory.h"
// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead
namespace mace {
namespace MACE_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelName();
extern const std::string ModelChecksum();
extern const std::string ModelBuildTime();
extern const std::string ModelBuildOptions();
} // namespace MACE_MODEL_TAG
} // namespace mace
namespace mace { namespace mace {
namespace tools { namespace tools {
...@@ -180,6 +163,9 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) { ...@@ -180,6 +163,9 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) {
return curr; return curr;
} }
DEFINE_string(model_name,
"",
"model name in yaml");
DEFINE_string(input_node, DEFINE_string(input_node,
"input_node0,input_node1", "input_node0,input_node1",
"input nodes, separated by comma"); "input nodes, separated by comma");
...@@ -211,22 +197,12 @@ DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); ...@@ -211,22 +197,12 @@ DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1, DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
bool RunModel(const std::vector<std::string> &input_names, bool RunModel(const std::string &model_name,
const std::vector<std::string> &input_names,
const std::vector<std::vector<int64_t>> &input_shapes, const std::vector<std::vector<int64_t>> &input_shapes,
const std::vector<std::string> &output_names, const std::vector<std::string> &output_names,
const std::vector<std::vector<int64_t>> &output_shapes) { const std::vector<std::vector<int64_t>> &output_shapes) {
// load model
int64_t t0 = NowMicros();
const unsigned char *model_data =
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
int64_t t1 = NowMicros();
double create_net_millis = (t1 - t0) / 1000.0;
LOG(INFO) << "CreateNetDef latency: " << create_net_millis << " ms";
DeviceType device_type = ParseDeviceType(FLAGS_device); DeviceType device_type = ParseDeviceType(FLAGS_device);
LOG(INFO) << "Runing with device type: " << device_type;
// config runtime // config runtime
mace::SetOpenMPThreadPolicy( mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads, FLAGS_omp_num_threads,
...@@ -239,25 +215,43 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -239,25 +215,43 @@ bool RunModel(const std::vector<std::string> &input_names,
} }
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
const char *kernel_path = getenv("MACE_CL_PROGRAM_PATH"); const char *kernel_path = getenv("MACE_INTERNAL_STORAGE_PATH");
const std::string kernel_file_path = const std::string kernel_file_path =
std::string(kernel_path == nullptr ? std::string(kernel_path == nullptr ?
"/data/local/tmp/mace_run/cl_program" : kernel_path); "/data/local/tmp/mace_run/interior" : kernel_path);
// Init model
LOG(INFO) << "Run init";
std::shared_ptr<KVStorageFactory> storage_factory( std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(kernel_file_path)); new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory); SetKVStorageFactory(storage_factory);
mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { std::shared_ptr<mace::MaceEngine> engine;
mace::MACE_MODEL_TAG::UnloadModelData(model_data); MaceStatus create_engine_status;
} // Create Engine
int64_t t2 = NowMicros(); int64_t t0 = NowMicros();
double mace_engine_ctor_millis = (t2 - t1) / 1000.0; if (FLAGS_model_data_file.empty()) {
double init_millis = (t2 - t0) / 1000.0; create_engine_status =
LOG(INFO) << "MaceEngine constructor latency: " CreateMaceEngine(model_name.c_str(),
<< mace_engine_ctor_millis << " ms"; nullptr,
input_names,
output_names,
device_type,
&engine);
} else {
create_engine_status =
CreateMaceEngine(model_name.c_str(),
FLAGS_model_data_file.c_str(),
input_names,
output_names,
device_type,
&engine);
}
int64_t t1 = NowMicros();
if (create_engine_status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Create engine error, please check the arguments";
}
double init_millis = (t1 - t0) / 1000.0;
LOG(INFO) << "Total init latency: " << init_millis << " ms"; LOG(INFO) << "Total init latency: " << init_millis << " ms";
const size_t input_count = input_names.size(); const size_t input_count = input_names.size();
...@@ -297,7 +291,7 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -297,7 +291,7 @@ bool RunModel(const std::vector<std::string> &input_names,
LOG(INFO) << "Warm up run"; LOG(INFO) << "Warm up run";
int64_t t3 = NowMicros(); int64_t t3 = NowMicros();
engine.Run(inputs, &outputs); engine->Run(inputs, &outputs);
int64_t t4 = NowMicros(); int64_t t4 = NowMicros();
double warmup_millis = (t4 - t3) / 1000.0; double warmup_millis = (t4 - t3) / 1000.0;
LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms"; LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
...@@ -308,7 +302,7 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -308,7 +302,7 @@ bool RunModel(const std::vector<std::string> &input_names,
int64_t t0 = NowMicros(); int64_t t0 = NowMicros();
struct mallinfo prev = mallinfo(); struct mallinfo prev = mallinfo();
for (int i = 0; i < FLAGS_round; ++i) { for (int i = 0; i < FLAGS_round; ++i) {
engine.Run(inputs, &outputs); engine->Run(inputs, &outputs);
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
LOG(INFO) << "=== check malloc info change #" << i << " ==="; LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev); prev = LogMallinfoChange(prev);
...@@ -320,11 +314,11 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -320,11 +314,11 @@ bool RunModel(const std::vector<std::string> &input_names,
} }
// Metrics reporting tools depends on the format, keep in consistent // Metrics reporting tools depends on the format, keep in consistent
printf("================================================================\n"); printf("========================================\n");
printf(" create_net engine_ctor init warmup run_avg\n"); printf(" init warmup run_avg\n");
printf("================================================================\n"); printf("========================================\n");
printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis, printf("time %11.3f %11.3f %11.3f\n",
mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis); init_millis, warmup_millis, model_run_millis);
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) { if (device_type == DeviceType::GPU) {
...@@ -355,11 +349,8 @@ int Main(int argc, char **argv) { ...@@ -355,11 +349,8 @@ int Main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message"); gflags::SetUsageMessage("some usage message");
gflags::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "model name: " << FLAGS_model_name;
LOG(INFO) << "mace version: " << MaceVersion(); LOG(INFO) << "mace version: " << MaceVersion();
LOG(INFO) << "model name: " << mace::MACE_MODEL_TAG::ModelName();
LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum();
LOG(INFO) << "build time: " << mace::MACE_MODEL_TAG::ModelBuildTime();
LOG(INFO) << "build options: " << mace::MACE_MODEL_TAG::ModelBuildOptions();
LOG(INFO) << "input node: " << FLAGS_input_node; LOG(INFO) << "input node: " << FLAGS_input_node;
LOG(INFO) << "input shape: " << FLAGS_input_shape; LOG(INFO) << "input shape: " << FLAGS_input_shape;
LOG(INFO) << "output node: " << FLAGS_output_node; LOG(INFO) << "output node: " << FLAGS_output_node;
...@@ -399,7 +390,8 @@ int Main(int argc, char **argv) { ...@@ -399,7 +390,8 @@ int Main(int argc, char **argv) {
for (int i = 0; i < FLAGS_restart_round; ++i) { for (int i = 0; i < FLAGS_restart_round; ++i) {
VLOG(0) << "restart round " << i; VLOG(0) << "restart round " << i;
ret = ret =
RunModel(input_names, input_shape_vec, output_names, output_shape_vec); RunModel(FLAGS_model_name, input_names, input_shape_vec,
output_names, output_shape_vec);
} }
if (ret) { if (ret) {
return 0; return 0;
......
...@@ -95,13 +95,17 @@ def gen_opencl_and_tuning_code(target_abi, ...@@ -95,13 +95,17 @@ def gen_opencl_and_tuning_code(target_abi,
serialno, serialno,
model_output_dirs, model_output_dirs,
pull_or_not): pull_or_not):
cl_built_kernel_file_name = "mace_cl_compiled_program.bin"
cl_platform_info_file_name = "mace_cl_platform_info.txt"
if pull_or_not: if pull_or_not:
sh_commands.pull_binaries(target_abi, serialno, model_output_dirs) sh_commands.pull_binaries(target_abi, serialno, model_output_dirs,
cl_built_kernel_file_name,
codegen_path = "mace/codegen" cl_platform_info_file_name)
# generate opencl binary code # generate opencl binary code
sh_commands.gen_opencl_binary_code(model_output_dirs) sh_commands.gen_opencl_binary_code(model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name)
sh_commands.gen_tuning_param_code(model_output_dirs) sh_commands.gen_tuning_param_code(model_output_dirs)
...@@ -111,16 +115,14 @@ def model_benchmark_stdout_processor(stdout, ...@@ -111,16 +115,14 @@ def model_benchmark_stdout_processor(stdout,
serialno, serialno,
model_name, model_name,
runtime): runtime):
metrics = [0] * 5 metrics = [0] * 3
for line in stdout.split('\n'): for line in stdout.split('\n'):
line = line.strip() line = line.strip()
parts = line.split() parts = line.split()
if len(parts) == 6 and parts[0].startswith("time"): if len(parts) == 4 and parts[0].startswith("time"):
metrics[0] = str(float(parts[1])) metrics[0] = str(float(parts[1]))
metrics[1] = str(float(parts[2])) metrics[1] = str(float(parts[2]))
metrics[2] = str(float(parts[3])) metrics[2] = str(float(parts[3]))
metrics[3] = str(float(parts[4]))
metrics[4] = str(float(parts[5]))
break break
device_name = "" device_name = ""
...@@ -133,22 +135,20 @@ def model_benchmark_stdout_processor(stdout, ...@@ -133,22 +135,20 @@ def model_benchmark_stdout_processor(stdout,
report_filename = FLAGS.output_dir + "/report.csv" report_filename = FLAGS.output_dir + "/report.csv"
if not os.path.exists(report_filename): if not os.path.exists(report_filename):
with open(report_filename, 'w') as f: with open(report_filename, 'w') as f:
f.write("model_name,device_name,soc,abi,runtime,create_net," f.write("model_name,device_name,soc,abi,runtime,"
"engine_ctor,init,warmup,run_avg\n") "init,warmup,run_avg\n")
data_str = "{model_name},{device_name},{soc},{abi},{runtime}," \ data_str = "{model_name},{device_name},{soc},{abi},{runtime}," \
"{create_net},{engine_ctor},{init},{warmup},{run_avg}\n" \ "{init},{warmup},{run_avg}\n" \
.format( .format(
model_name=model_name, model_name=model_name,
device_name=device_name, device_name=device_name,
soc=target_soc, soc=target_soc,
abi=abi, abi=abi,
runtime=runtime, runtime=runtime,
create_net=metrics[0], init=metrics[0],
engine_ctor=metrics[1], warmup=metrics[1],
init=metrics[2], run_avg=metrics[2]
warmup=metrics[3],
run_avg=metrics[4]
) )
with open(report_filename, 'a') as f: with open(report_filename, 'a') as f:
f.write(data_str) f.write(data_str)
...@@ -227,12 +227,11 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, ...@@ -227,12 +227,11 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
sh_commands.bazel_build( sh_commands.bazel_build(
mace_run_target, mace_run_target,
abi=target_abi, abi=target_abi,
model_tag=model_name,
production_mode=False, production_mode=False,
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp enable_openmp=enable_openmp
) )
sh_commands.update_mace_run_lib(model_output_dir, target_abi, sh_commands.update_mace_run_lib(model_output_dir,
model_name, embed_model_data) model_name, embed_model_data)
tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data, tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data,
...@@ -254,13 +253,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, ...@@ -254,13 +253,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
mace_run_target, mace_run_target,
strip, strip,
abi=target_abi, abi=target_abi,
model_tag=model_name,
production_mode=True, production_mode=True,
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
debug=debug, debug=debug,
enable_openmp=enable_openmp enable_openmp=enable_openmp
) )
sh_commands.update_mace_run_lib(model_output_dir, target_abi, sh_commands.update_mace_run_lib(model_output_dir,
model_name, embed_model_data) model_name, embed_model_data)
else: else:
gen_opencl_and_tuning_code(target_abi, serialno, [], False) gen_opencl_and_tuning_code(target_abi, serialno, [], False)
...@@ -268,13 +266,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, ...@@ -268,13 +266,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
mace_run_target, mace_run_target,
strip, strip,
abi=target_abi, abi=target_abi,
model_tag=model_name,
production_mode=True, production_mode=True,
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
debug=debug, debug=debug,
enable_openmp=enable_openmp enable_openmp=enable_openmp
) )
sh_commands.update_mace_run_lib(model_output_dir, target_abi, sh_commands.update_mace_run_lib(model_output_dir,
model_name, embed_model_data) model_name, embed_model_data)
...@@ -299,7 +296,7 @@ def merge_libs_and_tuning_results(target_soc, ...@@ -299,7 +296,7 @@ def merge_libs_and_tuning_results(target_soc,
embed_model_data) embed_model_data)
def get_model_files(model_file_path, def download_model_files(model_file_path,
model_output_dir, model_output_dir,
weight_file_path=""): weight_file_path=""):
model_file = "" model_file = ""
...@@ -308,13 +305,27 @@ def get_model_files(model_file_path, ...@@ -308,13 +305,27 @@ def get_model_files(model_file_path,
model_file_path.startswith("https://"): model_file_path.startswith("https://"):
model_file = model_output_dir + "/model.pb" model_file = model_output_dir + "/model.pb"
urllib.urlretrieve(model_file_path, model_file) urllib.urlretrieve(model_file_path, model_file)
if weight_file_path.startswith("http://") or \
weight_file_path.startswith("https://"):
weight_file = model_output_dir + "/model.caffemodel"
urllib.urlretrieve(weight_file_path, weight_file)
def get_model_files_path(model_file_path,
model_output_dir,
weight_file_path=""):
model_file = ""
weight_file = ""
if model_file_path.startswith("http://") or \
model_file_path.startswith("https://"):
model_file = model_output_dir + "/model.pb"
else: else:
model_file = model_file_path model_file = model_file_path
if weight_file_path.startswith("http://") or \ if weight_file_path.startswith("http://") or \
weight_file_path.startswith("https://"): weight_file_path.startswith("https://"):
weight_file = model_output_dir + "/model.caffemodel" weight_file = model_output_dir + "/model.caffemodel"
urllib.urlretrieve(weight_file_path, weight_file)
else: else:
weight_file = weight_file_path weight_file = weight_file_path
...@@ -525,6 +536,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -525,6 +536,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
target_abi, phone_data_dir, target_soc="", serialno=""): target_abi, phone_data_dir, target_soc="", serialno=""):
hexagon_mode = get_hexagon_mode(configs) hexagon_mode = get_hexagon_mode(configs)
model_output_dirs = [] model_output_dirs = []
for model_name in configs["models"]: for model_name in configs["models"]:
print '===================', model_name, '===================' print '===================', model_name, '==================='
model_config = configs["models"][model_name] model_config = configs["models"][model_name]
...@@ -534,17 +546,19 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -534,17 +546,19 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
# Create model build directory # Create model build directory
model_path_digest = md5sum(model_config["model_file_path"]) model_path_digest = md5sum(model_config["model_file_path"])
model_output_base_dir = "%s/%s/%s/%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest)
if target_abi == "host": if target_abi == "host":
model_output_dir = "%s/%s/%s/%s/%s/%s" % ( model_output_dir = "%s/%s" % (model_output_base_dir, target_abi)
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest, target_abi)
else: else:
device_name = sh_commands.adb_get_device_name_by_serialno(serialno) device_name = sh_commands.adb_get_device_name_by_serialno(serialno)
model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % ( model_output_dir = "%s/%s_%s/%s" % (
FLAGS.output_dir, project_name, "build", model_output_base_dir, device_name.replace(' ', ''),
model_name, model_path_digest, device_name.replace(' ', ''),
target_soc, target_abi) target_soc, target_abi)
sh_commands.clear_phone_data_dir(serialno, phone_data_dir)
model_output_dirs.append(model_output_dir) model_output_dirs.append(model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "all": if FLAGS.mode == "build" or FLAGS.mode == "all":
...@@ -552,14 +566,9 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -552,14 +566,9 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
sh.rm("-rf", model_output_dir) sh.rm("-rf", model_output_dir)
os.makedirs(model_output_dir) os.makedirs(model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "benchmark" or \ model_file_path, weight_file_path = get_model_files_path(
FLAGS.mode == "all":
sh_commands.clear_mace_run_data(
target_abi, serialno, phone_data_dir)
model_file_path, weight_file_path = get_model_files(
model_config["model_file_path"], model_config["model_file_path"],
model_output_dir, model_output_base_dir,
model_config["weight_file_path"]) model_config["weight_file_path"])
if FLAGS.mode == "build" or FLAGS.mode == "run" or \ if FLAGS.mode == "build" or FLAGS.mode == "run" or \
...@@ -570,25 +579,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -570,25 +579,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config["input_shapes"], model_config["input_shapes"],
input_file_list) input_file_list)
if FLAGS.mode == "build" or FLAGS.mode == "benchmark" or \
FLAGS.mode == "all":
sh_commands.gen_model_code(
"mace/codegen/models/%s" % model_name,
model_config["platform"],
model_file_path,
weight_file_path,
model_config["model_sha256_checksum"],
",".join(model_config["input_nodes"]),
",".join(model_config["output_nodes"]),
data_type,
model_config["runtime"],
model_name,
":".join(model_config["input_shapes"]),
model_config["dsp_mode"],
embed_model_data,
model_config["fast_conv"],
model_config["obfuscate"])
if FLAGS.mode == "build" or FLAGS.mode == "all": if FLAGS.mode == "build" or FLAGS.mode == "all":
build_mace_run_prod(hexagon_mode, build_mace_run_prod(hexagon_mode,
model_config["runtime"], model_config["runtime"],
...@@ -609,6 +599,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -609,6 +599,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config["limit_opencl_kernel_time"], model_config["limit_opencl_kernel_time"],
phone_data_dir, phone_data_dir,
FLAGS.enable_openmp) FLAGS.enable_openmp)
sh_commands.build_benchmark_model(target_abi,
embed_model_data,
model_output_dir,
model_name,
hexagon_mode)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or \ if FLAGS.mode == "run" or FLAGS.mode == "validate" or \
FLAGS.mode == "all": FLAGS.mode == "all":
...@@ -647,7 +642,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -647,7 +642,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config["output_shapes"], model_config["output_shapes"],
model_name, model_name,
device_type, device_type,
hexagon_mode,
phone_data_dir, phone_data_dir,
FLAGS.omp_num_threads, FLAGS.omp_num_threads,
FLAGS.cpu_affinity_policy, FLAGS.cpu_affinity_policy,
...@@ -738,12 +732,61 @@ def main(unused_args): ...@@ -738,12 +732,61 @@ def main(unused_args):
# generate source # generate source
sh_commands.gen_mace_version() sh_commands.gen_mace_version()
sh_commands.gen_encrypted_opencl_source() sh_commands.gen_encrypted_opencl_source()
sh_commands.gen_mace_engine_factory_source(configs['models'].keys())
embed_model_data = configs["embed_model_data"]
target_socs = get_target_socs(configs) target_socs = get_target_socs(configs)
embed_model_data = configs["embed_model_data"]
vlog_level = FLAGS.vlog_level vlog_level = FLAGS.vlog_level
phone_data_dir = "/data/local/tmp/mace_run/" phone_data_dir = "/data/local/tmp/mace_run/"
if FLAGS.mode == "build" or FLAGS.mode == "all":
print '* Model Convert'
sh_commands.clear_model_codegen()
for model_name in configs["models"]:
print '===================', model_name, '==================='
model_config = configs["models"][model_name]
data_type, device_type = get_data_and_device_type(
model_config["runtime"])
# Create model build directory
model_path_digest = md5sum(model_config["model_file_path"])
model_output_base_dir = "%s/%s/%s/%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest)
if os.path.exists(model_output_base_dir):
sh.rm("-rf", model_output_base_dir)
os.makedirs(model_output_base_dir)
download_model_files(
model_config["model_file_path"],
model_output_base_dir,
model_config["weight_file_path"])
model_file_path, weight_file_path = get_model_files_path(
model_config["model_file_path"],
model_output_base_dir,
model_config["weight_file_path"])
sh_commands.gen_model_code(
"mace/codegen/models/%s" % model_name,
model_config["platform"],
model_file_path,
weight_file_path,
model_config["model_sha256_checksum"],
",".join(model_config["input_nodes"]),
",".join(model_config["output_nodes"]),
data_type,
model_config["runtime"],
model_name,
":".join(model_config["input_shapes"]),
model_config["dsp_mode"],
embed_model_data,
model_config["fast_conv"],
model_config["obfuscate"])
for target_abi in configs["target_abis"]: for target_abi in configs["target_abis"]:
for target_soc in target_socs: for target_soc in target_socs:
if target_abi != 'host': if target_abi != 'host':
......
...@@ -33,6 +33,7 @@ try: ...@@ -33,6 +33,7 @@ try:
from binary_codegen import tuning_param_codegen from binary_codegen import tuning_param_codegen
from generate_data import generate_input_data from generate_data import generate_input_data
from validate import validate from validate import validate
from mace_engine_factory_codegen import gen_mace_engine_factory
except Exception as e: except Exception as e:
print("Import error:\n%s" % e) print("Import error:\n%s" % e)
exit(1) exit(1)
...@@ -74,15 +75,14 @@ def is_device_locked(serialno): ...@@ -74,15 +75,14 @@ def is_device_locked(serialno):
################################ ################################
# clear data # clear data
################################ ################################
def clear_mace_run_data(abi, def clear_phone_data_dir(serialno, phone_data_dir):
serialno,
phone_data_dir,
model_codegen_dir="mace/codegen/models"):
if abi != "host":
sh.adb("-s", sh.adb("-s",
serialno, serialno,
"shell", "shell",
"rm -rf %s" % phone_data_dir) "rm -rf %s" % phone_data_dir)
def clear_model_codegen(model_codegen_dir="mace/codegen/models"):
if os.path.exists(model_codegen_dir): if os.path.exists(model_codegen_dir):
sh.rm("-rf", model_codegen_dir) sh.rm("-rf", model_codegen_dir)
...@@ -268,7 +268,6 @@ def adb_run_valgrind(serialno, ...@@ -268,7 +268,6 @@ def adb_run_valgrind(serialno,
def bazel_build(target, def bazel_build(target,
strip="always", strip="always",
abi="armeabi-v7a", abi="armeabi-v7a",
model_tag="",
production_mode=False, production_mode=False,
hexagon_mode=False, hexagon_mode=False,
disable_no_tuning_warning=False, disable_no_tuning_warning=False,
...@@ -289,7 +288,6 @@ def bazel_build(target, ...@@ -289,7 +288,6 @@ def bazel_build(target,
"--copt=-std=c++11", "--copt=-std=c++11",
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1", "--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror=return-type", "--copt=-Werror=return-type",
"--copt=-DMACE_MODEL_TAG=%s" % model_tag,
"--copt=-O3", "--copt=-O3",
"--define", "--define",
"openmp=%s" % str(enable_openmp).lower(), "openmp=%s" % str(enable_openmp).lower(),
...@@ -315,7 +313,6 @@ def bazel_build(target, ...@@ -315,7 +313,6 @@ def bazel_build(target,
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1", "--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror=return-type", "--copt=-Werror=return-type",
"--copt=-DMACE_OBFUSCATE_LITERALS", "--copt=-DMACE_OBFUSCATE_LITERALS",
"--copt=-DMACE_MODEL_TAG=%s" % model_tag,
"--copt=-O3", "--copt=-O3",
"--define", "--define",
"neon=true", "neon=true",
...@@ -371,8 +368,22 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"): ...@@ -371,8 +368,22 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
"mace/codegen/opencl/opencl_encrypt_program.cc") "mace/codegen/opencl/opencl_encrypt_program.cc")
def pull_binaries(abi, serialno, model_output_dirs): def gen_mace_engine_factory_source(model_tags, codegen_path="mace/codegen"):
compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/" print("* Generate mace engine creator source")
codegen_tools_dir = "%s/engine" % codegen_path
sh.rm("-rf", codegen_tools_dir)
sh.mkdir("-p", codegen_tools_dir)
gen_mace_engine_factory(
model_tags,
"mace/python/tools",
codegen_tools_dir)
print("Genearte mace engine creator source done!\n")
def pull_binaries(abi, serialno, model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name):
compiled_opencl_dir = "/data/local/tmp/mace_run/interior/"
mace_run_param_file = "mace_run.config" mace_run_param_file = "mace_run.config"
cl_bin_dirs = [] cl_bin_dirs = []
...@@ -385,15 +396,18 @@ def pull_binaries(abi, serialno, model_output_dirs): ...@@ -385,15 +396,18 @@ def pull_binaries(abi, serialno, model_output_dirs):
sh.rm("-rf", cl_bin_dir) sh.rm("-rf", cl_bin_dir)
sh.mkdir("-p", cl_bin_dir) sh.mkdir("-p", cl_bin_dir)
if abi != "host": if abi != "host":
adb_pull(compiled_opencl_dir, cl_bin_dir, serialno) adb_pull(compiled_opencl_dir + cl_built_kernel_file_name,
cl_bin_dir, serialno)
adb_pull(compiled_opencl_dir + cl_platform_info_file_name,
cl_bin_dir, serialno)
adb_pull("/data/local/tmp/mace_run/%s" % mace_run_param_file, adb_pull("/data/local/tmp/mace_run/%s" % mace_run_param_file,
cl_bin_dir, serialno) cl_bin_dir, serialno)
def gen_opencl_binary_code(model_output_dirs, def gen_opencl_binary_code(model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name,
codegen_path="mace/codegen"): codegen_path="mace/codegen"):
cl_built_kernel_file_name = "mace_cl_compiled_program.bin"
cl_platform_info_file_name = "mace_cl_platform_info.txt"
opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path
cl_bin_dirs = [] cl_bin_dirs = []
...@@ -528,25 +542,8 @@ def gen_random_input(model_output_dir, ...@@ -528,25 +542,8 @@ def gen_random_input(model_output_dir,
def update_mace_run_lib(model_output_dir, def update_mace_run_lib(model_output_dir,
abi,
model_tag, model_tag,
embed_model_data, embed_model_data):
generated_model_lib_dir="bazel-bin/mace/codegen/"):
model_lib_path = model_output_dir + "/libmace_%s.a" % model_tag
if abi == "host":
bazel_build(
"//mace/codegen:generated_models",
abi=abi,
model_tag=model_tag)
generated_model_lib_name = "libgenerated_models.pic.a"
else:
generated_model_lib_name = "libgenerated_models.a"
if os.path.exists(model_lib_path):
sh.rm("-rf", model_lib_path)
sh.cp("-f", generated_model_lib_dir + "/" + generated_model_lib_name,
model_lib_path)
mace_run_filepath = model_output_dir + "/mace_run" mace_run_filepath = model_output_dir + "/mace_run"
if os.path.exists(mace_run_filepath): if os.path.exists(mace_run_filepath):
sh.rm("-rf", mace_run_filepath) sh.rm("-rf", mace_run_filepath)
...@@ -560,6 +557,12 @@ def update_mace_run_lib(model_output_dir, ...@@ -560,6 +557,12 @@ def update_mace_run_lib(model_output_dir,
model_output_dir) model_output_dir)
def create_internal_storage_dir(serialno, phone_data_dir):
internal_storage_dir = "%s/interior/" % phone_data_dir
sh.adb("-s", serialno, "shell", "mkdir", "-p", internal_storage_dir)
return internal_storage_dir
def tuning_run(abi, def tuning_run(abi,
serialno, serialno,
vlog_level, vlog_level,
...@@ -598,6 +601,7 @@ def tuning_run(abi, ...@@ -598,6 +601,7 @@ def tuning_run(abi,
"env", "env",
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"%s/mace_run" % model_output_dir, "%s/mace_run" % model_output_dir,
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes), "--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes), "--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes), "--input_shape=%s" % ":".join(input_shapes),
...@@ -622,8 +626,8 @@ def tuning_run(abi, ...@@ -622,8 +626,8 @@ def tuning_run(abi,
return stdout return stdout
else: else:
sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir) sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/" internal_storage_dir = create_internal_storage_dir(
sh.adb("-s", serialno, "shell", "mkdir", "-p", compiled_opencl_dir) serialno, phone_data_dir)
for input_name in input_nodes: for input_name in input_nodes:
formatted_name = common.formatted_file_name(input_file_name, formatted_name = common.formatted_file_name(input_file_name,
...@@ -646,7 +650,7 @@ def tuning_run(abi, ...@@ -646,7 +650,7 @@ def tuning_run(abi,
"MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check), "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir, "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir,
"MACE_CL_PROGRAM_PATH=%s/cl_program" % phone_data_dir, "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
"MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time, "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
] ]
if valgrind: if valgrind:
...@@ -657,6 +661,7 @@ def tuning_run(abi, ...@@ -657,6 +661,7 @@ def tuning_run(abi,
]) ])
adb_cmd.extend([ adb_cmd.extend([
"%s/mace_run" % phone_data_dir, "%s/mace_run" % phone_data_dir,
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes), "--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes), "--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes), "--input_shape=%s" % ":".join(input_shapes),
...@@ -836,6 +841,8 @@ def merge_libs(target_soc, ...@@ -836,6 +841,8 @@ def merge_libs(target_soc,
if hexagon_mode: if hexagon_mode:
sh.cp("-f", hexagon_lib_file, model_bin_dir) sh.cp("-f", hexagon_lib_file, model_bin_dir)
sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir)
mri_stream = "" mri_stream = ""
if abi == "host": if abi == "host":
mri_stream += "create %s/libmace_%s.a\n" % \ mri_stream += "create %s/libmace_%s.a\n" % \
...@@ -846,6 +853,12 @@ def merge_libs(target_soc, ...@@ -846,6 +853,12 @@ def merge_libs(target_soc,
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n") "bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_models.pic.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_mace_engine_creator.pic.a\n")
else: else:
mri_stream += "create %s/libmace_%s.%s.a\n" % \ mri_stream += "create %s/libmace_%s.%s.a\n" % \
(model_bin_dir, project_name, target_soc) (model_bin_dir, project_name, target_soc)
...@@ -858,6 +871,12 @@ def merge_libs(target_soc, ...@@ -858,6 +871,12 @@ def merge_libs(target_soc,
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/codegen/libgenerated_version.a\n") "bazel-bin/mace/codegen/libgenerated_version.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_models.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_mace_engine_creator.a\n")
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/core/libcore.a\n") "bazel-bin/mace/core/libcore.a\n")
...@@ -875,8 +894,6 @@ def merge_libs(target_soc, ...@@ -875,8 +894,6 @@ def merge_libs(target_soc,
"bazel-bin/mace/ops/libops.lo\n") "bazel-bin/mace/ops/libops.lo\n")
for model_output_dir in model_output_dirs: for model_output_dir in model_output_dirs:
for lib in sh.ls(glob.glob("%s/*.a" % model_output_dir), "-1"):
mri_stream += "addlib %s\n" % lib
if not embed_model_data: if not embed_model_data:
sh.cp("-f", glob.glob("%s/*.data" % model_output_dir), sh.cp("-f", glob.glob("%s/*.data" % model_output_dir),
model_data_dir) model_data_dir)
...@@ -921,26 +938,11 @@ def packaging_lib(libmace_output_dir, project_name): ...@@ -921,26 +938,11 @@ def packaging_lib(libmace_output_dir, project_name):
print("Packaging Done!\n") print("Packaging Done!\n")
def benchmark_model(abi, def build_benchmark_model(abi,
serialno,
vlog_level,
embed_model_data, embed_model_data,
model_output_dir, model_output_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
model_tag, model_tag,
device_type, hexagon_mode):
hexagon_mode,
phone_data_dir,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name="model_input",
output_file_name="model_out"):
print("* Benchmark for %s" % model_tag)
benchmark_binary_file = "%s/benchmark_model" % model_output_dir benchmark_binary_file = "%s/benchmark_model" % model_output_dir
if os.path.exists(benchmark_binary_file): if os.path.exists(benchmark_binary_file):
sh.rm("-rf", benchmark_binary_file) sh.rm("-rf", benchmark_binary_file)
...@@ -951,13 +953,32 @@ def benchmark_model(abi, ...@@ -951,13 +953,32 @@ def benchmark_model(abi,
benchmark_target = "//mace/benchmark:benchmark_model" benchmark_target = "//mace/benchmark:benchmark_model"
bazel_build(benchmark_target, bazel_build(benchmark_target,
abi=abi, abi=abi,
model_tag=model_tag,
production_mode=True, production_mode=True,
hexagon_mode=hexagon_mode) hexagon_mode=hexagon_mode)
target_bin = "/".join(bazel_target_to_bin(benchmark_target)) target_bin = "/".join(bazel_target_to_bin(benchmark_target))
sh.cp("-f", target_bin, model_output_dir) sh.cp("-f", target_bin, model_output_dir)
def benchmark_model(abi,
serialno,
vlog_level,
embed_model_data,
model_output_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
model_tag,
device_type,
phone_data_dir,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name="model_input"):
print("* Benchmark for %s" % model_tag)
stdout_buff = [] stdout_buff = []
process_output = make_output_processor(stdout_buff) process_output = make_output_processor(stdout_buff)
if abi == "host": if abi == "host":
...@@ -966,6 +987,7 @@ def benchmark_model(abi, ...@@ -966,6 +987,7 @@ def benchmark_model(abi,
"env", "env",
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"%s/benchmark_model" % model_output_dir, "%s/benchmark_model" % model_output_dir,
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes), "--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes), "--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes), "--input_shape=%s" % ":".join(input_shapes),
...@@ -981,6 +1003,8 @@ def benchmark_model(abi, ...@@ -981,6 +1003,8 @@ def benchmark_model(abi,
p.wait() p.wait()
else: else:
sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir) sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
internal_storage_dir = create_internal_storage_dir(
serialno, phone_data_dir)
for input_name in input_nodes: for input_name in input_nodes:
formatted_name = common.formatted_file_name(input_file_name, formatted_name = common.formatted_file_name(input_file_name,
...@@ -1000,8 +1024,10 @@ def benchmark_model(abi, ...@@ -1000,8 +1024,10 @@ def benchmark_model(abi,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
phone_data_dir, phone_data_dir,
"MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
"MACE_OPENCL_PROFILING=1", "MACE_OPENCL_PROFILING=1",
"%s/benchmark_model" % phone_data_dir, "%s/benchmark_model" % phone_data_dir,
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes), "--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes), "--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes), "--input_shape=%s" % ":".join(input_shapes),
......