From dbf67ad96744159adc94b0fb24631ad5b6718468 Mon Sep 17 00:00:00 2001 From: liuqi Date: Tue, 8 May 2018 21:02:22 +0800 Subject: [PATCH] Add CreateMaceEngine API and speed up build logic. --- .gitignore | 1 + mace/benchmark/BUILD | 3 + mace/benchmark/benchmark_model.cc | 70 +++++------ mace/codegen/BUILD | 11 ++ mace/examples/example.cc | 56 +++------ mace/public/mace.h | 9 +- mace/python/tools/mace_engine_creator.jinja2 | 82 ++++++++++++ mace/python/tools/mace_engine_generator.py | 55 ++++++++ mace/tools/validation/BUILD | 1 + mace/tools/validation/mace_run.cc | 78 ++++-------- tools/mace_tools.py | 117 ++++++++++------- tools/sh_commands.py | 125 +++++++++++-------- 12 files changed, 383 insertions(+), 225 deletions(-) create mode 100644 mace/python/tools/mace_engine_creator.jinja2 create mode 100644 mace/python/tools/mace_engine_generator.py diff --git a/.gitignore b/.gitignore index 060f9f20..54dacd45 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ mace/codegen/opencl/ mace/codegen/opencl_bin/ mace/codegen/tuning/ mace/codegen/version/ +mace/codegen/engine/ build/ docs/_build/ diff --git a/mace/benchmark/BUILD b/mace/benchmark/BUILD index 0cc23bb1..70738ddc 100644 --- a/mace/benchmark/BUILD +++ b/mace/benchmark/BUILD @@ -6,6 +6,7 @@ load( "if_not_production_mode", "if_hexagon_enabled", "if_openmp_enabled", + "if_android", ) licenses(["notice"]) # Apache 2.0 @@ -26,12 +27,14 @@ cc_binary( srcs = [ "benchmark_model.cc", ], + copts = if_android(["-DMACE_ENABLE_OPENCL"]), linkopts = if_openmp_enabled(["-fopenmp"]), linkstatic = 1, deps = [ ":statistics", "//external:gflags_nothreads", "//mace/codegen:generated_models", + "//mace/codegen:generated_mace_engine_creator", ], ) diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc index b282af94..9332727e 100644 --- a/mace/benchmark/benchmark_model.cc +++ b/mace/benchmark/benchmark_model.cc @@ -26,20 +26,6 @@ #include "mace/utils/logging.h" #include "mace/benchmark/statistics.h" -namespace mace { -namespace MACE_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelChecksum(); - -} // namespace MACE_MODEL_TAG -} // namespace mace - namespace mace { namespace benchmark { namespace str_util { @@ -188,6 +174,7 @@ bool Run(const std::string &title, return true; } +DEFINE_string(model_tag, "", "model tag"); DEFINE_string(device, "CPU", "Device [CPU|GPU|DSP]"); DEFINE_string(input_node, "input_node0,input_node1", "input nodes, separated by comma"); @@ -198,7 +185,6 @@ DEFINE_string(output_shape, "", "output shape, separated by colon and comma"); DEFINE_string(input_file, "", "input file name"); DEFINE_int32(max_num_runs, 100, "number of runs max"); DEFINE_string(max_time, "10.0", "length to run max"); -DEFINE_string(benchmark_name, "", "benchmark name"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); @@ -214,7 +200,7 @@ int Main(int argc, char **argv) { gflags::SetUsageMessage("some usage message"); gflags::ParseCommandLineFlags(&argc, &argv, true); - LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]"; + LOG(INFO) << "Model tag: [" << FLAGS_model_tag << "]"; LOG(INFO) << "Device: [" << FLAGS_device << "]"; LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]"; LOG(INFO) << 
"gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]"; @@ -233,17 +219,6 @@ int Main(int argc, char **argv) { std::unique_ptr statistician(new OpStat()); - mace::DeviceType device_type = ParseDeviceType(FLAGS_device); - - // config runtime - mace::SetOpenMPThreadPolicy( - FLAGS_omp_num_threads, - static_cast(FLAGS_cpu_affinity_policy)); - if (device_type == DeviceType::GPU) { - mace::SetGPUHints( - static_cast(FLAGS_gpu_perf_hint), - static_cast(FLAGS_gpu_priority_hint)); - } std::vector input_names = str_util::Split(FLAGS_input_node, ','); @@ -265,9 +240,36 @@ int Main(int argc, char **argv) { ParseShape(output_shapes[i], &output_shape_vec[i]); } - const unsigned char *model_data = - mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str()); - NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); + mace::DeviceType device_type = ParseDeviceType(FLAGS_device); + + // config runtime + mace::SetOpenMPThreadPolicy( + FLAGS_omp_num_threads, + static_cast(FLAGS_cpu_affinity_policy)); +#ifdef MACE_ENABLE_OPENCL + if (device_type == DeviceType::GPU) { + mace::SetGPUHints( + static_cast(FLAGS_gpu_perf_hint), + static_cast(FLAGS_gpu_priority_hint)); + } +#endif // MACE_ENABLE_OPENCL + + const char *kernel_path = getenv("MACE_CL_PROGRAM_PATH"); + const std::string kernel_file_path = + std::string(kernel_path == nullptr ? + "/data/local/tmp/mace_run/cl_program" : kernel_path); + + std::shared_ptr storage_factory( + new FileStorageFactory(kernel_file_path)); + SetKVStorageFactory(storage_factory); + + // Create Engine + std::unique_ptr engine_ptr = + CreateMaceEngine(FLAGS_model_tag, + input_names, + output_names, + FLAGS_model_data_file.c_str(), + device_type); std::map inputs; std::map outputs; @@ -303,14 +305,6 @@ int Main(int argc, char **argv) { buffer_out); } - // Init model - LOG(INFO) << "Run init"; - std::unique_ptr engine_ptr( - new mace::MaceEngine(&net_def, device_type, input_names, output_names)); - if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { - mace::MACE_MODEL_TAG::UnloadModelData(model_data); - } - int64_t warmup_time_us = 0; int64_t num_warmup_runs = 0; if (FLAGS_warmup_runs > 0) { diff --git a/mace/codegen/BUILD b/mace/codegen/BUILD index bc92a7bf..fde3ebdd 100644 --- a/mace/codegen/BUILD +++ b/mace/codegen/BUILD @@ -33,3 +33,14 @@ cc_library( srcs = ["version/version.cc"], linkstatic = 1, ) + +cc_library( + name = "generated_mace_engine_creator", + srcs = ["engine/mace_engine_creator.cc"], + linkstatic = 1, + deps = [ + ":generated_models", + "//mace/public", + "//mace/utils", + ], +) diff --git a/mace/examples/example.cc b/mace/examples/example.cc index 91f8cb6a..97520b15 100644 --- a/mace/examples/example.cc +++ b/mace/examples/example.cc @@ -37,24 +37,6 @@ #include "mace/utils/env_time.h" #include "mace/utils/logging.h" -// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead -namespace mace { -namespace MACE_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelName(); -extern const std::string ModelChecksum(); -extern const std::string ModelBuildTime(); -extern const std::string ModelBuildOptions(); - -} // namespace MACE_MODEL_TAG -} // namespace mace - namespace mace { namespace examples { @@ -112,6 +94,9 @@ DeviceType ParseDeviceType(const std::string &device_str) { } +DEFINE_string(model_tag, + "", + "model tag 
in yaml file"); DEFINE_string(input_node, "input_node0,input_node1", "input nodes, separated by comma"); @@ -148,37 +133,38 @@ bool RunModel(const std::vector &input_names, const std::vector &output_names, const std::vector> &output_shapes) { // load model - const unsigned char *model_data = - mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str()); - NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); - DeviceType device_type = ParseDeviceType(FLAGS_device); - // config runtime - MaceStatus res = mace::SetOpenMPThreadPolicy( + mace::SetOpenMPThreadPolicy( FLAGS_omp_num_threads, static_cast(FLAGS_cpu_affinity_policy)); +#ifdef MACE_ENABLE_OPENCL if (device_type == DeviceType::GPU) { mace::SetGPUHints( static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); } +#endif // MACE_ENABLE_OPENCL // DO NOT USE tmp directory. // Please use APP's own directory and make sure the directory exists. + // Just call once const std::string kernel_file_path = - "/data/local/tmp/mace_run/cl"; + "/data/local/tmp/mace_run/cl"; // Config internal kv storage factory. std::shared_ptr storage_factory( new FileStorageFactory(kernel_file_path)); SetKVStorageFactory(storage_factory); - // Init model - mace::MaceEngine engine(&net_def, device_type, input_names, - output_names); - if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { - mace::MACE_MODEL_TAG::UnloadModelData(model_data); - } + + // Create Engine + std::unique_ptr engine = + CreateMaceEngine(FLAGS_model_tag, + input_names, + output_names, + FLAGS_model_data_file.c_str(), + device_type); + const size_t input_count = input_names.size(); const size_t output_count = output_names.size(); @@ -216,12 +202,12 @@ bool RunModel(const std::vector &input_names, } LOG(INFO) << "Warm up run"; - engine.Run(inputs, &outputs); + engine->Run(inputs, &outputs); if (FLAGS_round > 0) { LOG(INFO) << "Run model"; for (int i = 0; i < FLAGS_round; ++i) { - engine.Run(inputs, &outputs); + engine->Run(inputs, &outputs); } } @@ -247,10 +233,6 @@ int Main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); LOG(INFO) << "mace version: " << MaceVersion(); - LOG(INFO) << "model name: " << mace::MACE_MODEL_TAG::ModelName(); - LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum(); - LOG(INFO) << "build time: " << mace::MACE_MODEL_TAG::ModelBuildTime(); - LOG(INFO) << "build options: " << mace::MACE_MODEL_TAG::ModelBuildOptions(); LOG(INFO) << "input node: " << FLAGS_input_node; LOG(INFO) << "input shape: " << FLAGS_input_shape; LOG(INFO) << "output node: " << FLAGS_output_node; diff --git a/mace/public/mace.h b/mace/public/mace.h index 02d903fd..c2a387d0 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -28,7 +28,7 @@ namespace mace { const char *MaceVersion(); -enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; +enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3, AUTO = 4 }; enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 }; @@ -82,6 +82,13 @@ class MaceEngine { MaceEngine &operator=(const MaceEngine &) = delete; }; +std::unique_ptr CreateMaceEngine( + const std::string &model_tag, + const std::vector &input_nodes, + const std::vector &output_nodes, + const char *model_data_file = nullptr, + const DeviceType device_type = DeviceType::AUTO); + } // namespace mace #endif // MACE_PUBLIC_MACE_H_ diff --git a/mace/python/tools/mace_engine_creator.jinja2 b/mace/python/tools/mace_engine_creator.jinja2 new file mode 100644 index 00000000..9863b6bb --- /dev/null +++ 
b/mace/python/tools/mace_engine_creator.jinja2 @@ -0,0 +1,82 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include +#include +#include +#include + +#include "mace/public/mace.h" +#include "mace/public/mace_runtime.h" +#include "mace/utils/logging.h" + +namespace mace { +{% for tag in model_tags %} +namespace {{tag}} { + +extern const unsigned char *LoadModelData(const char *model_data_file); + +extern void UnloadModelData(const unsigned char *model_data); + +extern NetDef CreateNet(const unsigned char *model_data); + +extern const std::string ModelName(); +extern const std::string ModelChecksum(); +extern const std::string ModelBuildTime(); +extern const std::string ModelBuildOptions(); + +} // namespace {{tag}} +{% endfor %} + +namespace { +std::map model_tag_map { +{% for i in range(model_tags |length) %} + std::make_pair({{ model_tags[i]|tojson }}, {{ i }}), +{% endfor %} +}; +} // namespace + +std::unique_ptr CreateMaceEngine( + const std::string &model_tag, + const std::vector &input_nodes, + const std::vector &output_nodes, + const char *model_data_file, + const DeviceType device_type) { + // load model + std::unique_ptr engine; + const unsigned char * model_data = nullptr; + NetDef net_def; + switch (model_tag_map[model_tag]) { +{% for i in range(model_tags |length) %} + case {{ i }}: + model_data = + mace::{{model_tags[i]}}::LoadModelData(model_data_file); + net_def = mace::{{model_tags[i]}}::CreateNet(model_data); + engine.reset( + new mace::MaceEngine(&net_def, device_type, input_nodes, output_nodes)); + if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { + mace::{{model_tags[i]}}::UnloadModelData(model_data); + } + break; +{% endfor %} + default: + LOG(FATAL) << "There is no model named " << model_tag; + } + + return engine; +} + +} // namespace mace diff --git a/mace/python/tools/mace_engine_generator.py b/mace/python/tools/mace_engine_generator.py new file mode 100644 index 00000000..d2f85f46 --- /dev/null +++ b/mace/python/tools/mace_engine_generator.py @@ -0,0 +1,55 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from jinja2 import Environment, FileSystemLoader + + +FLAGS = None + + +def gen_mace_engine_creator(model_tags, template_dir, output_dir): + # Create the jinja2 environment. 
+ j2_env = Environment( + loader=FileSystemLoader(template_dir), trim_blocks=True) + # generate mace_run BUILD file + print model_tags + template_name = 'mace_engine_creator.jinja2' + source = j2_env.get_template(template_name).render( + model_tags=model_tags, + ) + with open(output_dir + '/mace_engine_creator.cc', "wb") as f: + f.write(source) + + +def parse_args(): + """Parses command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_tag", + type=str, + default="", + help="model tag") + parser.add_argument( + "--template_dir", type=str, default="", help="template path") + parser.add_argument( + "--output_dir", type=str, default="", help="template path") + return parser.parse_known_args() + + +if __name__ == '__main__': + FLAGS, unparsed = parse_args() + gen_mace_engine_creator(FLAGS.model_tag, FLAGS.template_dir, + FLAGS.output_dir) diff --git a/mace/tools/validation/BUILD b/mace/tools/validation/BUILD index 636937d5..7282155e 100644 --- a/mace/tools/validation/BUILD +++ b/mace/tools/validation/BUILD @@ -10,6 +10,7 @@ cc_binary( deps = [ "//external:gflags_nothreads", "//mace/codegen:generated_models", + "//mace/codegen:generated_mace_engine_creator", "//mace/core:core", ], ) diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc index 1af468e2..a1583ee0 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/validation/mace_run.cc @@ -42,24 +42,6 @@ #include "mace/core/runtime/opencl/opencl_runtime.h" #endif // MACE_ENABLE_OPENCL -// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead -namespace mace { -namespace MACE_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelName(); -extern const std::string ModelChecksum(); -extern const std::string ModelBuildTime(); -extern const std::string ModelBuildOptions(); - -} // namespace MACE_MODEL_TAG -} // namespace mace - namespace mace { namespace tools { namespace validation { @@ -180,6 +162,9 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) { return curr; } +DEFINE_string(model_tag, + "", + "model tag in yaml"); DEFINE_string(input_node, "input_node0,input_node1", "input nodes, separated by comma"); @@ -211,22 +196,12 @@ DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); -bool RunModel(const std::vector &input_names, +bool RunModel(const std::string &model_tag, + const std::vector &input_names, const std::vector> &input_shapes, const std::vector &output_names, const std::vector> &output_shapes) { - // load model - int64_t t0 = NowMicros(); - const unsigned char *model_data = - mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str()); - NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); - int64_t t1 = NowMicros(); - double create_net_millis = (t1 - t0) / 1000.0; - LOG(INFO) << "CreateNetDef latency: " << create_net_millis << " ms"; - DeviceType device_type = ParseDeviceType(FLAGS_device); - LOG(INFO) << "Runing with device type: " << device_type; - // config runtime mace::SetOpenMPThreadPolicy( FLAGS_omp_num_threads, @@ -244,20 +219,20 @@ bool RunModel(const std::vector &input_names, std::string(kernel_path == nullptr ? 
"/data/local/tmp/mace_run/cl_program" : kernel_path); - // Init model - LOG(INFO) << "Run init"; std::shared_ptr storage_factory( new FileStorageFactory(kernel_file_path)); SetKVStorageFactory(storage_factory); - mace::MaceEngine engine(&net_def, device_type, input_names, output_names); - if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { - mace::MACE_MODEL_TAG::UnloadModelData(model_data); - } - int64_t t2 = NowMicros(); - double mace_engine_ctor_millis = (t2 - t1) / 1000.0; - double init_millis = (t2 - t0) / 1000.0; - LOG(INFO) << "MaceEngine constructor latency: " - << mace_engine_ctor_millis << " ms"; + + // Create Engine + int64_t t0 = NowMicros(); + std::unique_ptr engine = + CreateMaceEngine(model_tag, + input_names, + output_names, + FLAGS_model_data_file.c_str(), + device_type); + int64_t t1 = NowMicros(); + double init_millis = (t1 - t0) / 1000.0; LOG(INFO) << "Total init latency: " << init_millis << " ms"; const size_t input_count = input_names.size(); @@ -297,7 +272,7 @@ bool RunModel(const std::vector &input_names, LOG(INFO) << "Warm up run"; int64_t t3 = NowMicros(); - engine.Run(inputs, &outputs); + engine->Run(inputs, &outputs); int64_t t4 = NowMicros(); double warmup_millis = (t4 - t3) / 1000.0; LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms"; @@ -308,7 +283,7 @@ bool RunModel(const std::vector &input_names, int64_t t0 = NowMicros(); struct mallinfo prev = mallinfo(); for (int i = 0; i < FLAGS_round; ++i) { - engine.Run(inputs, &outputs); + engine->Run(inputs, &outputs); if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { LOG(INFO) << "=== check malloc info change #" << i << " ==="; prev = LogMallinfoChange(prev); @@ -320,11 +295,11 @@ bool RunModel(const std::vector &input_names, } // Metrics reporting tools depends on the format, keep in consistent - printf("================================================================\n"); - printf(" create_net engine_ctor init warmup run_avg\n"); - printf("================================================================\n"); - printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis, - mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis); + printf("========================================\n"); + printf(" init warmup run_avg\n"); + printf("========================================\n"); + printf("time %11.3f %11.3f %11.3f\n", + init_millis, warmup_millis, model_run_millis); #ifdef MACE_ENABLE_OPENCL if (device_type == DeviceType::GPU) { @@ -356,10 +331,6 @@ int Main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); LOG(INFO) << "mace version: " << MaceVersion(); - LOG(INFO) << "model name: " << mace::MACE_MODEL_TAG::ModelName(); - LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum(); - LOG(INFO) << "build time: " << mace::MACE_MODEL_TAG::ModelBuildTime(); - LOG(INFO) << "build options: " << mace::MACE_MODEL_TAG::ModelBuildOptions(); LOG(INFO) << "input node: " << FLAGS_input_node; LOG(INFO) << "input shape: " << FLAGS_input_shape; LOG(INFO) << "output node: " << FLAGS_output_node; @@ -399,7 +370,8 @@ int Main(int argc, char **argv) { for (int i = 0; i < FLAGS_restart_round; ++i) { VLOG(0) << "restart round " << i; ret = - RunModel(input_names, input_shape_vec, output_names, output_shape_vec); + RunModel(FLAGS_model_tag, input_names, input_shape_vec, + output_names, output_shape_vec); } if (ret) { return 0; diff --git a/tools/mace_tools.py b/tools/mace_tools.py index a2874610..24ebabfb 100644 --- 
a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -95,13 +95,17 @@ def gen_opencl_and_tuning_code(target_abi, serialno, model_output_dirs, pull_or_not): + cl_built_kernel_file_name = "mace_cl_compiled_program.bin" + cl_platform_info_file_name = "mace_cl_platform_info.txt" if pull_or_not: - sh_commands.pull_binaries(target_abi, serialno, model_output_dirs) - - codegen_path = "mace/codegen" + sh_commands.pull_binaries(target_abi, serialno, model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name) # generate opencl binary code - sh_commands.gen_opencl_binary_code(model_output_dirs) + sh_commands.gen_opencl_binary_code(model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name) sh_commands.gen_tuning_param_code(model_output_dirs) @@ -227,12 +231,11 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, sh_commands.bazel_build( mace_run_target, abi=target_abi, - model_tag=model_name, production_mode=False, hexagon_mode=hexagon_mode, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, target_abi, + sh_commands.update_mace_run_lib(model_output_dir, model_name, embed_model_data) tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data, @@ -254,13 +257,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, mace_run_target, strip, abi=target_abi, - model_tag=model_name, production_mode=True, hexagon_mode=hexagon_mode, debug=debug, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, target_abi, + sh_commands.update_mace_run_lib(model_output_dir, model_name, embed_model_data) else: gen_opencl_and_tuning_code(target_abi, serialno, [], False) @@ -268,13 +270,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, mace_run_target, strip, abi=target_abi, - model_tag=model_name, production_mode=True, hexagon_mode=hexagon_mode, debug=debug, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, target_abi, + sh_commands.update_mace_run_lib(model_output_dir, model_name, embed_model_data) @@ -525,6 +526,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, target_abi, phone_data_dir, target_soc="", serialno=""): hexagon_mode = get_hexagon_mode(configs) model_output_dirs = [] + for model_name in configs["models"]: print '===================', model_name, '===================' model_config = configs["models"][model_name] @@ -534,16 +536,16 @@ def process_models(project_name, configs, embed_model_data, vlog_level, # Create model build directory model_path_digest = md5sum(model_config["model_file_path"]) + model_output_base_dir = "%s/%s/%s/%s/%s" % ( + FLAGS.output_dir, project_name, "build", + model_name, model_path_digest) if target_abi == "host": - model_output_dir = "%s/%s/%s/%s/%s/%s" % ( - FLAGS.output_dir, project_name, "build", - model_name, model_path_digest, target_abi) + model_output_dir = "%s/%s" % (model_output_base_dir, target_abi) else: device_name = sh_commands.adb_get_device_name_by_serialno(serialno) - model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % ( - FLAGS.output_dir, project_name, "build", - model_name, model_path_digest, device_name.replace(' ', ''), + model_output_dir = "%s/%s_%s/%s" % ( + model_output_base_dir, device_name.replace(' ', ''), target_soc, target_abi) model_output_dirs.append(model_output_dir) @@ -552,16 +554,14 @@ def process_models(project_name, configs, embed_model_data, vlog_level, sh.rm("-rf", model_output_dir) os.makedirs(model_output_dir) - if FLAGS.mode == "build" or FLAGS.mode == 
"benchmark" or \ - FLAGS.mode == "all": - sh_commands.clear_mace_run_data( - target_abi, serialno, phone_data_dir) - model_file_path, weight_file_path = get_model_files( model_config["model_file_path"], - model_output_dir, + model_output_base_dir, model_config["weight_file_path"]) + sh_commands.clear_phone_data_dir( + target_abi, serialno, phone_data_dir) + if FLAGS.mode == "build" or FLAGS.mode == "run" or \ FLAGS.mode == "validate" or \ FLAGS.mode == "benchmark" or FLAGS.mode == "all": @@ -570,25 +570,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["input_shapes"], input_file_list) - if FLAGS.mode == "build" or FLAGS.mode == "benchmark" or \ - FLAGS.mode == "all": - sh_commands.gen_model_code( - "mace/codegen/models/%s" % model_name, - model_config["platform"], - model_file_path, - weight_file_path, - model_config["model_sha256_checksum"], - ",".join(model_config["input_nodes"]), - ",".join(model_config["output_nodes"]), - data_type, - model_config["runtime"], - model_name, - ":".join(model_config["input_shapes"]), - model_config["dsp_mode"], - embed_model_data, - model_config["fast_conv"], - model_config["obfuscate"]) - if FLAGS.mode == "build" or FLAGS.mode == "all": build_mace_run_prod(hexagon_mode, model_config["runtime"], @@ -609,9 +590,14 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["limit_opencl_kernel_time"], phone_data_dir, FLAGS.enable_openmp) + sh_commands.build_benchmark_model(target_abi, + embed_model_data, + model_output_dir, + model_name, + hexagon_mode) if FLAGS.mode == "run" or FLAGS.mode == "validate" or \ - FLAGS.mode == "all": + FLAGS.mode == "all": tuning_run(model_config["runtime"], target_abi, serialno, @@ -647,7 +633,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["output_shapes"], model_name, device_type, - hexagon_mode, phone_data_dir, FLAGS.omp_num_threads, FLAGS.cpu_affinity_policy, @@ -738,12 +723,56 @@ def main(unused_args): # generate source sh_commands.gen_mace_version() sh_commands.gen_encrypted_opencl_source() + sh_commands.gen_mace_engine_creator_source(configs['models'].keys()) + embed_model_data = configs["embed_model_data"] target_socs = get_target_socs(configs) - embed_model_data = configs["embed_model_data"] vlog_level = FLAGS.vlog_level phone_data_dir = "/data/local/tmp/mace_run/" + + if FLAGS.mode == "build" or FLAGS.mode == "all": + print '* Model Convert' + sh_commands.clear_model_codegen() + for model_name in configs["models"]: + print '===================', model_name, '===================' + model_config = configs["models"][model_name] + data_type, device_type = get_data_and_device_type( + model_config["runtime"]) + + # Create model build directory + model_path_digest = md5sum(model_config["model_file_path"]) + + model_output_base_dir = "%s/%s/%s/%s/%s" % ( + FLAGS.output_dir, project_name, "build", + model_name, model_path_digest) + + if os.path.exists(model_output_base_dir): + sh.rm("-rf", model_output_base_dir) + os.makedirs(model_output_base_dir) + + model_file_path, weight_file_path = get_model_files( + model_config["model_file_path"], + model_output_base_dir, + model_config["weight_file_path"]) + + sh_commands.gen_model_code( + "mace/codegen/models/%s" % model_name, + model_config["platform"], + model_file_path, + weight_file_path, + model_config["model_sha256_checksum"], + ",".join(model_config["input_nodes"]), + ",".join(model_config["output_nodes"]), + data_type, + model_config["runtime"], + model_name, + 
":".join(model_config["input_shapes"]), + model_config["dsp_mode"], + embed_model_data, + model_config["fast_conv"], + model_config["obfuscate"]) + for target_abi in configs["target_abis"]: for target_soc in target_socs: if target_abi != 'host': diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 01656d3f..97c89ea8 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -33,6 +33,7 @@ try: from binary_codegen import tuning_param_codegen from generate_data import generate_input_data from validate import validate + from mace_engine_generator import gen_mace_engine_creator except Exception as e: print("Import error:\n%s" % e) exit(1) @@ -74,15 +75,15 @@ def is_device_locked(serialno): ################################ # clear data ################################ -def clear_mace_run_data(abi, - serialno, - phone_data_dir, - model_codegen_dir="mace/codegen/models"): +def clear_phone_data_dir(abi, serialno, phone_data_dir): if abi != "host": sh.adb("-s", serialno, "shell", "rm -rf %s" % phone_data_dir) + + +def clear_model_codegen(model_codegen_dir="mace/codegen/models"): if os.path.exists(model_codegen_dir): sh.rm("-rf", model_codegen_dir) @@ -268,7 +269,6 @@ def adb_run_valgrind(serialno, def bazel_build(target, strip="always", abi="armeabi-v7a", - model_tag="", production_mode=False, hexagon_mode=False, disable_no_tuning_warning=False, @@ -289,7 +289,6 @@ def bazel_build(target, "--copt=-std=c++11", "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", "--copt=-Werror=return-type", - "--copt=-DMACE_MODEL_TAG=%s" % model_tag, "--copt=-O3", "--define", "openmp=%s" % str(enable_openmp).lower(), @@ -315,7 +314,6 @@ def bazel_build(target, "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", "--copt=-Werror=return-type", "--copt=-DMACE_OBFUSCATE_LITERALS", - "--copt=-DMACE_MODEL_TAG=%s" % model_tag, "--copt=-O3", "--define", "neon=true", @@ -371,7 +369,21 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"): "mace/codegen/opencl/opencl_encrypt_program.cc") -def pull_binaries(abi, serialno, model_output_dirs): +def gen_mace_engine_creator_source(model_tags, codegen_path="mace/codegen"): + print("* Genearte mace engine creator source") + codegen_tools_dir = "%s/engine" % codegen_path + sh.rm("-rf", codegen_tools_dir) + sh.mkdir("-p", codegen_tools_dir) + gen_mace_engine_creator( + model_tags, + "mace/python/tools", + codegen_tools_dir) + print("Genearte mace engine creator source done!\n") + + +def pull_binaries(abi, serialno, model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name): compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/" mace_run_param_file = "mace_run.config" @@ -385,15 +397,18 @@ def pull_binaries(abi, serialno, model_output_dirs): sh.rm("-rf", cl_bin_dir) sh.mkdir("-p", cl_bin_dir) if abi != "host": - adb_pull(compiled_opencl_dir, cl_bin_dir, serialno) + adb_pull(compiled_opencl_dir + cl_built_kernel_file_name, + cl_bin_dir, serialno) + adb_pull(compiled_opencl_dir + cl_platform_info_file_name, + cl_bin_dir, serialno) adb_pull("/data/local/tmp/mace_run/%s" % mace_run_param_file, cl_bin_dir, serialno) def gen_opencl_binary_code(model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name, codegen_path="mace/codegen"): - cl_built_kernel_file_name = "mace_cl_compiled_program.bin" - cl_platform_info_file_name = "mace_cl_platform_info.txt" opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path cl_bin_dirs = [] @@ -528,25 +543,8 @@ def gen_random_input(model_output_dir, def update_mace_run_lib(model_output_dir, - abi, 
model_tag, - embed_model_data, - generated_model_lib_dir="bazel-bin/mace/codegen/"): - model_lib_path = model_output_dir + "/libmace_%s.a" % model_tag - if abi == "host": - bazel_build( - "//mace/codegen:generated_models", - abi=abi, - model_tag=model_tag) - generated_model_lib_name = "libgenerated_models.pic.a" - else: - generated_model_lib_name = "libgenerated_models.a" - - if os.path.exists(model_lib_path): - sh.rm("-rf", model_lib_path) - sh.cp("-f", generated_model_lib_dir + "/" + generated_model_lib_name, - model_lib_path) - + embed_model_data): mace_run_filepath = model_output_dir + "/mace_run" if os.path.exists(mace_run_filepath): sh.rm("-rf", mace_run_filepath) @@ -560,6 +558,11 @@ def update_mace_run_lib(model_output_dir, model_output_dir) +def create_compiled_opencl_dir(serialno): + compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/" + sh.adb("-s", serialno, "shell", "mkdir", "-p", compiled_opencl_dir) + + def tuning_run(abi, serialno, vlog_level, @@ -598,6 +601,7 @@ def tuning_run(abi, "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "%s/mace_run" % model_output_dir, + "--model_tag=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), @@ -622,8 +626,7 @@ def tuning_run(abi, return stdout else: sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir) - compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/" - sh.adb("-s", serialno, "shell", "mkdir", "-p", compiled_opencl_dir) + create_compiled_opencl_dir(serialno) for input_name in input_nodes: formatted_name = common.formatted_file_name(input_file_name, @@ -657,6 +660,7 @@ def tuning_run(abi, ]) adb_cmd.extend([ "%s/mace_run" % phone_data_dir, + "--model_tag=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), @@ -846,6 +850,12 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_models.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_mace_engine_creator.pic.a\n") else: mri_stream += "create %s/libmace_%s.%s.a\n" % \ (model_bin_dir, project_name, target_soc) @@ -858,6 +868,12 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_version.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_models.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_mace_engine_creator.a\n") mri_stream += ( "addlib " "bazel-bin/mace/core/libcore.a\n") @@ -875,8 +891,6 @@ def merge_libs(target_soc, "bazel-bin/mace/ops/libops.lo\n") for model_output_dir in model_output_dirs: - for lib in sh.ls(glob.glob("%s/*.a" % model_output_dir), "-1"): - mri_stream += "addlib %s\n" % lib if not embed_model_data: sh.cp("-f", glob.glob("%s/*.data" % model_output_dir), model_data_dir) @@ -921,6 +935,28 @@ def packaging_lib(libmace_output_dir, project_name): print("Packaging Done!\n") +def build_benchmark_model(abi, + embed_model_data, + model_output_dir, + model_tag, + hexagon_mode): + benchmark_binary_file = "%s/benchmark_model" % model_output_dir + if os.path.exists(benchmark_binary_file): + sh.rm("-rf", benchmark_binary_file) + if not embed_model_data: + sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag), + model_output_dir) + + benchmark_target = "//mace/benchmark:benchmark_model" + 
bazel_build(benchmark_target, + abi=abi, + production_mode=True, + hexagon_mode=hexagon_mode) + + target_bin = "/".join(bazel_target_to_bin(benchmark_target)) + sh.cp("-f", target_bin, model_output_dir) + + def benchmark_model(abi, serialno, vlog_level, @@ -932,31 +968,13 @@ def benchmark_model(abi, output_shapes, model_tag, device_type, - hexagon_mode, phone_data_dir, omp_num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, gpu_priority_hint=3, - input_file_name="model_input", - output_file_name="model_out"): + input_file_name="model_input"): print("* Benchmark for %s" % model_tag) - benchmark_binary_file = "%s/benchmark_model" % model_output_dir - if os.path.exists(benchmark_binary_file): - sh.rm("-rf", benchmark_binary_file) - if not embed_model_data: - sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag), - model_output_dir) - - benchmark_target = "//mace/benchmark:benchmark_model" - bazel_build(benchmark_target, - abi=abi, - model_tag=model_tag, - production_mode=True, - hexagon_mode=hexagon_mode) - - target_bin = "/".join(bazel_target_to_bin(benchmark_target)) - sh.cp("-f", target_bin, model_output_dir) stdout_buff = [] process_output = make_output_processor(stdout_buff) @@ -966,6 +984,7 @@ def benchmark_model(abi, "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "%s/benchmark_model" % model_output_dir, + "--model_tag=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), @@ -981,6 +1000,7 @@ def benchmark_model(abi, p.wait() else: sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir) + create_compiled_opencl_dir(serialno) for input_name in input_nodes: formatted_name = common.formatted_file_name(input_file_name, @@ -1002,6 +1022,7 @@ def benchmark_model(abi, phone_data_dir, "MACE_OPENCL_PROFILING=1", "%s/benchmark_model" % phone_data_dir, + "--model_tag=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), -- GitLab
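
Caller-side usage sketch (not part of the patch): the snippet below illustrates how application code is expected to use the CreateMaceEngine() API that this patch adds to mace/public/mace.h, following the updated mace/examples/example.cc above. The template arguments (std::unique_ptr<mace::MaceEngine>, std::vector<std::string>, std::map<std::string, mace::MaceTensor>) were lost from the angle brackets in the inlined diff and are assumed here; the model tag "mobilenet_v1", the file paths, the tensor shapes, and the thread settings are placeholders rather than values taken from the patch.

#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"

int main() {
  const std::vector<std::string> input_names = {"input_node0"};
  const std::vector<std::string> output_names = {"output_node0"};

  // Configure the runtime before creating the engine, in the same order as
  // benchmark_model.cc and mace_run.cc in this patch.
  mace::SetOpenMPThreadPolicy(4, mace::AFFINITY_BIG_ONLY);

  // GPU only: point the OpenCL kernel cache at a directory owned by the app.
  std::shared_ptr<mace::KVStorageFactory> storage_factory(
      new mace::FileStorageFactory("/path/owned/by/app/cl_program"));
  mace::SetKVStorageFactory(storage_factory);

  // One call replaces the old LoadModelData/CreateNet/MaceEngine sequence;
  // the model is selected at runtime by the tag baked into the generated
  // mace/codegen/engine/mace_engine_creator.cc.
  std::unique_ptr<mace::MaceEngine> engine =
      mace::CreateMaceEngine("mobilenet_v1",         // model tag from the YAML config
                             input_names,
                             output_names,
                             "/path/to/model.data",  // nullptr when the data is embedded
                             mace::DeviceType::GPU);

  // Allocate input/output buffers matching the model's shapes (placeholder shapes).
  const std::vector<int64_t> input_shape = {1, 224, 224, 3};
  const std::vector<int64_t> output_shape = {1, 1001};
  auto buffer_in = std::shared_ptr<float>(new float[1 * 224 * 224 * 3],
                                          std::default_delete<float[]>());
  auto buffer_out = std::shared_ptr<float>(new float[1 * 1001],
                                           std::default_delete<float[]>());
  // ... fill buffer_in with real input data ...

  std::map<std::string, mace::MaceTensor> inputs;
  std::map<std::string, mace::MaceTensor> outputs;
  inputs[input_names[0]] = mace::MaceTensor(input_shape, buffer_in);
  outputs[output_names[0]] = mace::MaceTensor(output_shape, buffer_out);

  engine->Run(inputs, &outputs);
  return 0;
}

Because CreateMaceEngine() dispatches on the model tag at runtime, a single mace_run or benchmark_model binary can serve every model listed in the YAML config, which is what allows this patch to drop the per-model -DMACE_MODEL_TAG compile flag and build each target once instead of once per model.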