From dbf67ad96744159adc94b0fb24631ad5b6718468 Mon Sep 17 00:00:00 2001 From: liuqi Date: Tue, 8 May 2018 21:02:22 +0800 Subject: [PATCH] Add CreateMaceEngine API and speed up build logic. --- .gitignore | 1 + mace/benchmark/BUILD | 3 + mace/benchmark/benchmark_model.cc | 70 +++++------ mace/codegen/BUILD | 11 ++ mace/examples/example.cc | 56 +++------ mace/public/mace.h | 9 +- mace/python/tools/mace_engine_creator.jinja2 | 82 ++++++++++++ mace/python/tools/mace_engine_generator.py | 55 ++++++++ mace/tools/validation/BUILD | 1 + mace/tools/validation/mace_run.cc | 78 ++++-------- tools/mace_tools.py | 117 ++++++++++------- tools/sh_commands.py | 125 +++++++++++-------- 12 files changed, 383 insertions(+), 225 deletions(-) create mode 100644 mace/python/tools/mace_engine_creator.jinja2 create mode 100644 mace/python/tools/mace_engine_generator.py diff --git a/.gitignore b/.gitignore index 060f9f20..54dacd45 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ mace/codegen/opencl/ mace/codegen/opencl_bin/ mace/codegen/tuning/ mace/codegen/version/ +mace/codegen/engine/ build/ docs/_build/ diff --git a/mace/benchmark/BUILD b/mace/benchmark/BUILD index 0cc23bb1..70738ddc 100644 --- a/mace/benchmark/BUILD +++ b/mace/benchmark/BUILD @@ -6,6 +6,7 @@ load( "if_not_production_mode", "if_hexagon_enabled", "if_openmp_enabled", + "if_android", ) licenses(["notice"]) # Apache 2.0 @@ -26,12 +27,14 @@ cc_binary( srcs = [ "benchmark_model.cc", ], + copts = if_android(["-DMACE_ENABLE_OPENCL"]), linkopts = if_openmp_enabled(["-fopenmp"]), linkstatic = 1, deps = [ ":statistics", "//external:gflags_nothreads", "//mace/codegen:generated_models", + "//mace/codegen:generated_mace_engine_creator", ], ) diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc index b282af94..9332727e 100644 --- a/mace/benchmark/benchmark_model.cc +++ b/mace/benchmark/benchmark_model.cc @@ -26,20 +26,6 @@ #include "mace/utils/logging.h" #include "mace/benchmark/statistics.h" -namespace mace { -namespace MACE_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelChecksum(); - -} // namespace MACE_MODEL_TAG -} // namespace mace - namespace mace { namespace benchmark { namespace str_util { @@ -188,6 +174,7 @@ bool Run(const std::string &title, return true; } +DEFINE_string(model_tag, "", "model tag"); DEFINE_string(device, "CPU", "Device [CPU|GPU|DSP]"); DEFINE_string(input_node, "input_node0,input_node1", "input nodes, separated by comma"); @@ -198,7 +185,6 @@ DEFINE_string(output_shape, "", "output shape, separated by colon and comma"); DEFINE_string(input_file, "", "input file name"); DEFINE_int32(max_num_runs, 100, "number of runs max"); DEFINE_string(max_time, "10.0", "length to run max"); -DEFINE_string(benchmark_name, "", "benchmark name"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); @@ -214,7 +200,7 @@ int Main(int argc, char **argv) { gflags::SetUsageMessage("some usage message"); gflags::ParseCommandLineFlags(&argc, &argv, true); - LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]"; + LOG(INFO) << "Model tag: [" << FLAGS_model_tag << "]"; LOG(INFO) << "Device: [" << FLAGS_device << "]"; LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]"; LOG(INFO) << 
"gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]"; @@ -233,17 +219,6 @@ int Main(int argc, char **argv) { std::unique_ptr statistician(new OpStat()); - mace::DeviceType device_type = ParseDeviceType(FLAGS_device); - - // config runtime - mace::SetOpenMPThreadPolicy( - FLAGS_omp_num_threads, - static_cast(FLAGS_cpu_affinity_policy)); - if (device_type == DeviceType::GPU) { - mace::SetGPUHints( - static_cast(FLAGS_gpu_perf_hint), - static_cast(FLAGS_gpu_priority_hint)); - } std::vector input_names = str_util::Split(FLAGS_input_node, ','); @@ -265,9 +240,36 @@ int Main(int argc, char **argv) { ParseShape(output_shapes[i], &output_shape_vec[i]); } - const unsigned char *model_data = - mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str()); - NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); + mace::DeviceType device_type = ParseDeviceType(FLAGS_device); + + // config runtime + mace::SetOpenMPThreadPolicy( + FLAGS_omp_num_threads, + static_cast(FLAGS_cpu_affinity_policy)); +#ifdef MACE_ENABLE_OPENCL + if (device_type == DeviceType::GPU) { + mace::SetGPUHints( + static_cast(FLAGS_gpu_perf_hint), + static_cast(FLAGS_gpu_priority_hint)); + } +#endif // MACE_ENABLE_OPENCL + + const char *kernel_path = getenv("MACE_CL_PROGRAM_PATH"); + const std::string kernel_file_path = + std::string(kernel_path == nullptr ? + "/data/local/tmp/mace_run/cl_program" : kernel_path); + + std::shared_ptr storage_factory( + new FileStorageFactory(kernel_file_path)); + SetKVStorageFactory(storage_factory); + + // Create Engine + std::unique_ptr engine_ptr = + CreateMaceEngine(FLAGS_model_tag, + input_names, + output_names, + FLAGS_model_data_file.c_str(), + device_type); std::map inputs; std::map outputs; @@ -303,14 +305,6 @@ int Main(int argc, char **argv) { buffer_out); } - // Init model - LOG(INFO) << "Run init"; - std::unique_ptr engine_ptr( - new mace::MaceEngine(&net_def, device_type, input_names, output_names)); - if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { - mace::MACE_MODEL_TAG::UnloadModelData(model_data); - } - int64_t warmup_time_us = 0; int64_t num_warmup_runs = 0; if (FLAGS_warmup_runs > 0) { diff --git a/mace/codegen/BUILD b/mace/codegen/BUILD index bc92a7bf..fde3ebdd 100644 --- a/mace/codegen/BUILD +++ b/mace/codegen/BUILD @@ -33,3 +33,14 @@ cc_library( srcs = ["version/version.cc"], linkstatic = 1, ) + +cc_library( + name = "generated_mace_engine_creator", + srcs = ["engine/mace_engine_creator.cc"], + linkstatic = 1, + deps = [ + ":generated_models", + "//mace/public", + "//mace/utils", + ], +) diff --git a/mace/examples/example.cc b/mace/examples/example.cc index 91f8cb6a..97520b15 100644 --- a/mace/examples/example.cc +++ b/mace/examples/example.cc @@ -37,24 +37,6 @@ #include "mace/utils/env_time.h" #include "mace/utils/logging.h" -// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead -namespace mace { -namespace MACE_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelName(); -extern const std::string ModelChecksum(); -extern const std::string ModelBuildTime(); -extern const std::string ModelBuildOptions(); - -} // namespace MACE_MODEL_TAG -} // namespace mace - namespace mace { namespace examples { @@ -112,6 +94,9 @@ DeviceType ParseDeviceType(const std::string &device_str) { } +DEFINE_string(model_tag, + "", + "model tag 
in yaml file"); DEFINE_string(input_node, "input_node0,input_node1", "input nodes, separated by comma"); @@ -148,37 +133,38 @@ bool RunModel(const std::vector &input_names, const std::vector &output_names, const std::vector> &output_shapes) { // load model - const unsigned char *model_data = - mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str()); - NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); - DeviceType device_type = ParseDeviceType(FLAGS_device); - // config runtime - MaceStatus res = mace::SetOpenMPThreadPolicy( + mace::SetOpenMPThreadPolicy( FLAGS_omp_num_threads, static_cast(FLAGS_cpu_affinity_policy)); +#ifdef MACE_ENABLE_OPENCL if (device_type == DeviceType::GPU) { mace::SetGPUHints( static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); } +#endif // MACE_ENABLE_OPENCL // DO NOT USE tmp directory. // Please use APP's own directory and make sure the directory exists. + // Just call once const std::string kernel_file_path = - "/data/local/tmp/mace_run/cl"; + "/data/local/tmp/mace_run/cl"; // Config internal kv storage factory. std::shared_ptr storage_factory( new FileStorageFactory(kernel_file_path)); SetKVStorageFactory(storage_factory); - // Init model - mace::MaceEngine engine(&net_def, device_type, input_names, - output_names); - if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { - mace::MACE_MODEL_TAG::UnloadModelData(model_data); - } + + // Create Engine + std::unique_ptr engine = + CreateMaceEngine(FLAGS_model_tag, + input_names, + output_names, + FLAGS_model_data_file.c_str(), + device_type); + const size_t input_count = input_names.size(); const size_t output_count = output_names.size(); @@ -216,12 +202,12 @@ bool RunModel(const std::vector &input_names, } LOG(INFO) << "Warm up run"; - engine.Run(inputs, &outputs); + engine->Run(inputs, &outputs); if (FLAGS_round > 0) { LOG(INFO) << "Run model"; for (int i = 0; i < FLAGS_round; ++i) { - engine.Run(inputs, &outputs); + engine->Run(inputs, &outputs); } } @@ -247,10 +233,6 @@ int Main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); LOG(INFO) << "mace version: " << MaceVersion(); - LOG(INFO) << "model name: " << mace::MACE_MODEL_TAG::ModelName(); - LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum(); - LOG(INFO) << "build time: " << mace::MACE_MODEL_TAG::ModelBuildTime(); - LOG(INFO) << "build options: " << mace::MACE_MODEL_TAG::ModelBuildOptions(); LOG(INFO) << "input node: " << FLAGS_input_node; LOG(INFO) << "input shape: " << FLAGS_input_shape; LOG(INFO) << "output node: " << FLAGS_output_node; diff --git a/mace/public/mace.h b/mace/public/mace.h index 02d903fd..c2a387d0 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -28,7 +28,7 @@ namespace mace { const char *MaceVersion(); -enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; +enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3, AUTO = 4 }; enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 }; @@ -82,6 +82,13 @@ class MaceEngine { MaceEngine &operator=(const MaceEngine &) = delete; }; +std::unique_ptr CreateMaceEngine( + const std::string &model_tag, + const std::vector &input_nodes, + const std::vector &output_nodes, + const char *model_data_file = nullptr, + const DeviceType device_type = DeviceType::AUTO); + } // namespace mace #endif // MACE_PUBLIC_MACE_H_ diff --git a/mace/python/tools/mace_engine_creator.jinja2 b/mace/python/tools/mace_engine_creator.jinja2 new file mode 100644 index 00000000..9863b6bb --- /dev/null +++ 
b/mace/python/tools/mace_engine_creator.jinja2 @@ -0,0 +1,82 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include +#include +#include +#include + +#include "mace/public/mace.h" +#include "mace/public/mace_runtime.h" +#include "mace/utils/logging.h" + +namespace mace { +{% for tag in model_tags %} +namespace {{tag}} { + +extern const unsigned char *LoadModelData(const char *model_data_file); + +extern void UnloadModelData(const unsigned char *model_data); + +extern NetDef CreateNet(const unsigned char *model_data); + +extern const std::string ModelName(); +extern const std::string ModelChecksum(); +extern const std::string ModelBuildTime(); +extern const std::string ModelBuildOptions(); + +} // namespace {{tag}} +{% endfor %} + +namespace { +std::map model_tag_map { +{% for i in range(model_tags |length) %} + std::make_pair({{ model_tags[i]|tojson }}, {{ i }}), +{% endfor %} +}; +} // namespace + +std::unique_ptr CreateMaceEngine( + const std::string &model_tag, + const std::vector &input_nodes, + const std::vector &output_nodes, + const char *model_data_file, + const DeviceType device_type) { + // load model + std::unique_ptr engine; + const unsigned char * model_data = nullptr; + NetDef net_def; + switch (model_tag_map[model_tag]) { +{% for i in range(model_tags |length) %} + case {{ i }}: + model_data = + mace::{{model_tags[i]}}::LoadModelData(model_data_file); + net_def = mace::{{model_tags[i]}}::CreateNet(model_data); + engine.reset( + new mace::MaceEngine(&net_def, device_type, input_nodes, output_nodes)); + if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { + mace::{{model_tags[i]}}::UnloadModelData(model_data); + } + break; +{% endfor %} + default: + LOG(FATAL) << "There is no model named " << model_tag; + } + + return engine; +} + +} // namespace mace diff --git a/mace/python/tools/mace_engine_generator.py b/mace/python/tools/mace_engine_generator.py new file mode 100644 index 00000000..d2f85f46 --- /dev/null +++ b/mace/python/tools/mace_engine_generator.py @@ -0,0 +1,55 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from jinja2 import Environment, FileSystemLoader + + +FLAGS = None + + +def gen_mace_engine_creator(model_tags, template_dir, output_dir): + # Create the jinja2 environment. 
+ j2_env = Environment( + loader=FileSystemLoader(template_dir), trim_blocks=True) + # generate mace_run BUILD file + print model_tags + template_name = 'mace_engine_creator.jinja2' + source = j2_env.get_template(template_name).render( + model_tags=model_tags, + ) + with open(output_dir + '/mace_engine_creator.cc', "wb") as f: + f.write(source) + + +def parse_args(): + """Parses command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_tag", + type=str, + default="", + help="model tag") + parser.add_argument( + "--template_dir", type=str, default="", help="template path") + parser.add_argument( + "--output_dir", type=str, default="", help="template path") + return parser.parse_known_args() + + +if __name__ == '__main__': + FLAGS, unparsed = parse_args() + gen_mace_engine_creator(FLAGS.model_tag, FLAGS.template_dir, + FLAGS.output_dir) diff --git a/mace/tools/validation/BUILD b/mace/tools/validation/BUILD index 636937d5..7282155e 100644 --- a/mace/tools/validation/BUILD +++ b/mace/tools/validation/BUILD @@ -10,6 +10,7 @@ cc_binary( deps = [ "//external:gflags_nothreads", "//mace/codegen:generated_models", + "//mace/codegen:generated_mace_engine_creator", "//mace/core:core", ], ) diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc index 1af468e2..a1583ee0 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/validation/mace_run.cc @@ -42,24 +42,6 @@ #include "mace/core/runtime/opencl/opencl_runtime.h" #endif // MACE_ENABLE_OPENCL -// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead -namespace mace { -namespace MACE_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelName(); -extern const std::string ModelChecksum(); -extern const std::string ModelBuildTime(); -extern const std::string ModelBuildOptions(); - -} // namespace MACE_MODEL_TAG -} // namespace mace - namespace mace { namespace tools { namespace validation { @@ -180,6 +162,9 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) { return curr; } +DEFINE_string(model_tag, + "", + "model tag in yaml"); DEFINE_string(input_node, "input_node0,input_node1", "input nodes, separated by comma"); @@ -211,22 +196,12 @@ DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); -bool RunModel(const std::vector &input_names, +bool RunModel(const std::string &model_tag, + const std::vector &input_names, const std::vector> &input_shapes, const std::vector &output_names, const std::vector> &output_shapes) { - // load model - int64_t t0 = NowMicros(); - const unsigned char *model_data = - mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str()); - NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); - int64_t t1 = NowMicros(); - double create_net_millis = (t1 - t0) / 1000.0; - LOG(INFO) << "CreateNetDef latency: " << create_net_millis << " ms"; - DeviceType device_type = ParseDeviceType(FLAGS_device); - LOG(INFO) << "Runing with device type: " << device_type; - // config runtime mace::SetOpenMPThreadPolicy( FLAGS_omp_num_threads, @@ -244,20 +219,20 @@ bool RunModel(const std::vector &input_names, std::string(kernel_path == nullptr ? 
"/data/local/tmp/mace_run/cl_program" : kernel_path); - // Init model - LOG(INFO) << "Run init"; std::shared_ptr storage_factory( new FileStorageFactory(kernel_file_path)); SetKVStorageFactory(storage_factory); - mace::MaceEngine engine(&net_def, device_type, input_names, output_names); - if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { - mace::MACE_MODEL_TAG::UnloadModelData(model_data); - } - int64_t t2 = NowMicros(); - double mace_engine_ctor_millis = (t2 - t1) / 1000.0; - double init_millis = (t2 - t0) / 1000.0; - LOG(INFO) << "MaceEngine constructor latency: " - << mace_engine_ctor_millis << " ms"; + + // Create Engine + int64_t t0 = NowMicros(); + std::unique_ptr engine = + CreateMaceEngine(model_tag, + input_names, + output_names, + FLAGS_model_data_file.c_str(), + device_type); + int64_t t1 = NowMicros(); + double init_millis = (t1 - t0) / 1000.0; LOG(INFO) << "Total init latency: " << init_millis << " ms"; const size_t input_count = input_names.size(); @@ -297,7 +272,7 @@ bool RunModel(const std::vector &input_names, LOG(INFO) << "Warm up run"; int64_t t3 = NowMicros(); - engine.Run(inputs, &outputs); + engine->Run(inputs, &outputs); int64_t t4 = NowMicros(); double warmup_millis = (t4 - t3) / 1000.0; LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms"; @@ -308,7 +283,7 @@ bool RunModel(const std::vector &input_names, int64_t t0 = NowMicros(); struct mallinfo prev = mallinfo(); for (int i = 0; i < FLAGS_round; ++i) { - engine.Run(inputs, &outputs); + engine->Run(inputs, &outputs); if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { LOG(INFO) << "=== check malloc info change #" << i << " ==="; prev = LogMallinfoChange(prev); @@ -320,11 +295,11 @@ bool RunModel(const std::vector &input_names, } // Metrics reporting tools depends on the format, keep in consistent - printf("================================================================\n"); - printf(" create_net engine_ctor init warmup run_avg\n"); - printf("================================================================\n"); - printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis, - mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis); + printf("========================================\n"); + printf(" init warmup run_avg\n"); + printf("========================================\n"); + printf("time %11.3f %11.3f %11.3f\n", + init_millis, warmup_millis, model_run_millis); #ifdef MACE_ENABLE_OPENCL if (device_type == DeviceType::GPU) { @@ -356,10 +331,6 @@ int Main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); LOG(INFO) << "mace version: " << MaceVersion(); - LOG(INFO) << "model name: " << mace::MACE_MODEL_TAG::ModelName(); - LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum(); - LOG(INFO) << "build time: " << mace::MACE_MODEL_TAG::ModelBuildTime(); - LOG(INFO) << "build options: " << mace::MACE_MODEL_TAG::ModelBuildOptions(); LOG(INFO) << "input node: " << FLAGS_input_node; LOG(INFO) << "input shape: " << FLAGS_input_shape; LOG(INFO) << "output node: " << FLAGS_output_node; @@ -399,7 +370,8 @@ int Main(int argc, char **argv) { for (int i = 0; i < FLAGS_restart_round; ++i) { VLOG(0) << "restart round " << i; ret = - RunModel(input_names, input_shape_vec, output_names, output_shape_vec); + RunModel(FLAGS_model_tag, input_names, input_shape_vec, + output_names, output_shape_vec); } if (ret) { return 0; diff --git a/tools/mace_tools.py b/tools/mace_tools.py index a2874610..24ebabfb 100644 --- 
a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -95,13 +95,17 @@ def gen_opencl_and_tuning_code(target_abi, serialno, model_output_dirs, pull_or_not): + cl_built_kernel_file_name = "mace_cl_compiled_program.bin" + cl_platform_info_file_name = "mace_cl_platform_info.txt" if pull_or_not: - sh_commands.pull_binaries(target_abi, serialno, model_output_dirs) - - codegen_path = "mace/codegen" + sh_commands.pull_binaries(target_abi, serialno, model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name) # generate opencl binary code - sh_commands.gen_opencl_binary_code(model_output_dirs) + sh_commands.gen_opencl_binary_code(model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name) sh_commands.gen_tuning_param_code(model_output_dirs) @@ -227,12 +231,11 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, sh_commands.bazel_build( mace_run_target, abi=target_abi, - model_tag=model_name, production_mode=False, hexagon_mode=hexagon_mode, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, target_abi, + sh_commands.update_mace_run_lib(model_output_dir, model_name, embed_model_data) tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data, @@ -254,13 +257,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, mace_run_target, strip, abi=target_abi, - model_tag=model_name, production_mode=True, hexagon_mode=hexagon_mode, debug=debug, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, target_abi, + sh_commands.update_mace_run_lib(model_output_dir, model_name, embed_model_data) else: gen_opencl_and_tuning_code(target_abi, serialno, [], False) @@ -268,13 +270,12 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, mace_run_target, strip, abi=target_abi, - model_tag=model_name, production_mode=True, hexagon_mode=hexagon_mode, debug=debug, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, target_abi, + sh_commands.update_mace_run_lib(model_output_dir, model_name, embed_model_data) @@ -525,6 +526,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, target_abi, phone_data_dir, target_soc="", serialno=""): hexagon_mode = get_hexagon_mode(configs) model_output_dirs = [] + for model_name in configs["models"]: print '===================', model_name, '===================' model_config = configs["models"][model_name] @@ -534,16 +536,16 @@ def process_models(project_name, configs, embed_model_data, vlog_level, # Create model build directory model_path_digest = md5sum(model_config["model_file_path"]) + model_output_base_dir = "%s/%s/%s/%s/%s" % ( + FLAGS.output_dir, project_name, "build", + model_name, model_path_digest) if target_abi == "host": - model_output_dir = "%s/%s/%s/%s/%s/%s" % ( - FLAGS.output_dir, project_name, "build", - model_name, model_path_digest, target_abi) + model_output_dir = "%s/%s" % (model_output_base_dir, target_abi) else: device_name = sh_commands.adb_get_device_name_by_serialno(serialno) - model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % ( - FLAGS.output_dir, project_name, "build", - model_name, model_path_digest, device_name.replace(' ', ''), + model_output_dir = "%s/%s_%s/%s" % ( + model_output_base_dir, device_name.replace(' ', ''), target_soc, target_abi) model_output_dirs.append(model_output_dir) @@ -552,16 +554,14 @@ def process_models(project_name, configs, embed_model_data, vlog_level, sh.rm("-rf", model_output_dir) os.makedirs(model_output_dir) - if FLAGS.mode == "build" or FLAGS.mode == 
"benchmark" or \ - FLAGS.mode == "all": - sh_commands.clear_mace_run_data( - target_abi, serialno, phone_data_dir) - model_file_path, weight_file_path = get_model_files( model_config["model_file_path"], - model_output_dir, + model_output_base_dir, model_config["weight_file_path"]) + sh_commands.clear_phone_data_dir( + target_abi, serialno, phone_data_dir) + if FLAGS.mode == "build" or FLAGS.mode == "run" or \ FLAGS.mode == "validate" or \ FLAGS.mode == "benchmark" or FLAGS.mode == "all": @@ -570,25 +570,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["input_shapes"], input_file_list) - if FLAGS.mode == "build" or FLAGS.mode == "benchmark" or \ - FLAGS.mode == "all": - sh_commands.gen_model_code( - "mace/codegen/models/%s" % model_name, - model_config["platform"], - model_file_path, - weight_file_path, - model_config["model_sha256_checksum"], - ",".join(model_config["input_nodes"]), - ",".join(model_config["output_nodes"]), - data_type, - model_config["runtime"], - model_name, - ":".join(model_config["input_shapes"]), - model_config["dsp_mode"], - embed_model_data, - model_config["fast_conv"], - model_config["obfuscate"]) - if FLAGS.mode == "build" or FLAGS.mode == "all": build_mace_run_prod(hexagon_mode, model_config["runtime"], @@ -609,9 +590,14 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["limit_opencl_kernel_time"], phone_data_dir, FLAGS.enable_openmp) + sh_commands.build_benchmark_model(target_abi, + embed_model_data, + model_output_dir, + model_name, + hexagon_mode) if FLAGS.mode == "run" or FLAGS.mode == "validate" or \ - FLAGS.mode == "all": + FLAGS.mode == "all": tuning_run(model_config["runtime"], target_abi, serialno, @@ -647,7 +633,6 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["output_shapes"], model_name, device_type, - hexagon_mode, phone_data_dir, FLAGS.omp_num_threads, FLAGS.cpu_affinity_policy, @@ -738,12 +723,56 @@ def main(unused_args): # generate source sh_commands.gen_mace_version() sh_commands.gen_encrypted_opencl_source() + sh_commands.gen_mace_engine_creator_source(configs['models'].keys()) + embed_model_data = configs["embed_model_data"] target_socs = get_target_socs(configs) - embed_model_data = configs["embed_model_data"] vlog_level = FLAGS.vlog_level phone_data_dir = "/data/local/tmp/mace_run/" + + if FLAGS.mode == "build" or FLAGS.mode == "all": + print '* Model Convert' + sh_commands.clear_model_codegen() + for model_name in configs["models"]: + print '===================', model_name, '===================' + model_config = configs["models"][model_name] + data_type, device_type = get_data_and_device_type( + model_config["runtime"]) + + # Create model build directory + model_path_digest = md5sum(model_config["model_file_path"]) + + model_output_base_dir = "%s/%s/%s/%s/%s" % ( + FLAGS.output_dir, project_name, "build", + model_name, model_path_digest) + + if os.path.exists(model_output_base_dir): + sh.rm("-rf", model_output_base_dir) + os.makedirs(model_output_base_dir) + + model_file_path, weight_file_path = get_model_files( + model_config["model_file_path"], + model_output_base_dir, + model_config["weight_file_path"]) + + sh_commands.gen_model_code( + "mace/codegen/models/%s" % model_name, + model_config["platform"], + model_file_path, + weight_file_path, + model_config["model_sha256_checksum"], + ",".join(model_config["input_nodes"]), + ",".join(model_config["output_nodes"]), + data_type, + model_config["runtime"], + model_name, + 
":".join(model_config["input_shapes"]), + model_config["dsp_mode"], + embed_model_data, + model_config["fast_conv"], + model_config["obfuscate"]) + for target_abi in configs["target_abis"]: for target_soc in target_socs: if target_abi != 'host': diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 01656d3f..97c89ea8 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -33,6 +33,7 @@ try: from binary_codegen import tuning_param_codegen from generate_data import generate_input_data from validate import validate + from mace_engine_generator import gen_mace_engine_creator except Exception as e: print("Import error:\n%s" % e) exit(1) @@ -74,15 +75,15 @@ def is_device_locked(serialno): ################################ # clear data ################################ -def clear_mace_run_data(abi, - serialno, - phone_data_dir, - model_codegen_dir="mace/codegen/models"): +def clear_phone_data_dir(abi, serialno, phone_data_dir): if abi != "host": sh.adb("-s", serialno, "shell", "rm -rf %s" % phone_data_dir) + + +def clear_model_codegen(model_codegen_dir="mace/codegen/models"): if os.path.exists(model_codegen_dir): sh.rm("-rf", model_codegen_dir) @@ -268,7 +269,6 @@ def adb_run_valgrind(serialno, def bazel_build(target, strip="always", abi="armeabi-v7a", - model_tag="", production_mode=False, hexagon_mode=False, disable_no_tuning_warning=False, @@ -289,7 +289,6 @@ def bazel_build(target, "--copt=-std=c++11", "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", "--copt=-Werror=return-type", - "--copt=-DMACE_MODEL_TAG=%s" % model_tag, "--copt=-O3", "--define", "openmp=%s" % str(enable_openmp).lower(), @@ -315,7 +314,6 @@ def bazel_build(target, "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", "--copt=-Werror=return-type", "--copt=-DMACE_OBFUSCATE_LITERALS", - "--copt=-DMACE_MODEL_TAG=%s" % model_tag, "--copt=-O3", "--define", "neon=true", @@ -371,7 +369,21 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"): "mace/codegen/opencl/opencl_encrypt_program.cc") -def pull_binaries(abi, serialno, model_output_dirs): +def gen_mace_engine_creator_source(model_tags, codegen_path="mace/codegen"): + print("* Genearte mace engine creator source") + codegen_tools_dir = "%s/engine" % codegen_path + sh.rm("-rf", codegen_tools_dir) + sh.mkdir("-p", codegen_tools_dir) + gen_mace_engine_creator( + model_tags, + "mace/python/tools", + codegen_tools_dir) + print("Genearte mace engine creator source done!\n") + + +def pull_binaries(abi, serialno, model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name): compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/" mace_run_param_file = "mace_run.config" @@ -385,15 +397,18 @@ def pull_binaries(abi, serialno, model_output_dirs): sh.rm("-rf", cl_bin_dir) sh.mkdir("-p", cl_bin_dir) if abi != "host": - adb_pull(compiled_opencl_dir, cl_bin_dir, serialno) + adb_pull(compiled_opencl_dir + cl_built_kernel_file_name, + cl_bin_dir, serialno) + adb_pull(compiled_opencl_dir + cl_platform_info_file_name, + cl_bin_dir, serialno) adb_pull("/data/local/tmp/mace_run/%s" % mace_run_param_file, cl_bin_dir, serialno) def gen_opencl_binary_code(model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name, codegen_path="mace/codegen"): - cl_built_kernel_file_name = "mace_cl_compiled_program.bin" - cl_platform_info_file_name = "mace_cl_platform_info.txt" opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path cl_bin_dirs = [] @@ -528,25 +543,8 @@ def gen_random_input(model_output_dir, def update_mace_run_lib(model_output_dir, - abi, 
model_tag, - embed_model_data, - generated_model_lib_dir="bazel-bin/mace/codegen/"): - model_lib_path = model_output_dir + "/libmace_%s.a" % model_tag - if abi == "host": - bazel_build( - "//mace/codegen:generated_models", - abi=abi, - model_tag=model_tag) - generated_model_lib_name = "libgenerated_models.pic.a" - else: - generated_model_lib_name = "libgenerated_models.a" - - if os.path.exists(model_lib_path): - sh.rm("-rf", model_lib_path) - sh.cp("-f", generated_model_lib_dir + "/" + generated_model_lib_name, - model_lib_path) - + embed_model_data): mace_run_filepath = model_output_dir + "/mace_run" if os.path.exists(mace_run_filepath): sh.rm("-rf", mace_run_filepath) @@ -560,6 +558,11 @@ def update_mace_run_lib(model_output_dir, model_output_dir) +def create_compiled_opencl_dir(serialno): + compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/" + sh.adb("-s", serialno, "shell", "mkdir", "-p", compiled_opencl_dir) + + def tuning_run(abi, serialno, vlog_level, @@ -598,6 +601,7 @@ def tuning_run(abi, "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "%s/mace_run" % model_output_dir, + "--model_tag=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), @@ -622,8 +626,7 @@ def tuning_run(abi, return stdout else: sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir) - compiled_opencl_dir = "/data/local/tmp/mace_run/cl_program/" - sh.adb("-s", serialno, "shell", "mkdir", "-p", compiled_opencl_dir) + create_compiled_opencl_dir(serialno) for input_name in input_nodes: formatted_name = common.formatted_file_name(input_file_name, @@ -657,6 +660,7 @@ def tuning_run(abi, ]) adb_cmd.extend([ "%s/mace_run" % phone_data_dir, + "--model_tag=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), @@ -846,6 +850,12 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_models.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_mace_engine_creator.pic.a\n") else: mri_stream += "create %s/libmace_%s.%s.a\n" % \ (model_bin_dir, project_name, target_soc) @@ -858,6 +868,12 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_version.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_models.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_mace_engine_creator.a\n") mri_stream += ( "addlib " "bazel-bin/mace/core/libcore.a\n") @@ -875,8 +891,6 @@ def merge_libs(target_soc, "bazel-bin/mace/ops/libops.lo\n") for model_output_dir in model_output_dirs: - for lib in sh.ls(glob.glob("%s/*.a" % model_output_dir), "-1"): - mri_stream += "addlib %s\n" % lib if not embed_model_data: sh.cp("-f", glob.glob("%s/*.data" % model_output_dir), model_data_dir) @@ -921,6 +935,28 @@ def packaging_lib(libmace_output_dir, project_name): print("Packaging Done!\n") +def build_benchmark_model(abi, + embed_model_data, + model_output_dir, + model_tag, + hexagon_mode): + benchmark_binary_file = "%s/benchmark_model" % model_output_dir + if os.path.exists(benchmark_binary_file): + sh.rm("-rf", benchmark_binary_file) + if not embed_model_data: + sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag), + model_output_dir) + + benchmark_target = "//mace/benchmark:benchmark_model" + 
bazel_build(benchmark_target, + abi=abi, + production_mode=True, + hexagon_mode=hexagon_mode) + + target_bin = "/".join(bazel_target_to_bin(benchmark_target)) + sh.cp("-f", target_bin, model_output_dir) + + def benchmark_model(abi, serialno, vlog_level, @@ -932,31 +968,13 @@ def benchmark_model(abi, output_shapes, model_tag, device_type, - hexagon_mode, phone_data_dir, omp_num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, gpu_priority_hint=3, - input_file_name="model_input", - output_file_name="model_out"): + input_file_name="model_input"): print("* Benchmark for %s" % model_tag) - benchmark_binary_file = "%s/benchmark_model" % model_output_dir - if os.path.exists(benchmark_binary_file): - sh.rm("-rf", benchmark_binary_file) - if not embed_model_data: - sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag), - model_output_dir) - - benchmark_target = "//mace/benchmark:benchmark_model" - bazel_build(benchmark_target, - abi=abi, - model_tag=model_tag, - production_mode=True, - hexagon_mode=hexagon_mode) - - target_bin = "/".join(bazel_target_to_bin(benchmark_target)) - sh.cp("-f", target_bin, model_output_dir) stdout_buff = [] process_output = make_output_processor(stdout_buff) @@ -966,6 +984,7 @@ def benchmark_model(abi, "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "%s/benchmark_model" % model_output_dir, + "--model_tag=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), @@ -981,6 +1000,7 @@ def benchmark_model(abi, p.wait() else: sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir) + create_compiled_opencl_dir(serialno) for input_name in input_nodes: formatted_name = common.formatted_file_name(input_file_name, @@ -1002,6 +1022,7 @@ def benchmark_model(abi, phone_data_dir, "MACE_OPENCL_PROFILING=1", "%s/benchmark_model" % phone_data_dir, + "--model_tag=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), -- GitLab
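
Caller-side usage sketch (not part of the patch): the snippet below illustrates how application code is expected to use the CreateMaceEngine() API that this patch adds to mace/public/mace.h, following the updated mace/examples/example.cc above. The template arguments (std::unique_ptr<mace::MaceEngine>, std::vector<std::string>, std::map<std::string, mace::MaceTensor>) were lost from the angle brackets in the inlined diff and are assumed here; the model tag "mobilenet_v1", the file paths, the tensor shapes, and the thread settings are placeholders rather than values taken from the patch.

#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"

int main() {
  const std::vector<std::string> input_names = {"input_node0"};
  const std::vector<std::string> output_names = {"output_node0"};

  // Configure the runtime before creating the engine, in the same order as
  // benchmark_model.cc and mace_run.cc in this patch.
  mace::SetOpenMPThreadPolicy(4, mace::AFFINITY_BIG_ONLY);

  // GPU only: point the OpenCL kernel cache at a directory owned by the app.
  std::shared_ptr<mace::KVStorageFactory> storage_factory(
      new mace::FileStorageFactory("/path/owned/by/app/cl_program"));
  mace::SetKVStorageFactory(storage_factory);

  // One call replaces the old LoadModelData/CreateNet/MaceEngine sequence;
  // the model is selected at runtime by the tag baked into the generated
  // mace/codegen/engine/mace_engine_creator.cc.
  std::unique_ptr<mace::MaceEngine> engine =
      mace::CreateMaceEngine("mobilenet_v1",         // model tag from the YAML config
                             input_names,
                             output_names,
                             "/path/to/model.data",  // nullptr when the data is embedded
                             mace::DeviceType::GPU);

  // Allocate input/output buffers matching the model's shapes (placeholder shapes).
  const std::vector<int64_t> input_shape = {1, 224, 224, 3};
  const std::vector<int64_t> output_shape = {1, 1001};
  auto buffer_in = std::shared_ptr<float>(new float[1 * 224 * 224 * 3],
                                          std::default_delete<float[]>());
  auto buffer_out = std::shared_ptr<float>(new float[1 * 1001],
                                           std::default_delete<float[]>());
  // ... fill buffer_in with real input data ...

  std::map<std::string, mace::MaceTensor> inputs;
  std::map<std::string, mace::MaceTensor> outputs;
  inputs[input_names[0]] = mace::MaceTensor(input_shape, buffer_in);
  outputs[output_names[0]] = mace::MaceTensor(output_shape, buffer_out);

  engine->Run(inputs, &outputs);
  return 0;
}

Because CreateMaceEngine() dispatches on the model tag at runtime, a single mace_run or benchmark_model binary can serve every model listed in the YAML config, which is what allows this patch to drop the per-model -DMACE_MODEL_TAG compile flag and build each target once instead of once per model.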