Add model benchmark metrics

4b6dc13c · Liangliang He · db75d542 · 4b6dc13c · 4b6dc13c · 4b6dc13c
Showing with 91 additions and 39 deletions

mace/examples/mace_run.cc mace/examples/mace_run.cc +25 -13

tools/bazel_adb_run.py tools/bazel_adb_run.py +8 -2

tools/mace_tools.py tools/mace_tools.py +46 -15

tools/sh_commands.py tools/sh_commands.py +12 -9

未找到文件。
--- a/mace/examples/mace_run.cc
+++ b/mace/examples/mace_run.cc
@@ -16,6 +16,7 @@
 */
 #include <malloc.h>
 #include <stdint.h>
+#include <cstdio>
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
@@ -189,8 +190,8 @@ bool RunModel(const std::vector<std::string> &input_names,
      mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
  NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
  int64_t t1 = NowMicros();
-  LOG(INFO) << "CreateNetDef latency: " << t1 - t0 << " us";
-  int64_t init_micros = t1 - t0;
+  double create_net_millis = (t1 - t0) / 1000.0;
+  LOG(INFO) << "CreateNetDef latency: " << create_net_millis << " ms";

  DeviceType device_type = ParseDeviceType(FLAGS_device);
  LOG(INFO) << "Runing with device type: " << device_type;
@@ -207,15 +208,16 @@ bool RunModel(const std::vector<std::string> &input_names,

  // Init model
  LOG(INFO) << "Run init";
-  t0 = NowMicros();
  mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
  if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
    mace::MACE_MODEL_TAG::UnloadModelData(model_data);
  }
-  t1 = NowMicros();
-  init_micros += t1 - t0;
-  LOG(INFO) << "Net init latency: " << t1 - t0 << " us";
-  LOG(INFO) << "Total init latency: " << init_micros << " us";
+  int64_t t2 = NowMicros();
+  double mace_engine_ctor_millis = (t2 - t1) / 1000.0;
+  double init_millis = (t2 - t0) / 1000.0;
+  LOG(INFO) << "MaceEngine constructor latency: "
+            << mace_engine_ctor_millis << " ms";
+  LOG(INFO) << "Total init latency: " << init_millis << " ms";

  const size_t input_count = input_names.size();
  const size_t output_count = output_names.size();
@@ -253,14 +255,16 @@ bool RunModel(const std::vector<std::string> &input_names,
  }

  LOG(INFO) << "Warm up run";
-  t0 = NowMicros();
+  int64_t t3 = NowMicros();
  engine.Run(inputs, &outputs);
-  t1 = NowMicros();
-  LOG(INFO) << "1st warm up run latency: " << t1 - t0 << " us";
+  int64_t t4 = NowMicros();
+  double warmup_millis = (t4 - t3) / 1000.0;
+  LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";

+  double model_run_millis = -1;
  if (FLAGS_round > 0) {
    LOG(INFO) << "Run model";
-    t0 = NowMicros();
+    int64_t t0 = NowMicros();
    struct mallinfo prev = mallinfo();
    for (int i = 0; i < FLAGS_round; ++i) {
      engine.Run(inputs, &outputs);
@@ -269,10 +273,18 @@ bool RunModel(const std::vector<std::string> &input_names,
        prev = LogMallinfoChange(prev);
      }
    }
-    t1 = NowMicros();
-    LOG(INFO) << "Average latency: " << (t1 - t0) / FLAGS_round << " us";
+    int64_t t1 = NowMicros();
+    model_run_millis = (t1 - t0) / 1000.0 / FLAGS_round;
+    LOG(INFO) << "Average latency: " << model_run_millis << " ms";
  }

+  // Metrics reporting tools depend on this output format; keep it consistent.
+  printf("================================================================\n");
+  printf("      create_net engine_ctor        init      warmup     run_avg\n");
+  printf("================================================================\n");
+  printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis,
+         mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis);
+
  for (size_t i = 0; i < output_count; ++i) {
    std::string output_name =
        FLAGS_output_file + "_" + FormatName(output_names[i]);

--- a/tools/bazel_adb_run.py
+++ b/tools/bazel_adb_run.py
@@ -33,10 +33,16 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
    line = line.strip()
    parts = line.split()
    if len(parts) == 5 and parts[0].startswith("BM_"):
-      metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1000000.0)
+      metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1e6)
      metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
      metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
-  sh_commands.falcon_push_metrics(metrics, device_properties, abi,
+
+  platform = device_properties["ro.board.platform"].replace(" ", "-")
+  model = device_properties["ro.product.model"].replace(" ", "-")
+  tags = {"ro.board.platform": platform,
+          "ro.product.model": model,
+          "abi": abi}
+  sh_commands.falcon_push_metrics(metrics, tags=tags,
                                  endpoint="mace_ops_benchmark")

 def parse_args():

--- a/tools/mace_tools.py
+++ b/tools/mace_tools.py
@@ -9,6 +9,7 @@
 import argparse
 import hashlib
 import os
+import sh
 import shutil
 import subprocess
 import sys
@@ -111,18 +112,42 @@ def build_mace_run(production_mode, model_output_dir, hexagon_mode):
  run_command(command)


-def tuning_run(target_soc,
+def tuning_run(model_name,
+               target_runtime,
+               target_abi,
+               target_soc,
               model_output_dir,
               running_round,
               tuning,
               production_mode,
               restart_round,
               option_args=''):
-  command = "bash tools/tuning_run.sh {} {} {} {} {} {} \"{}\"".format(
-      target_soc, model_output_dir, running_round, int(tuning),
-      int(production_mode), restart_round, option_args)
-  run_command(command)
-
+  # TODO(yejianwu) refactor the hackish code
+  stdout_buff = []
+  process_output = sh_commands.make_output_processor(stdout_buff)
+  p = sh.bash("tools/tuning_run.sh", target_soc, model_output_dir,
+              running_round, int(tuning), int(production_mode),
+              restart_round, option_args, _out=process_output,
+              _bg=True, _err_to_out=True)
+  p.wait()
+  metrics = {}
+  for line in stdout_buff:
+    line = line.strip()
+    parts = line.split()
+    if len(parts) == 6 and parts[0].startswith("time"):
+      metrics["%s.create_net_ms" % model_name] = str(float(parts[1]))
+      metrics["%s.mace_engine_ctor_ms" % model_name] = str(float(parts[2]))
+      metrics["%s.init_ms" % model_name] = str(float(parts[3]))
+      metrics["%s.warmup_ms" % model_name] = str(float(parts[4]))
+      if float(parts[5]) > 0:
+        metrics["%s.avg_latency_ms" % model_name] = str(float(parts[5]))
+  tags = {"ro.board.platform": target_soc,
+          "abi": target_abi,
+          # "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
+          "round": running_round, # TODO(yejianwu) change this to source/binary
+          "tuning": tuning}
+  sh_commands.falcon_push_metrics(metrics, endpoint="mace_model_benchmark",
+                                  tags=tags)

 def benchmark_model(target_soc, model_output_dir, option_args=''):
  command = "bash tools/benchmark.sh {} {} \"{}\"".format(
@@ -130,9 +155,10 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
  run_command(command)


-def run_model(target_soc, model_output_dir, running_round, restart_round,
-              option_args):
-  tuning_run(target_soc, model_output_dir, running_round, False, False,
+def run_model(model_name, target_runtime, target_abi, target_soc,
+              model_output_dir, running_round, restart_round, option_args):
+  tuning_run(model_name, target_runtime, target_abi, target_soc,
+             model_output_dir, running_round, False, False,
             restart_round, option_args)


@@ -146,8 +172,9 @@ def generate_production_code(target_soc, model_output_dirs, pull_or_not):
  run_command(command)


-def build_mace_run_prod(target_soc, model_output_dir, tuning, global_runtime):
-  if "dsp" == global_runtime:
+def build_mace_run_prod(model_name, target_runtime, target_abi, target_soc,
+                        model_output_dir, tuning):
+  if "dsp" == target_runtime:
    hexagon_mode = True
  else:
    hexagon_mode = False
@@ -155,6 +182,9 @@ def build_mace_run_prod(target_soc, model_output_dir, tuning, global_runtime):
  production_or_not = False
  build_mace_run(production_or_not, model_output_dir, hexagon_mode)
  tuning_run(
+      model_name,
+      target_runtime,
+      target_abi, 
      target_soc,
      model_output_dir,
      running_round=0,
@@ -346,12 +376,13 @@ def main(unused_args):

        if FLAGS.mode == "build" or FLAGS.mode == "all":
          generate_model_code()
-          build_mace_run_prod(target_soc, model_output_dir, FLAGS.tuning,
-                              global_runtime)
+          build_mace_run_prod(model_name, global_runtime, target_abi,
+                              target_soc, model_output_dir, FLAGS.tuning)

        if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
-          run_model(target_soc, model_output_dir, FLAGS.round,
-                    FLAGS.restart_round, option_args)
+          run_model(model_name, global_runtime, target_abi, target_soc,
+                    model_output_dir, FLAGS.round, FLAGS.restart_round,
+                    option_args)

        if FLAGS.mode == "benchmark":
          benchmark_model(target_soc, model_output_dir, option_args)

--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -78,7 +78,7 @@ def adb_run(serialno, host_bin_path, bin_name,
  sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path)
  sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_cl_path)
  print("Push %s to %s" % (host_bin_full_path, device_bin_full_path))
-  sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_path)
+  sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path)
  print("Run %s" % device_bin_full_path)
  stdout_buff=[]
  process_output = make_output_processor(stdout_buff)
@@ -142,21 +142,24 @@ def gen_mace_version(codegen_path="mace/codegen"):
 ################################
 # falcon
 ################################
-def falcon_tags(platform, model, abi):
-  return "ro.board.platform=%s,ro.product.model=%s,abi=%s" % (platform, model, abi)
-
-def falcon_push_metrics(metrics, device_properties, abi, endpoint="mace_dev"):
+def falcon_tags(tags_dict):
+  tags = ""
+  for k, v in tags_dict.iteritems():
+    if tags == "":
+      tags = "%s=%s" % (k, v)
+    else:
+      tags = tags + ",%s=%s" % (k, v)
+  return tags
+
+def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
  cli = falcon_cli.FalconCli.connect(server="transfer.falcon.miliao.srv",
                                     port=8433,
                                     debug=False)
-  platform = device_properties["ro.board.platform"].replace(" ", "-")
-  model = device_properties["ro.product.model"].replace(" ", "-")
-  tags = falcon_tags(platform, model, abi)
  ts = int(time.time())
  falcon_metrics = [{
      "endpoint": endpoint,
      "metric": key,
-      "tags": tags,
+      "tags": falcon_tags(tags),
      "timestamp": ts,
      "value": value,
      "step": 86400,