From 4b6dc13c6a9cb78b8a40e5e232b1f2b597b6181d Mon Sep 17 00:00:00 2001
From: Liangliang He <lliang.he@gmail.com>
Date: Wed, 4 Apr 2018 16:33:16 +0800
Subject: [PATCH] Add model benchmark metrics

---
 mace/examples/mace_run.cc | 38 +++++++++++++++---------
 tools/bazel_adb_run.py    | 10 +++++--
 tools/mace_tools.py       | 61 +++++++++++++++++++++++++++++----------
 tools/sh_commands.py      | 21 ++++++++------
 4 files changed, 91 insertions(+), 39 deletions(-)
diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc
index f71ca08d..207cfbec 100644
--- a/mace/examples/mace_run.cc
+++ b/mace/examples/mace_run.cc
@@ -16,6 +16,7 @@
  */
 #include <malloc.h>
 #include <stdint.h>
+#include <cstdio>
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
@@ -189,8 +190,8 @@ bool RunModel(const std::vector<std::string> &input_names,
       mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
   NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
   int64_t t1 = NowMicros();
-  LOG(INFO) << "CreateNetDef latency: " << t1 - t0 << " us";
-  int64_t init_micros = t1 - t0;
+  double create_net_millis = (t1 - t0) / 1000.0;
+  LOG(INFO) << "CreateNetDef latency: " << create_net_millis << " ms";
 
   DeviceType device_type = ParseDeviceType(FLAGS_device);
   LOG(INFO) << "Runing with device type: " << device_type;
@@ -207,15 +208,16 @@ bool RunModel(const std::vector<std::string> &input_names,
 
   // Init model
   LOG(INFO) << "Run init";
-  t0 = NowMicros();
   mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
   if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
     mace::MACE_MODEL_TAG::UnloadModelData(model_data);
   }
-  t1 = NowMicros();
-  init_micros += t1 - t0;
-  LOG(INFO) << "Net init latency: " << t1 - t0 << " us";
-  LOG(INFO) << "Total init latency: " << init_micros << " us";
+  int64_t t2 = NowMicros();
+  double mace_engine_ctor_millis = (t2 - t1) / 1000.0;
+  double init_millis = (t2 - t0) / 1000.0;
+  LOG(INFO) << "MaceEngine constructor latency: "
+            << mace_engine_ctor_millis << " ms";
+  LOG(INFO) << "Total init latency: " << init_millis << " ms";
 
   const size_t input_count = input_names.size();
   const size_t output_count = output_names.size();
@@ -253,14 +255,16 @@ bool RunModel(const std::vector<std::string> &input_names,
   }
 
   LOG(INFO) << "Warm up run";
-  t0 = NowMicros();
+  int64_t t3 = NowMicros();
   engine.Run(inputs, &outputs);
-  t1 = NowMicros();
-  LOG(INFO) << "1st warm up run latency: " << t1 - t0 << " us";
+  int64_t t4 = NowMicros();
+  double warmup_millis = (t4 - t3) / 1000.0;
+  LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
 
+  double model_run_millis = -1;
   if (FLAGS_round > 0) {
     LOG(INFO) << "Run model";
-    t0 = NowMicros();
+    int64_t t0 = NowMicros();
     struct mallinfo prev = mallinfo();
     for (int i = 0; i < FLAGS_round; ++i) {
       engine.Run(inputs, &outputs);
@@ -269,10 +273,18 @@ bool RunModel(const std::vector<std::string> &input_names,
         prev = LogMallinfoChange(prev);
       }
     }
-    t1 = NowMicros();
-    LOG(INFO) << "Average latency: " << (t1 - t0) / FLAGS_round << " us";
+    int64_t t1 = NowMicros();
+    model_run_millis = (t1 - t0) / 1000.0 / FLAGS_round;
+    LOG(INFO) << "Average latency: " << model_run_millis << " ms";
   }
 
+  // Metrics reporting tools depend on this output format; keep it consistent
+  printf("================================================================\n");
+  printf("      create_net engine_ctor        init      warmup     run_avg\n");
+  printf("================================================================\n");
+  printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis,
+         mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis);
+
   for (size_t i = 0; i < output_count; ++i) {
     std::string output_name =
         FLAGS_output_file + "_" + FormatName(output_names[i]);
diff --git a/tools/bazel_adb_run.py b/tools/bazel_adb_run.py
index 6e083de2..ea0fe7dc 100644
--- a/tools/bazel_adb_run.py
+++ b/tools/bazel_adb_run.py
@@ -33,10 +33,16 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
     line = line.strip()
     parts = line.split()
     if len(parts) == 5 and parts[0].startswith("BM_"):
-      metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1000000.0)
+      metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1e6)
       metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
       metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
-  sh_commands.falcon_push_metrics(metrics, device_properties, abi,
+
+  platform = device_properties["ro.board.platform"].replace(" ", "-")
+  model = device_properties["ro.product.model"].replace(" ", "-")
+  tags = {"ro.board.platform": platform,
+          "ro.product.model": model,
+          "abi": abi}
+  sh_commands.falcon_push_metrics(metrics, tags=tags,
                                   endpoint="mace_ops_benchmark")
 
 def parse_args():
diff --git a/tools/mace_tools.py b/tools/mace_tools.py
index f9dd9181..a00b15cf 100644
--- a/tools/mace_tools.py
+++ b/tools/mace_tools.py
@@ -9,6 +9,7 @@
 import argparse
 import hashlib
 import os
+import sh
 import shutil
 import subprocess
 import sys
@@ -111,18 +112,42 @@ def build_mace_run(production_mode, model_output_dir, hexagon_mode):
   run_command(command)
 
 
-def tuning_run(target_soc,
+def tuning_run(model_name,
+               target_runtime,
+               target_abi,
+               target_soc,
                model_output_dir,
                running_round,
                tuning,
                production_mode,
                restart_round,
                option_args=''):
-  command = "bash tools/tuning_run.sh {} {} {} {} {} {} \"{}\"".format(
-      target_soc, model_output_dir, running_round, int(tuning),
-      int(production_mode), restart_round, option_args)
-  run_command(command)
-
+  # TODO(yejianwu) refactor the hackish code
+  stdout_buff = []
+  process_output = sh_commands.make_output_processor(stdout_buff)
+  p = sh.bash("tools/tuning_run.sh", target_soc, model_output_dir,
+              running_round, int(tuning), int(production_mode),
+              restart_round, option_args, _out=process_output,
+              _bg=True, _err_to_out=True)
+  p.wait()
+  # Columns after "time"; mace_run prints run_avg as -1 if no rounds ran.
+  names = ["create_net_ms", "mace_engine_ctor_ms", "init_ms", "warmup_ms",
+           "avg_latency_ms"]
+  metrics = {}
+  for line in stdout_buff:
+    parts = line.split()
+    if len(parts) != 6 or parts[0] != "time":
+      continue
+    for name, value in zip(names, parts[1:]):
+      if name != "avg_latency_ms" or float(value) > 0:
+        metrics["%s.%s" % (model_name, name)] = str(float(value))
+  tags = {"ro.board.platform": target_soc,
+          "abi": target_abi,
+          # "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
+          "round": running_round, # TODO(yejianwu) change this to source/binary
+          "tuning": tuning}
+  sh_commands.falcon_push_metrics(metrics, endpoint="mace_model_benchmark",
+                                  tags=tags)
 
 def benchmark_model(target_soc, model_output_dir, option_args=''):
   command = "bash tools/benchmark.sh {} {} \"{}\"".format(
@@ -130,9 +155,10 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
   run_command(command)
 
 
-def run_model(target_soc, model_output_dir, running_round, restart_round,
-              option_args):
-  tuning_run(target_soc, model_output_dir, running_round, False, False,
+def run_model(model_name, target_runtime, target_abi, target_soc,
+              model_output_dir, running_round, restart_round, option_args):
+  tuning_run(model_name, target_runtime, target_abi, target_soc,
+             model_output_dir, running_round, False, False,  # tuning, production_mode
              restart_round, option_args)
 
 
@@ -146,8 +172,9 @@ def generate_production_code(target_soc, model_output_dirs, pull_or_not):
   run_command(command)
 
 
-def build_mace_run_prod(target_soc, model_output_dir, tuning, global_runtime):
-  if "dsp" == global_runtime:
+def build_mace_run_prod(model_name, target_runtime, target_abi, target_soc,
+                        model_output_dir, tuning):
+  if "dsp" == target_runtime:
     hexagon_mode = True
   else:
     hexagon_mode = False
@@ -155,6 +182,9 @@ def build_mace_run_prod(target_soc, model_output_dir, tuning, global_runtime):
   production_or_not = False
   build_mace_run(production_or_not, model_output_dir, hexagon_mode)
   tuning_run(
+      model_name,
+      target_runtime,
+      target_abi, 
       target_soc,
       model_output_dir,
       running_round=0,
@@ -346,12 +376,13 @@ def main(unused_args):
 
         if FLAGS.mode == "build" or FLAGS.mode == "all":
           generate_model_code()
-          build_mace_run_prod(target_soc, model_output_dir, FLAGS.tuning,
-                              global_runtime)
+          build_mace_run_prod(model_name, global_runtime, target_abi,
+                              target_soc, model_output_dir, FLAGS.tuning)
 
         if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
-          run_model(target_soc, model_output_dir, FLAGS.round,
-                    FLAGS.restart_round, option_args)
+          run_model(model_name, global_runtime, target_abi, target_soc,
+                    model_output_dir, FLAGS.round, FLAGS.restart_round,
+                    option_args)
 
         if FLAGS.mode == "benchmark":
           benchmark_model(target_soc, model_output_dir, option_args)
diff --git a/tools/sh_commands.py b/tools/sh_commands.py
index 4b47544f..a4a16a56 100644
--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -78,7 +78,7 @@ def adb_run(serialno, host_bin_path, bin_name,
   sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path)
   sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_cl_path)
   print("Push %s to %s" % (host_bin_full_path, device_bin_full_path))
-  sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_path)
+  sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path)
   print("Run %s" % device_bin_full_path)
   stdout_buff=[]
   process_output = make_output_processor(stdout_buff)
@@ -142,21 +142,24 @@ def gen_mace_version(codegen_path="mace/codegen"):
 ################################
 # falcon
 ################################
-def falcon_tags(platform, model, abi):
-  return "ro.board.platform=%s,ro.product.model=%s,abi=%s" % (platform, model, abi)
-
-def falcon_push_metrics(metrics, device_properties, abi, endpoint="mace_dev"):
+def falcon_tags(tags_dict):
+  """Format a dict of tags as the comma-separated "k=v" string sent to falcon.
+
+  Pairs are emitted in the dict's iteration order; an empty dict gives "".
+  """
+  # items() instead of iteritems() so this also runs under Python 3.
+  pairs = ["%s=%s" % (k, v) for k, v in tags_dict.items()]
+  return ",".join(pairs)
+
+def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
   cli = falcon_cli.FalconCli.connect(server="transfer.falcon.miliao.srv",
                                      port=8433,
                                      debug=False)
-  platform = device_properties["ro.board.platform"].replace(" ", "-")
-  model = device_properties["ro.product.model"].replace(" ", "-")
-  tags = falcon_tags(platform, model, abi)
   ts = int(time.time())
   falcon_metrics = [{
       "endpoint": endpoint,
       "metric": key,
-      "tags": tags,
+      "tags": falcon_tags(tags),
       "timestamp": ts,
       "value": value,
       "step": 86400,
-- 
GitLab