提交 4b6dc13c 编写于 作者: L Liangliang He

Add model benchmark metrics

上级 db75d542
......@@ -16,6 +16,7 @@
*/
#include <malloc.h>
#include <stdint.h>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
......@@ -189,8 +190,8 @@ bool RunModel(const std::vector<std::string> &input_names,
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
int64_t t1 = NowMicros();
LOG(INFO) << "CreateNetDef latency: " << t1 - t0 << " us";
int64_t init_micros = t1 - t0;
double create_net_millis = (t1 - t0) / 1000.0;
LOG(INFO) << "CreateNetDef latency: " << create_net_millis << " ms";
DeviceType device_type = ParseDeviceType(FLAGS_device);
LOG(INFO) << "Runing with device type: " << device_type;
......@@ -207,15 +208,16 @@ bool RunModel(const std::vector<std::string> &input_names,
// Init model
LOG(INFO) << "Run init";
t0 = NowMicros();
mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
t1 = NowMicros();
init_micros += t1 - t0;
LOG(INFO) << "Net init latency: " << t1 - t0 << " us";
LOG(INFO) << "Total init latency: " << init_micros << " us";
int64_t t2 = NowMicros();
double mace_engine_ctor_millis = (t2 - t1) / 1000.0;
double init_millis = (t2 - t0) / 1000.0;
LOG(INFO) << "MaceEngine constructor latency: "
<< mace_engine_ctor_millis << " ms";
LOG(INFO) << "Total init latency: " << init_millis << " ms";
const size_t input_count = input_names.size();
const size_t output_count = output_names.size();
......@@ -253,14 +255,16 @@ bool RunModel(const std::vector<std::string> &input_names,
}
LOG(INFO) << "Warm up run";
t0 = NowMicros();
int64_t t3 = NowMicros();
engine.Run(inputs, &outputs);
t1 = NowMicros();
LOG(INFO) << "1st warm up run latency: " << t1 - t0 << " us";
int64_t t4 = NowMicros();
double warmup_millis = (t4 - t3) / 1000.0;
LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
double model_run_millis = -1;
if (FLAGS_round > 0) {
LOG(INFO) << "Run model";
t0 = NowMicros();
int64_t t0 = NowMicros();
struct mallinfo prev = mallinfo();
for (int i = 0; i < FLAGS_round; ++i) {
engine.Run(inputs, &outputs);
......@@ -269,10 +273,18 @@ bool RunModel(const std::vector<std::string> &input_names,
prev = LogMallinfoChange(prev);
}
}
t1 = NowMicros();
LOG(INFO) << "Average latency: " << (t1 - t0) / FLAGS_round << " us";
int64_t t1 = NowMicros();
model_run_millis = (t1 - t0) / 1000.0 / FLAGS_round;
LOG(INFO) << "Average latency: " << model_run_millis << " ms";
}
// Metrics reporting tools depend on this output format; keep it consistent.
printf("================================================================\n");
printf(" create_net engine_ctor init warmup run_avg\n");
printf("================================================================\n");
printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis,
mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis);
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
FLAGS_output_file + "_" + FormatName(output_names[i]);
......
......@@ -33,10 +33,16 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
line = line.strip()
parts = line.split()
if len(parts) == 5 and parts[0].startswith("BM_"):
metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1000000.0)
metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1e6)
metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
sh_commands.falcon_push_metrics(metrics, device_properties, abi,
platform = device_properties["ro.board.platform"].replace(" ", "-")
model = device_properties["ro.product.model"].replace(" ", "-")
tags = {"ro.board.platform": platform,
"ro.product.model": model,
"abi": abi}
sh_commands.falcon_push_metrics(metrics, tags=tags,
endpoint="mace_ops_benchmark")
def parse_args():
......
......@@ -9,6 +9,7 @@
import argparse
import hashlib
import os
import sh
import shutil
import subprocess
import sys
......@@ -111,18 +112,42 @@ def build_mace_run(production_mode, model_output_dir, hexagon_mode):
run_command(command)
def tuning_run(target_soc,
def tuning_run(model_name,
target_runtime,
target_abi,
target_soc,
model_output_dir,
running_round,
tuning,
production_mode,
restart_round,
option_args=''):
command = "bash tools/tuning_run.sh {} {} {} {} {} {} \"{}\"".format(
target_soc, model_output_dir, running_round, int(tuning),
int(production_mode), restart_round, option_args)
run_command(command)
# TODO(yejianwu): refactor this hackish code
stdout_buff = []
process_output = sh_commands.make_output_processor(stdout_buff)
p = sh.bash("tools/tuning_run.sh", target_soc, model_output_dir,
running_round, int(tuning), int(production_mode),
restart_round, option_args, _out=process_output,
_bg=True, _err_to_out=True)
p.wait()
metrics = {}
for line in stdout_buff:
line = line.strip()
parts = line.split()
if len(parts) == 6 and parts[0].startswith("time"):
metrics["%s.create_net_ms" % model_name] = str(float(parts[1]))
metrics["%s.mace_engine_ctor_ms" % model_name] = str(float(parts[2]))
metrics["%s.init_ms" % model_name] = str(float(parts[3]))
metrics["%s.warmup_ms" % model_name] = str(float(parts[4]))
if float(parts[5]) > 0:
metrics["%s.avg_latency_ms" % model_name] = str(float(parts[5]))
tags = {"ro.board.platform": target_soc,
"abi": target_abi,
# "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
"round": running_round, # TODO(yejianwu) change this to source/binary
"tuning": tuning}
sh_commands.falcon_push_metrics(metrics, endpoint="mace_model_benchmark",
tags=tags)
def benchmark_model(target_soc, model_output_dir, option_args=''):
command = "bash tools/benchmark.sh {} {} \"{}\"".format(
......@@ -130,9 +155,10 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
run_command(command)
def run_model(target_soc, model_output_dir, running_round, restart_round,
option_args):
tuning_run(target_soc, model_output_dir, running_round, False, False,
def run_model(model_name, target_runtime, target_abi, target_soc,
model_output_dir, running_round, restart_round, option_args):
tuning_run(model_name, target_runtime, target_abi, target_soc,
model_output_dir, running_round, False, False,
restart_round, option_args)
......@@ -146,8 +172,9 @@ def generate_production_code(target_soc, model_output_dirs, pull_or_not):
run_command(command)
def build_mace_run_prod(target_soc, model_output_dir, tuning, global_runtime):
if "dsp" == global_runtime:
def build_mace_run_prod(model_name, target_runtime, target_abi, target_soc,
model_output_dir, tuning):
if "dsp" == target_runtime:
hexagon_mode = True
else:
hexagon_mode = False
......@@ -155,6 +182,9 @@ def build_mace_run_prod(target_soc, model_output_dir, tuning, global_runtime):
production_or_not = False
build_mace_run(production_or_not, model_output_dir, hexagon_mode)
tuning_run(
model_name,
target_runtime,
target_abi,
target_soc,
model_output_dir,
running_round=0,
......@@ -346,12 +376,13 @@ def main(unused_args):
if FLAGS.mode == "build" or FLAGS.mode == "all":
generate_model_code()
build_mace_run_prod(target_soc, model_output_dir, FLAGS.tuning,
global_runtime)
build_mace_run_prod(model_name, global_runtime, target_abi,
target_soc, model_output_dir, FLAGS.tuning)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
run_model(target_soc, model_output_dir, FLAGS.round,
FLAGS.restart_round, option_args)
run_model(model_name, global_runtime, target_abi, target_soc,
model_output_dir, FLAGS.round, FLAGS.restart_round,
option_args)
if FLAGS.mode == "benchmark":
benchmark_model(target_soc, model_output_dir, option_args)
......
......@@ -78,7 +78,7 @@ def adb_run(serialno, host_bin_path, bin_name,
sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path)
sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_cl_path)
print("Push %s to %s" % (host_bin_full_path, device_bin_full_path))
sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_path)
sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path)
print("Run %s" % device_bin_full_path)
stdout_buff=[]
process_output = make_output_processor(stdout_buff)
......@@ -142,21 +142,24 @@ def gen_mace_version(codegen_path="mace/codegen"):
################################
# falcon
################################
def falcon_tags(platform, model, abi):
return "ro.board.platform=%s,ro.product.model=%s,abi=%s" % (platform, model, abi)
def falcon_push_metrics(metrics, device_properties, abi, endpoint="mace_dev"):
def falcon_tags(tags_dict):
    """Serialize a tag mapping into a comma-separated "key=value" string.

    Args:
        tags_dict: mapping of tag name to tag value. Keys and values are
            rendered with "%s" formatting, so non-string values are allowed.

    Returns:
        A string such as "abi=arm64-v8a,tuning=True", with the pairs in the
        mapping's iteration order, or "" when the mapping is empty.
    """
    # items() exists on both Python 2 and Python 3 mappings, whereas
    # iteritems() is Python 2 only and raises AttributeError on Python 3.
    return ",".join("%s=%s" % (k, v) for k, v in tags_dict.items())
def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
cli = falcon_cli.FalconCli.connect(server="transfer.falcon.miliao.srv",
port=8433,
debug=False)
platform = device_properties["ro.board.platform"].replace(" ", "-")
model = device_properties["ro.product.model"].replace(" ", "-")
tags = falcon_tags(platform, model, abi)
ts = int(time.time())
falcon_metrics = [{
"endpoint": endpoint,
"metric": key,
"tags": tags,
"tags": falcon_tags(tags),
"timestamp": ts,
"value": value,
"step": 86400,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册