From 846ebc418b75f97633cae84d784be6d640431250 Mon Sep 17 00:00:00 2001 From: liuqi Date: Thu, 22 Feb 2018 17:54:26 +0800 Subject: [PATCH] Remove unused benchmark code. --- mace/tools/benchmark/BUILD | 28 -- mace/tools/benchmark/benchmark_model.cc | 349 ------------------------ mace/tools/benchmark/stat_summarizer.cc | 316 --------------------- mace/tools/benchmark/stat_summarizer.h | 199 -------------- 4 files changed, 892 deletions(-) delete mode 100644 mace/tools/benchmark/BUILD delete mode 100644 mace/tools/benchmark/benchmark_model.cc delete mode 100644 mace/tools/benchmark/stat_summarizer.cc delete mode 100644 mace/tools/benchmark/stat_summarizer.h diff --git a/mace/tools/benchmark/BUILD b/mace/tools/benchmark/BUILD deleted file mode 100644 index 5241146b..00000000 --- a/mace/tools/benchmark/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -# Benchmark -load("//mace:mace.bzl", "if_android") - -licenses(["notice"]) # Apache 2.0 - -cc_library( - name = "stat_summarizer", - srcs = ["stat_summarizer.cc"], - hdrs = ["stat_summarizer.h"], - linkstatic = 1, - deps = [ - "//mace/core", - ], -) - -cc_binary( - name = "benchmark_model", - srcs = [ - "benchmark_model.cc", - ], - linkstatic = 1, - deps = [ - ":stat_summarizer", - "//mace/core", - "//mace/ops", - "//mace/utils:command_line_flags", - ], -) diff --git a/mace/tools/benchmark/benchmark_model.cc b/mace/tools/benchmark/benchmark_model.cc deleted file mode 100644 index 06816386..00000000 --- a/mace/tools/benchmark/benchmark_model.cc +++ /dev/null @@ -1,349 +0,0 @@ -// -// Copyright (c) 2017 XiaoMi All rights reserved. -// - -#include "mace/core/net.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/tools/benchmark/stat_summarizer.h" -#include "mace/utils/command_line_flags.h" -#include "mace/utils/utils.h" - -#include -#include - -namespace mace { -namespace str_util { - -std::vector Split(const string &str, char delims) { - std::vector result; - string tmp = str; - while (!tmp.empty()) { - result.push_back(tmp.data()); - size_t next_offset = tmp.find(delims); - if (next_offset == string::npos) { - break; - } else { - tmp = tmp.substr(next_offset + 1); - } - } - return result; -} - -bool SplitAndParseToInts(const string &str, - char delims, - std::vector *result) { - string tmp = str; - while (!tmp.empty()) { - index_t dim = atoi(tmp.data()); - result->push_back(dim); - size_t next_offset = tmp.find(delims); - if (next_offset == string::npos) { - break; - } else { - tmp = tmp.substr(next_offset + 1); - } - } - return true; -} - -} // namespace str_util - -namespace benchmark { - -bool RunInference(NetBase *net, - StatSummarizer *summarizer, - int64_t *inference_time_us) { - RunMetadata run_metadata; - RunMetadata *run_metadata_ptr = nullptr; - if (summarizer) { - run_metadata_ptr = &run_metadata; - } - const int64_t start_time = NowInMicroSec(); - bool s = net->Run(run_metadata_ptr); - const int64_t end_time = NowInMicroSec(); - - if (!s) { - LOG(ERROR) << "Error during inference."; - return s; - } - *inference_time_us = end_time - start_time; - - if (summarizer != nullptr) { - summarizer->ProcessMetadata(run_metadata); - } - - return true; -} - -bool Run(NetBase *net, - StatSummarizer *summarizer, - int num_runs, - double max_time_sec, - int64_t sleep_sec, - int64_t *total_time_us, - int64_t *actual_num_runs) { - *total_time_us = 0; - - LOG(INFO) << "Running benchmark for max " << num_runs << " iterators, max " - << max_time_sec << " seconds " - << (summarizer != nullptr ? "with " : "without ") - << "detailed stat logging, with " << sleep_sec - << "s sleep between inferences"; - - Stat stat; - - bool util_max_time = (num_runs <= 0); - for (int i = 0; util_max_time || i < num_runs; ++i) { - int64_t inference_time_us = 0; - bool s = RunInference(net, summarizer, &inference_time_us); - stat.UpdateStat(inference_time_us); - (*total_time_us) += inference_time_us; - ++(*actual_num_runs); - - if (max_time_sec > 0 && (*total_time_us / 1000000.0) > max_time_sec) { - break; - } - - if (!s) { - LOG(INFO) << "Failed on run " << i; - return s; - } - - if (sleep_sec > 0) { - std::this_thread::sleep_for(std::chrono::seconds(sleep_sec)); - } - } - - std::stringstream stream; - stat.OutputToStream(&stream); - LOG(INFO) << stream.str(); - - return true; -} - -int Main(int argc, char **argv) { - std::string model_file = "/data/local/tmp/mobi_mace.pb"; - std::string device = "CPU"; - std::string input_layer_string = "input:0"; - std::string input_layer_shape_string = "1,224,224,3"; - std::string input_layer_type_string = "float"; - std::string input_layer_files_string = ""; - std::string output_layer_string = "output:0"; - int max_num_runs = 10; - std::string max_time = "10.0"; - std::string inference_delay = "-1"; - std::string inter_benchmark_delay = "-1"; - int num_threads = -1; - std::string benchmark_name = ""; - std::string output_prefix = ""; - bool show_sizes = false; - bool show_run_order = true; - int run_order_limit = 0; - bool show_time = true; - int time_limit = 10; - bool show_memory = true; - int memory_limit = 10; - bool show_type = true; - bool show_summary = true; - bool show_flops = false; - int warmup_runs = 2; - - std::vector flag_list = { - Flag("model_file", &model_file, "graph file name"), - Flag("device", &device, "CPU/NEON/OPENCL"), - Flag("input_layer", &input_layer_string, "input layer names"), - Flag("input_layer_shape", &input_layer_shape_string, "input layer shape"), - Flag("input_layer_type", &input_layer_type_string, "input layer type"), - Flag("input_layer_files", &input_layer_files_string, - "files to initialize the inputs with"), - Flag("output_layer", &output_layer_string, "output layer name"), - Flag("max_num_runs", &max_num_runs, "number of runs max"), - Flag("max_time", &max_time, "length to run max"), - Flag("inference_delay", &inference_delay, - "delay between runs in seconds"), - Flag("inter_benchmark_delay", &inter_benchmark_delay, - "delay between benchmarks in seconds"), - Flag("num_threads", &num_threads, "number of threads"), - Flag("benchmark_name", &benchmark_name, "benchmark name"), - Flag("output_prefix", &output_prefix, "benchmark output prefix"), - Flag("show_sizes", &show_sizes, "whether to show sizes"), - Flag("show_run_order", &show_run_order, - "whether to list stats by run order"), - Flag("run_order_limit", &run_order_limit, - "how many items to show by run order"), - Flag("show_time", &show_time, "whether to list stats by time taken"), - Flag("time_limit", &time_limit, "how many items to show by time taken"), - Flag("show_memory", &show_memory, "whether to list stats by memory used"), - Flag("memory_limit", &memory_limit, - "how many items to show by memory used"), - Flag("show_type", &show_type, "whether to list stats by op type"), - Flag("show_summary", &show_summary, - "whether to show a summary of the stats"), - Flag("show_flops", &show_flops, "whether to estimate the model's FLOPs"), - Flag("warmup_runs", &warmup_runs, "how many runs to initialize model"), - }; - string usage = Flags::Usage(argv[0], flag_list); - const bool parse_result = Flags::Parse(&argc, argv, flag_list); - - if (!parse_result) { - LOG(ERROR) << usage; - return -1; - } - - std::vector input_layers = - str_util::Split(input_layer_string, ','); - std::vector input_layer_shapes = - str_util::Split(input_layer_shape_string, ':'); - std::vector input_layer_types = - str_util::Split(input_layer_type_string, ','); - std::vector input_layer_files = - str_util::Split(input_layer_files_string, ':'); - std::vector output_layers = str_util::Split(output_layer_string, ','); - if ((input_layers.size() != input_layer_shapes.size()) || - (input_layers.size() != input_layer_types.size())) { - LOG(ERROR) << "There must be the same number of items in --input_layer," - << " --input_layer_shape, and --input_layer_type, for example" - << " --input_layer=input1,input2 --input_layer_type=float,float " - << " --input_layer_shape=1,224,224,4:1,20"; - LOG(ERROR) << "--input_layer=" << input_layer_string << " (" - << input_layers.size() << " items)"; - LOG(ERROR) << "--input_layer_type=" << input_layer_type_string << " (" - << input_layer_types.size() << " items)"; - LOG(ERROR) << "--input_layer_shape=" << input_layer_shape_string << " (" - << input_layer_shapes.size() << " items)"; - return -1; - } - const size_t inputs_count = input_layers.size(); - - if (argc > 1) { - LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; - return -1; - } - - LOG(INFO) << "Model file: [" << model_file << "]"; - LOG(INFO) << "Device: [" << device << "]"; - LOG(INFO) << "Input layers: [" << input_layer_string << "]"; - LOG(INFO) << "Input shapes: [" << input_layer_shape_string << "]"; - LOG(INFO) << "Input types: [" << input_layer_type_string << "]"; - LOG(INFO) << "Output layers: [" << output_layer_string << "]"; - LOG(INFO) << "Num runs: [" << max_num_runs << "]"; - LOG(INFO) << "Inter-inference delay (seconds): [" << inference_delay << "]"; - LOG(INFO) << "Inter-benchmark delay (seconds): [" << inter_benchmark_delay - << "]"; - LOG(INFO) << "Num threads: [" << num_threads << "]"; - LOG(INFO) << "Benchmark name: [" << benchmark_name << "]"; - LOG(INFO) << "Output prefix: [" << output_prefix << "]"; - LOG(INFO) << "Show sizes: [" << show_sizes << "]"; - LOG(INFO) << "Warmup runs: [" << warmup_runs << "]"; - - const long int inter_inference_sleep_seconds = - std::strtol(inference_delay.c_str(), nullptr, 10); - const long int inter_benchmark_sleep_seconds = - std::strtol(inter_benchmark_delay.c_str(), nullptr, 10); - const double max_benchmark_time_seconds = - std::strtod(max_time.c_str(), nullptr); - - std::unique_ptr stats; - - StatSummarizerOptions stats_options; - stats_options.show_run_order = show_run_order; - stats_options.run_order_limit = run_order_limit; - stats_options.show_time = show_time; - stats_options.time_limit = time_limit; - stats_options.show_memory = show_memory; - stats_options.memory_limit = memory_limit; - stats_options.show_type = show_type; - stats_options.show_summary = show_summary; - stats.reset(new StatSummarizer(stats_options)); - - DeviceType device_type; - DeviceType_Parse(device, &device_type); - VLOG(0) << device_type; - - - // load model - std::ifstream model_file_stream(model_file, std::ios::in | std::ios::binary); - if (!model_file_stream.is_open()) { - LOG(ERROR) << "model file open failed"; - return -1; - } - NetDef net_def; - net_def.ParseFromIstream(&model_file_stream); - model_file_stream.close(); - - Workspace ws; - ws.LoadModelTensor(net_def, device_type); - // Load inputs - for (size_t i = 0; i < inputs_count; ++i) { - Tensor *input_tensor = - ws.CreateTensor(input_layers[i], GetDeviceAllocator(device_type), DT_FLOAT); - vector shapes; - str_util::SplitAndParseToInts(input_layer_shapes[i], ',', &shapes); - input_tensor->Resize(shapes); - { - Tensor::MappingGuard input_guard(input_tensor); - float *input_data = input_tensor->mutable_data(); - - // load input - if (i < input_layer_files.size()) { - std::ifstream in_file(input_layer_files[i], - std::ios::in | std::ios::binary); - in_file.read(reinterpret_cast(input_data), - input_tensor->size() * sizeof(float)); - in_file.close(); - } - } - } - auto net = CreateNet(net_def, &ws, device_type, NetMode::INIT); - net->Run(); - - // create net - net = CreateNet(net_def, &ws, device_type); - - int64_t warmup_time_us = 0; - int64_t num_warmup_runs = 0; - if (warmup_runs > 0) { - bool status = - Run(net.get(), nullptr, warmup_runs, -1.0, - inter_inference_sleep_seconds, &warmup_time_us, &num_warmup_runs); - if (!status) { - LOG(ERROR) << "Failed at warm up run"; - } - } - - if (inter_benchmark_sleep_seconds > 0) { - std::this_thread::sleep_for( - std::chrono::seconds(inter_benchmark_sleep_seconds)); - } - int64_t no_stat_time_us = 0; - int64_t no_stat_runs = 0; - bool status = - Run(net.get(), nullptr, max_num_runs, max_benchmark_time_seconds, - inter_inference_sleep_seconds, &no_stat_time_us, &no_stat_runs); - if (!status) { - LOG(ERROR) << "Failed at normal no-stat run"; - } - - int64_t stat_time_us = 0; - int64_t stat_runs = 0; - status = Run(net.get(), stats.get(), max_num_runs, max_benchmark_time_seconds, - inter_inference_sleep_seconds, &stat_time_us, &stat_runs); - if (!status) { - LOG(ERROR) << "Failed at normal stat run"; - } - - LOG(INFO) << "Average inference timings in us: " - << "Warmup: " - << (warmup_runs > 0 ? warmup_time_us / warmup_runs : 0) << ", " - << "no stats: " << no_stat_time_us / no_stat_runs << ", " - << "with stats: " << stat_time_us / stat_runs; - - stats->PrintOperatorStats(); - - return 0; -} - -} // namespace benchmark -} // namespace mace - -int main(int argc, char **argv) { mace::benchmark::Main(argc, argv); } diff --git a/mace/tools/benchmark/stat_summarizer.cc b/mace/tools/benchmark/stat_summarizer.cc deleted file mode 100644 index 0b0d8e5a..00000000 --- a/mace/tools/benchmark/stat_summarizer.cc +++ /dev/null @@ -1,316 +0,0 @@ -// -// Copyright (c) 2017 XiaoMi All rights reserved. -// - -#include "mace/tools/benchmark/stat_summarizer.h" -#include "mace/core/common.h" -#include "mace/core/operator.h" - -#include -#include - -namespace mace { - -StatSummarizer::StatSummarizer(const StatSummarizerOptions &options) - : options_(options) {} - -StatSummarizer::~StatSummarizer() {} - -void StatSummarizer::Reset() { - run_total_us_.Reset(); - memory_.Reset(); - details_.clear(); -} - -void StatSummarizer::ProcessMetadata(const RunMetadata &run_metadata) { - int64_t curr_total_us = 0; - int64_t mem_total = 0; - - MACE_CHECK(!run_metadata.op_stats.empty()); - int64_t first_node_start_us = run_metadata.op_stats[0].stats.start_micros; - - int node_num = 0; - for (const auto &ops : run_metadata.op_stats) { - std::string name = ops.operator_name; - std::string op_type = ops.type; - - ++node_num; - const int64_t curr_time = ops.stats.end_micros - ops.stats.start_micros; - curr_total_us += curr_time; - auto result = details_.emplace(name, Detail()); - Detail *detail = &(result.first->second); - - detail->start_us.UpdateStat(ops.stats.start_micros - first_node_start_us); - detail->rel_end_us.UpdateStat(curr_time); - - // If this is the first pass, initialize some values. - if (result.second) { - detail->name = name; - detail->type = op_type; - - detail->run_order = node_num; - - detail->times_called = 0; - } - - ++detail->times_called; - } - - run_total_us_.UpdateStat(curr_total_us); - memory_.UpdateStat(mem_total); -} - -std::string StatSummarizer::ShortSummary() const { - std::stringstream stream; - stream << "Timings (microseconds): "; - run_total_us_.OutputToStream(&stream); - stream << std::endl; - - stream << "Memory (bytes): "; - memory_.OutputToStream(&stream); - stream << std::endl; - - stream << details_.size() << " nodes observed" << std::endl; - return stream.str(); -} - -std::ostream &InitField(std::ostream &stream, int width) { - stream << "\t" << std::right << std::setw(width) << std::fixed - << std::setprecision(3); - return stream; -} - -std::string StatSummarizer::HeaderString(const std::string &title) const { - std::stringstream stream; - - stream << "============================== " << title - << " ==============================" << std::endl; - - InitField(stream, 24) << "[node type]"; - InitField(stream, 9) << "[start]"; - InitField(stream, 9) << "[first]"; - InitField(stream, 9) << "[avg ms]"; - InitField(stream, 8) << "[%]"; - InitField(stream, 8) << "[cdf%]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 9) << "[times called]"; - stream << "\t" - << "[Name]"; - return stream.str(); -} - -std::string StatSummarizer::ColumnString(const StatSummarizer::Detail &detail, - const int64_t cumulative_stat_on_node, - const Stat &stat) const { - const double start_ms = detail.start_us.avg() / 1000.0; - const double first_time_ms = detail.rel_end_us.first() / 1000.0; - const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; - const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); - const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); - const int64_t times_called = detail.times_called / num_runs(); - - std::stringstream stream; - InitField(stream, 24) << detail.type; - InitField(stream, 9) << start_ms; - InitField(stream, 9) << first_time_ms; - InitField(stream, 9) << avg_time_ms; - InitField(stream, 7) << percentage << "%"; - InitField(stream, 7) << cdf_percentage << "%"; - InitField(stream, 10) << detail.mem_used.newest() / 1000.0; - InitField(stream, 9) << times_called; - stream << "\t" << detail.name; - - return stream.str(); -} - -void StatSummarizer::OrderNodesByMetric( - SortingMetric metric, std::vector *details) const { - std::priority_queue> sorted_list; - const int num_nodes = details_.size(); - - for (const auto &det : details_) { - const Detail *detail = &(det.second); - std::stringstream stream; - stream << std::setw(20) << std::right << std::setprecision(10) - << std::fixed; - - switch (metric) { - case BY_NAME: - stream << detail->name; - break; - case BY_RUN_ORDER: - stream << num_nodes - detail->run_order; - break; - case BY_TIME: - stream << detail->rel_end_us.avg(); - break; - case BY_MEMORY: - stream << detail->mem_used.avg(); - break; - case BY_TYPE: - stream << detail->type; - break; - default: - stream << ""; - break; - } - - sorted_list.emplace(stream.str(), detail); - } - - while (!sorted_list.empty()) { - auto entry = sorted_list.top(); - sorted_list.pop(); - details->push_back(entry.second); - } -} - -void StatSummarizer::ComputeStatsByType( - std::map *node_type_map_count, - std::map *node_type_map_time, - std::map *node_type_map_memory, - std::map *node_type_map_times_called, - int64_t *accumulated_us) const { - int64_t run_count = run_total_us_.count(); - - for (const auto &det : details_) { - const std::string node_name = det.first; - const Detail &detail = det.second; - - int64_t curr_time_val = - static_cast(detail.rel_end_us.sum() / run_count); - *accumulated_us += curr_time_val; - - int64_t curr_memory_val = detail.mem_used.newest(); - - const std::string &node_type = detail.type; - - (*node_type_map_count)[node_type] += 1; - (*node_type_map_time)[node_type] += curr_time_val; - (*node_type_map_memory)[node_type] += curr_memory_val; - (*node_type_map_times_called)[node_type] += detail.times_called / run_count; - } -} - -std::string StatSummarizer::GetStatsByNodeType() const { - std::stringstream stream; - - stream << "============================== Summary by node type " - "==============================" - << std::endl; - - LOG(INFO) << "Number of nodes executed: " << details_.size(); - - std::map node_type_map_count; - std::map node_type_map_time; - std::map node_type_map_memory; - std::map node_type_map_times_called; - int64_t accumulated_us = 0; - - ComputeStatsByType(&node_type_map_count, &node_type_map_time, - &node_type_map_memory, &node_type_map_times_called, - &accumulated_us); - - // Sort them. - std::priority_queue>> - timings; - for (const auto &node_type : node_type_map_time) { - const int64_t mem_used = node_type_map_memory[node_type.first]; - timings.emplace(node_type.second, - std::pair(node_type.first, mem_used)); - } - - InitField(stream, 24) << "[Node type]"; - InitField(stream, 9) << "[count]"; - InitField(stream, 10) << "[avg ms]"; - InitField(stream, 11) << "[avg %]"; - InitField(stream, 11) << "[cdf %]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 10) << "[times called]"; - stream << std::endl; - - float cdf = 0.0f; - while (!timings.empty()) { - auto entry = timings.top(); - timings.pop(); - - const std::string node_type = entry.second.first; - const float memory = entry.second.second / 1000.0f; - - const int64_t node_type_total_us = entry.first; - const float time_per_run_ms = node_type_total_us / 1000.0f; - - const float percentage = - ((entry.first / static_cast(accumulated_us)) * 100.0f); - cdf += percentage; - - InitField(stream, 24) << node_type; - InitField(stream, 9) << node_type_map_count[node_type]; - InitField(stream, 10) << time_per_run_ms; - InitField(stream, 10) << percentage << "%"; - InitField(stream, 10) << cdf << "%"; - InitField(stream, 10) << memory; - InitField(stream, 9) << node_type_map_times_called[node_type]; - stream << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatSummarizer::GetStatsByMetric(const std::string &title, - SortingMetric sorting_metric, - int num_stats) const { - std::vector details; - OrderNodesByMetric(sorting_metric, &details); - - double cumulative_stat_on_node = 0; - - std::stringstream stream; - stream << HeaderString(title) << std::endl; - int stat_num = 0; - for (auto detail : details) { - ++stat_num; - if (num_stats > 0 && stat_num > num_stats) { - break; - } - - cumulative_stat_on_node += detail->rel_end_us.sum(); - stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) - << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatSummarizer::GetOutputString() const { - std::stringstream stream; - if (options_.show_run_order) { - stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, - options_.run_order_limit); - } - if (options_.show_time) { - stream << GetStatsByMetric("Top by Computation Time", BY_TIME, - options_.time_limit); - } - if (options_.show_memory) { - stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, - options_.memory_limit); - } - if (options_.show_type) { - stream << GetStatsByNodeType(); - } - if (options_.show_summary) { - stream << ShortSummary() << std::endl; - } - return stream.str(); -} - -void StatSummarizer::PrintOperatorStats() const { - std::string output = GetOutputString(); - std::istringstream iss(output); - for (std::string line; std::getline(iss, line);) { - LOG(INFO) << line; - } -} - -} // namespace mace diff --git a/mace/tools/benchmark/stat_summarizer.h b/mace/tools/benchmark/stat_summarizer.h deleted file mode 100644 index 368b1e30..00000000 --- a/mace/tools/benchmark/stat_summarizer.h +++ /dev/null @@ -1,199 +0,0 @@ -// -// Copyright (c) 2017 XiaoMi All rights reserved. -// - -#ifndef MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_ -#define MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_ - -#include -#include -#include -#include -#include -#include -#include - -namespace mace { - -class RunMetadata; - -template -class Stat { - public: - void UpdateStat(ValueType v) { - if (count_ == 0) { - first_ = v; - } - - newest_ = v; - max_ = std::max(v, max_); - min_ = std::min(v, min_); - ++count_; - sum_ += v; - squared_sum_ += static_cast(v) * v; - } - - void Reset() { new (this) Stat(); } - - bool empty() const { return count_ == 0; } - - ValueType first() const { return first_; } - - ValueType newest() const { return newest_; } - - ValueType max() const { return max_; } - - ValueType min() const { return min_; } - - int64_t count() const { return count_; } - - ValueType sum() const { return sum_; } - - HighPrecisionValueType squared_sum() const { return squared_sum_; } - - bool all_same() const { return (count_ == 0 || min_ == max_); } - - HighPrecisionValueType avg() const { - return empty() ? std::numeric_limits::quiet_NaN() - : static_cast(sum_) / count_; - } - - ValueType std_deviation() const { - return all_same() ? 0 : std::sqrt(squared_sum_ / count_ - avg() * avg()); - } - - void OutputToStream(std::ostream *stream) const { - if (empty()) { - *stream << "count=0"; - } else if (all_same()) { - *stream << "count=" << count_ << " curr=" << newest_; - if (count_ > 1) *stream << "(all same)"; - } else { - *stream << "count=" << count_ << " first=" << first_ - << " curr=" << newest_ << " min=" << min_ << " max=" << max_ - << " avg=" << avg() << " std=" << std_deviation(); - } - } - - friend std::ostream &operator<<(std::ostream &stream, - const Stat &stat) { - stat.OutputToStream(&stream); - return stream; - } - - private: - ValueType first_ = 0; - ValueType newest_ = 0; - ValueType max_ = std::numeric_limits::min(); - ValueType min_ = std::numeric_limits::max(); - int64_t count_ = 0; - ValueType sum_ = 0; - HighPrecisionValueType squared_sum_ = 0; -}; - -// Used to control the output of the statistics summarizer; -class StatSummarizerOptions { - public: - StatSummarizerOptions() - : show_run_order(true), - run_order_limit(0), - show_time(true), - time_limit(10), - show_memory(true), - memory_limit(10), - show_type(true), - show_summary(true) {} - - bool show_run_order; - int run_order_limit; - bool show_time; - int time_limit; - bool show_memory; - int memory_limit; - bool show_type; - bool show_summary; -}; - -// A StatSummarizer assists in performance analysis of Graph executions. -// -// It summarizes time spent executing (on GPU/CPU), memory used etc. across -// multiple executions of a single Graph from the StepStats collected during -// graph execution. -// -// See tensorflow/tools/benchmark/benchmark_model.cc for an example usage. -class StatSummarizer { - public: - enum SortingMetric { - BY_NAME, - BY_RUN_ORDER, - BY_TIME, - BY_MEMORY, - BY_TYPE, - }; - - explicit StatSummarizer(const StatSummarizerOptions &options); - - ~StatSummarizer(); - - // Adds another run's StepStats output to the aggregate counts. - void ProcessMetadata(const RunMetadata &run_metadata); - - // Returns a string detailing the accumulated runtime stats in a tab-separated - // format which can be pasted into a spreadsheet for further analysis. - std::string GetOutputString() const; - - std::string ShortSummary() const; - - // Prints the string returned by GetOutputString(). - void PrintOperatorStats() const; - - void ComputeStatsByType( - std::map *node_type_map_count, - std::map *node_type_map_time, - std::map *node_type_map_memory, - std::map *node_type_map_times_called, - int64_t *accumulated_us) const; - - std::string GetStatsByNodeType() const; - - std::string GetStatsByMetric(const std::string &title, - SortingMetric sorting_metric, - int num_stats) const; - - void Reset(); - - // Returns number of runs. - int num_runs() const { return run_total_us_.count(); } - - // Returns stats of total microseconds spent by all nodes in each run. - const Stat &run_total_us() const { return run_total_us_; } - - private: - struct Detail { - std::string name; - std::string type; - int64_t run_order; - Stat start_us; - Stat rel_end_us; - Stat mem_used; - int64_t times_called; - }; - - void OrderNodesByMetric(SortingMetric sorting_metric, - std::vector *details) const; - - std::string HeaderString(const std::string &title) const; - std::string ColumnString(const Detail &detail, - const int64_t cumulative_stat_on_node, - const Stat &stat) const; - - Stat run_total_us_; - Stat memory_; - - std::map details_; - StatSummarizerOptions options_; -}; - -} // namespace mace - -#endif // MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_ -- GitLab