diff --git a/mace/benchmark/BUILD b/mace/benchmark/BUILD index 50ed42ee135ab6f1f560a49b2d90c66bd49b2828..0cc23bb1a5951dfb25f88737f7b3eefdf1242d12 100644 --- a/mace/benchmark/BUILD +++ b/mace/benchmark/BUILD @@ -11,14 +11,12 @@ load( licenses(["notice"]) # Apache 2.0 cc_library( - name = "stat_summarizer", - srcs = ["stat_summarizer.cc"], - hdrs = ["stat_summarizer.h"], + name = "statistics", + srcs = ["statistics.cc"], + hdrs = ["statistics.h"], linkstatic = 1, deps = [ - "//mace/core", "//mace/kernels", - "//mace/public", "//mace/utils", ], ) @@ -31,7 +29,7 @@ cc_binary( linkopts = if_openmp_enabled(["-fopenmp"]), linkstatic = 1, deps = [ - ":stat_summarizer", + ":statistics", "//external:gflags_nothreads", "//mace/codegen:generated_models", ], diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc index 00f46ab615eb6a0efdc30a364271d27881154b63..b282af94019e0853ef2decdc741c4a8c00a521f2 100644 --- a/mace/benchmark/benchmark_model.cc +++ b/mace/benchmark/benchmark_model.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include // NOLINT(build/c++11) @@ -23,7 +24,7 @@ #include "mace/public/mace.h" #include "mace/public/mace_runtime.h" #include "mace/utils/logging.h" -#include "mace/benchmark/stat_summarizer.h" +#include "mace/benchmark/statistics.h" namespace mace { namespace MACE_MODEL_TAG { @@ -120,12 +121,12 @@ DeviceType ParseDeviceType(const std::string &device_str) { bool RunInference(MaceEngine *engine, const std::map &input_infos, std::map *output_infos, - StatSummarizer *summarizer, - int64_t *inference_time_us) { + int64_t *inference_time_us, + OpStat *statistician) { MACE_CHECK_NOTNULL(output_infos); RunMetadata run_metadata; RunMetadata *run_metadata_ptr = nullptr; - if (summarizer) { + if (statistician) { run_metadata_ptr = &run_metadata; } @@ -139,39 +140,33 @@ bool RunInference(MaceEngine *engine, } *inference_time_us = end_time - start_time; - if (summarizer != nullptr) { - summarizer->ProcessMetadata(run_metadata); + if (statistician != nullptr) { + statistician->StatMetadata(run_metadata); } return true; } -bool Run(MaceEngine *engine, +bool Run(const std::string &title, + MaceEngine *engine, const std::map &input_infos, std::map *output_infos, - StatSummarizer *summarizer, int num_runs, double max_time_sec, - int64_t sleep_sec, int64_t *total_time_us, - int64_t *actual_num_runs) { + int64_t *actual_num_runs, + OpStat *statistician) { MACE_CHECK_NOTNULL(output_infos); *total_time_us = 0; - LOG(INFO) << "Running benchmark for max " << num_runs << " iterators, max " - << max_time_sec << " seconds " - << (summarizer != nullptr ? "with " : "without ") - << "detailed stat logging, with " << sleep_sec - << "s sleep between inferences"; - - Stat stat; + TimeInfo time_info; bool util_max_time = (num_runs <= 0); for (int i = 0; util_max_time || i < num_runs; ++i) { int64_t inference_time_us = 0; bool s = RunInference(engine, input_infos, output_infos, - summarizer, &inference_time_us); - stat.UpdateStat(inference_time_us); + &inference_time_us, statistician); + time_info.UpdateTime(inference_time_us); (*total_time_us) += inference_time_us; ++(*actual_num_runs); @@ -183,16 +178,13 @@ bool Run(MaceEngine *engine, LOG(INFO) << "Failed on run " << i; return s; } - - if (sleep_sec > 0) { - std::this_thread::sleep_for(std::chrono::seconds(sleep_sec)); - } } - std::stringstream stream; - stat.OutputToStream(&stream); - LOG(INFO) << stream.str(); - + std::stringstream stream(time_info.ToString(title)); + stream << std::endl; + for (std::string line; std::getline(stream, line);) { + LOG(INFO) << line; + } return true; } @@ -206,19 +198,7 @@ DEFINE_string(output_shape, "", "output shape, separated by colon and comma"); DEFINE_string(input_file, "", "input file name"); DEFINE_int32(max_num_runs, 100, "number of runs max"); DEFINE_string(max_time, "10.0", "length to run max"); -DEFINE_string(inference_delay, "-1", "delay between runs in seconds"); -DEFINE_string(inter_benchmark_delay, "-1", - "delay between benchmarks in seconds"); DEFINE_string(benchmark_name, "", "benchmark name"); -DEFINE_bool(show_run_order, true, "whether to list stats by run order"); -DEFINE_int32(run_order_limit, 0, "how many items to show by run order"); -DEFINE_bool(show_time, true, "whether to list stats by time taken"); -DEFINE_int32(time_limit, 10, "how many items to show by time taken"); -DEFINE_bool(show_memory, false, "whether to list stats by memory used"); -DEFINE_int32(memory_limit, 10, "how many items to show by memory used"); -DEFINE_bool(show_type, true, "whether to list stats by op type"); -DEFINE_bool(show_summary, true, "whether to show a summary of the stats"); -DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); @@ -246,30 +226,12 @@ int Main(int argc, char **argv) { LOG(INFO) << "output shapes: [" << FLAGS_output_shape << "]"; LOG(INFO) << "Warmup runs: [" << FLAGS_warmup_runs << "]"; LOG(INFO) << "Num runs: [" << FLAGS_max_num_runs << "]"; - LOG(INFO) << "Inter-inference delay (seconds): [" - << FLAGS_inference_delay << "]"; - LOG(INFO) << "Inter-benchmark delay (seconds): [" - << FLAGS_inter_benchmark_delay << "]"; - - const int64_t inter_inference_sleep_seconds = - std::strtol(FLAGS_inference_delay.c_str(), nullptr, 10); - const int64_t inter_benchmark_sleep_seconds = - std::strtol(FLAGS_inter_benchmark_delay.c_str(), nullptr, 10); + LOG(INFO) << "Max run time: [" << FLAGS_max_time << "]"; + const double max_benchmark_time_seconds = std::strtod(FLAGS_max_time.c_str(), nullptr); - std::unique_ptr stats; - - StatSummarizerOptions stats_options; - stats_options.show_run_order = FLAGS_show_run_order; - stats_options.run_order_limit = FLAGS_run_order_limit; - stats_options.show_time = FLAGS_show_time; - stats_options.time_limit = FLAGS_time_limit; - stats_options.show_memory = FLAGS_show_memory; - stats_options.memory_limit = FLAGS_memory_limit; - stats_options.show_type = FLAGS_show_type; - stats_options.show_summary = FLAGS_show_summary; - stats.reset(new StatSummarizer(stats_options)); + std::unique_ptr statistician(new OpStat()); mace::DeviceType device_type = ParseDeviceType(FLAGS_device); @@ -349,50 +311,38 @@ int Main(int argc, char **argv) { mace::MACE_MODEL_TAG::UnloadModelData(model_data); } - LOG(INFO) << "Warm up"; - int64_t warmup_time_us = 0; int64_t num_warmup_runs = 0; if (FLAGS_warmup_runs > 0) { bool status = - Run(engine_ptr.get(), inputs, &outputs, nullptr, + Run("Warm Up", engine_ptr.get(), inputs, &outputs, FLAGS_warmup_runs, -1.0, - inter_inference_sleep_seconds, &warmup_time_us, &num_warmup_runs); + &warmup_time_us, &num_warmup_runs, nullptr); if (!status) { LOG(ERROR) << "Failed at warm up run"; } } - if (inter_benchmark_sleep_seconds > 0) { - std::this_thread::sleep_for( - std::chrono::seconds(inter_benchmark_sleep_seconds)); - } int64_t no_stat_time_us = 0; int64_t no_stat_runs = 0; bool status = - Run(engine_ptr.get(), inputs, &outputs, - nullptr, FLAGS_max_num_runs, max_benchmark_time_seconds, - inter_inference_sleep_seconds, &no_stat_time_us, &no_stat_runs); + Run("Run without statistics", engine_ptr.get(), inputs, &outputs, + FLAGS_max_num_runs, max_benchmark_time_seconds, + &no_stat_time_us, &no_stat_runs, nullptr); if (!status) { LOG(ERROR) << "Failed at normal no-stat run"; } int64_t stat_time_us = 0; int64_t stat_runs = 0; - status = Run(engine_ptr.get(), inputs, &outputs, - stats.get(), FLAGS_max_num_runs, max_benchmark_time_seconds, - inter_inference_sleep_seconds, &stat_time_us, &stat_runs); + status = Run("Run with statistics", engine_ptr.get(), inputs, &outputs, + FLAGS_max_num_runs, max_benchmark_time_seconds, + &stat_time_us, &stat_runs, statistician.get()); if (!status) { LOG(ERROR) << "Failed at normal stat run"; } - LOG(INFO) << "Average inference timings in us: " - << "Warmup: " - << (FLAGS_warmup_runs > 0 ? warmup_time_us / FLAGS_warmup_runs : 0) - << ", " << "no stats: " << no_stat_time_us / no_stat_runs << ", " - << "with stats: " << stat_time_us / stat_runs; - - stats->PrintOperatorStats(); + statistician->PrintStat(); return 0; } diff --git a/mace/benchmark/stat_summarizer.cc b/mace/benchmark/stat_summarizer.cc deleted file mode 100644 index 291fd5ded7cf71097e23c7ea7a04a951a8c7dd95..0000000000000000000000000000000000000000 --- a/mace/benchmark/stat_summarizer.cc +++ /dev/null @@ -1,406 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "mace/benchmark/stat_summarizer.h" - -#include -#include -#include -#include - -#include "mace/public/mace.h" -#include "mace/utils/logging.h" -#include "mace/core/types.h" -#include "mace/kernels/conv_pool_2d_util.h" - -namespace mace { -namespace benchmark { - -StatSummarizer::StatSummarizer(const StatSummarizerOptions &options) - : options_(options) {} - -StatSummarizer::~StatSummarizer() {} - -void StatSummarizer::Reset() { - run_total_us_.Reset(); - memory_.Reset(); - details_.clear(); -} - -void StatSummarizer::ProcessMetadata(const RunMetadata &run_metadata) { - int64_t curr_total_us = 0; - int64_t mem_total = 0; - - if (run_metadata.op_stats.empty()) { - std::cerr << "Runtime op stats should not be empty" << std::endl; - abort(); - } - int64_t first_node_start_us = run_metadata.op_stats[0].stats.start_micros; - - int node_num = 0; - for (const auto &ops : run_metadata.op_stats) { - std::string name = ops.operator_name; - std::string op_type = ops.type; - - ++node_num; - const int64_t curr_time = ops.stats.end_micros - ops.stats.start_micros; - curr_total_us += curr_time; - auto result = details_.emplace(name, Detail()); - Detail *detail = &(result.first->second); - - detail->start_us.UpdateStat(ops.stats.start_micros - first_node_start_us); - detail->rel_end_us.UpdateStat(curr_time); - - // If this is the first pass, initialize some values. - if (result.second) { - detail->name = name; - detail->type = op_type; - detail->output_shape = ops.output_shape; - detail->args = ops.args; - - detail->run_order = node_num; - - detail->times_called = 0; - } - - ++detail->times_called; - } - - run_total_us_.UpdateStat(curr_total_us); - memory_.UpdateStat(mem_total); -} - -std::string StatSummarizer::ShortSummary() const { - std::stringstream stream; - stream << "Timings (microseconds): "; - run_total_us_.OutputToStream(&stream); - stream << std::endl; - - stream << "Memory (bytes): "; - memory_.OutputToStream(&stream); - stream << std::endl; - - stream << details_.size() << " nodes observed" << std::endl; - return stream.str(); -} - -std::ostream &InitField(std::ostream &stream, int width) { - stream << std::right << std::setw(width) << std::fixed - << std::setprecision(3); - return stream; -} - -std::string StatSummarizer::HeaderString(const std::string &title) const { - std::stringstream stream; - - stream << "============================== " << title - << " ==============================" << std::endl; - - InitField(stream, 24) << "[node type]"; - InitField(stream, 9) << "[start]"; - InitField(stream, 9) << "[first]"; - InitField(stream, 9) << "[avg ms]"; - InitField(stream, 9) << "[%]"; - InitField(stream, 9) << "[cdf%]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 10) << "[Name]"; - InitField(stream, 8) << "[stride]"; - InitField(stream, 10) << "[padding]"; - InitField(stream, 10) << "[dilation]"; - InitField(stream, 15) << "[kernel]"; - stream << std::right << std::setw(45) << "[output shape]"; - - return stream.str(); -} - -std::string PaddingTypeToString(int padding_type) { - std::stringstream stream; - Padding type = static_cast(padding_type); - switch (type) { - case VALID: stream << "VALID"; break; - case SAME: stream << "SAME"; break; - case FULL: stream << "FULL"; break; - default: stream << padding_type; break; - } - - return stream.str(); -} - -std::string ShapeToString(const std::vector &output_shape) { - if (output_shape.empty()) { - return ""; - } - - std::stringstream stream; - stream << "["; - for (int i = 0; i < output_shape.size(); ++i) { - const std::vector &dims = output_shape[i].dims(); - for (int j = 0; j < dims.size(); ++j) { - stream << dims[j]; - if (j != dims.size() - 1) { - stream << ","; - } - } - if (i != output_shape.size() - 1) { - stream << ":"; - } - } - stream << "]"; - - return stream.str(); -} - -template -std::string VectorToString(const std::vector &vec) { - if (vec.empty()) { - return ""; - } - - std::stringstream stream; - stream << "["; - for (int i = 0; i < vec.size(); ++i) { - stream << vec[i]; - if (i != vec.size() - 1) { - stream << ","; - } - } - stream << "]"; - - return stream.str(); -} - -std::string StatSummarizer::ColumnString(const StatSummarizer::Detail &detail, - const int64_t cumulative_stat_on_node, - const Stat &stat) const { - const double start_ms = detail.start_us.avg() / 1000.0; - const double first_time_ms = detail.rel_end_us.first() / 1000.0; - const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; - const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); - const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); - - std::stringstream stream; - InitField(stream, 24) << detail.type; - InitField(stream, 9) << start_ms; - InitField(stream, 9) << first_time_ms; - InitField(stream, 9) << avg_time_ms; - InitField(stream, 8) << percentage << "%"; - InitField(stream, 8) << cdf_percentage << "%"; - InitField(stream, 10) << detail.mem_used.newest() / 1000.0; - InitField(stream, 10) << detail.name; - InitField(stream, 8) << VectorToString(detail.args.strides); - if (detail.args.padding_type != -1) { - InitField(stream, 10) << PaddingTypeToString(detail.args.padding_type); - } else { - InitField(stream, 10) << VectorToString(detail.args.paddings); - } - InitField(stream, 10) << VectorToString(detail.args.dilations); - InitField(stream, 15) << VectorToString(detail.args.kernels); - stream << std::right << std::setw(45) << ShapeToString(detail.output_shape); - - return stream.str(); -} - -void StatSummarizer::OrderNodesByMetric( - SortingMetric metric, std::vector *details) const { - std::priority_queue> sorted_list; - const int num_nodes = details_.size(); - - for (const auto &det : details_) { - const Detail *detail = &(det.second); - - std::stringstream stream; - stream << std::setw(20) << std::right << std::setprecision(10) - << std::fixed; - - switch (metric) { - case BY_NAME: - stream << detail->name; - break; - case BY_RUN_ORDER: - stream << num_nodes - detail->run_order; - break; - case BY_TIME: - stream << detail->rel_end_us.avg(); - break; - case BY_MEMORY: - stream << detail->mem_used.avg(); - break; - case BY_TYPE: - stream << detail->type; - break; - default: - stream << ""; - break; - } - - sorted_list.emplace(stream.str(), detail); - } - - while (!sorted_list.empty()) { - auto entry = sorted_list.top(); - sorted_list.pop(); - details->push_back(entry.second); - } -} - -void StatSummarizer::ComputeStatsByType( - std::map *node_type_map_count, - std::map *node_type_map_time, - std::map *node_type_map_memory, - std::map *node_type_map_times_called, - int64_t *accumulated_us) const { - int64_t run_count = run_total_us_.count(); - - for (const auto &det : details_) { - const std::string node_name = det.first; - const Detail &detail = det.second; - - int64_t curr_time_val = - static_cast(detail.rel_end_us.sum() / run_count); - *accumulated_us += curr_time_val; - - int64_t curr_memory_val = detail.mem_used.newest(); - - const std::string &node_type = detail.type; - - (*node_type_map_count)[node_type] += 1; - (*node_type_map_time)[node_type] += curr_time_val; - (*node_type_map_memory)[node_type] += curr_memory_val; - (*node_type_map_times_called)[node_type] += detail.times_called / run_count; - } -} - -std::string StatSummarizer::GetStatsByNodeType() const { - std::stringstream stream; - - stream << "============================== Summary by node type " - "==============================" - << std::endl; - - LOG(INFO) << "Number of nodes executed: " << details_.size() << std::endl; - - std::map node_type_map_count; - std::map node_type_map_time; - std::map node_type_map_memory; - std::map node_type_map_times_called; - int64_t accumulated_us = 0; - - ComputeStatsByType(&node_type_map_count, &node_type_map_time, - &node_type_map_memory, &node_type_map_times_called, - &accumulated_us); - - // Sort them. - std::priority_queue>> - timings; - for (const auto &node_type : node_type_map_time) { - const int64_t mem_used = node_type_map_memory[node_type.first]; - timings.emplace(node_type.second, - std::pair(node_type.first, mem_used)); - } - - InitField(stream, 24) << "[Node type]"; - InitField(stream, 9) << "[count]"; - InitField(stream, 10) << "[avg ms]"; - InitField(stream, 11) << "[avg %]"; - InitField(stream, 11) << "[cdf %]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 10) << "[times called]"; - stream << std::endl; - - float cdf = 0.0f; - while (!timings.empty()) { - auto entry = timings.top(); - timings.pop(); - - const std::string node_type = entry.second.first; - const float memory = entry.second.second / 1000.0f; - - const int64_t node_type_total_us = entry.first; - const float time_per_run_ms = node_type_total_us / 1000.0f; - - const float percentage = - ((entry.first / static_cast(accumulated_us)) * 100.0f); - cdf += percentage; - - InitField(stream, 24) << node_type; - InitField(stream, 9) << node_type_map_count[node_type]; - InitField(stream, 10) << time_per_run_ms; - InitField(stream, 10) << percentage << "%"; - InitField(stream, 10) << cdf << "%"; - InitField(stream, 10) << memory; - InitField(stream, 9) << node_type_map_times_called[node_type]; - stream << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatSummarizer::GetStatsByMetric(const std::string &title, - SortingMetric sorting_metric, - int num_stats) const { - std::vector details; - OrderNodesByMetric(sorting_metric, &details); - - double cumulative_stat_on_node = 0; - - std::stringstream stream; - stream << HeaderString(title) << std::endl; - int stat_num = 0; - for (auto detail : details) { - ++stat_num; - if (num_stats > 0 && stat_num > num_stats) { - break; - } - - cumulative_stat_on_node += detail->rel_end_us.sum(); - stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) - << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatSummarizer::GetOutputString() const { - std::stringstream stream; - if (options_.show_run_order) { - stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, - options_.run_order_limit); - } - if (options_.show_time) { - stream << GetStatsByMetric("Top by Computation Time", BY_TIME, - options_.time_limit); - } - if (options_.show_memory) { - stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, - options_.memory_limit); - } - if (options_.show_type) { - stream << GetStatsByNodeType(); - } - if (options_.show_summary) { - stream << ShortSummary() << std::endl; - } - return stream.str(); -} - -void StatSummarizer::PrintOperatorStats() const { - std::string output = GetOutputString(); - std::istringstream iss(output); - for (std::string line; std::getline(iss, line);) { - LOG(INFO) << line; - } -} - -} // namespace benchmark -} // namespace mace diff --git a/mace/benchmark/stat_summarizer.h b/mace/benchmark/stat_summarizer.h deleted file mode 100644 index e457dfc39ecb25a7bc038e3bc7d5ba0b2409866c..0000000000000000000000000000000000000000 --- a/mace/benchmark/stat_summarizer.h +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_BENCHMARK_STAT_SUMMARIZER_H_ -#define MACE_BENCHMARK_STAT_SUMMARIZER_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mace/public/mace_types.h" - -namespace mace { - -class RunMetadata; - -namespace benchmark { - -template -class Stat { - public: - void UpdateStat(ValueType v) { - if (count_ == 0) { - first_ = v; - } - - newest_ = v; - max_ = std::max(v, max_); - min_ = std::min(v, min_); - ++count_; - sum_ += v; - squared_sum_ += static_cast(v) * v; - } - - void Reset() { new (this) Stat(); } - - bool empty() const { return count_ == 0; } - - ValueType first() const { return first_; } - - ValueType newest() const { return newest_; } - - ValueType max() const { return max_; } - - ValueType min() const { return min_; } - - int64_t count() const { return count_; } - - ValueType sum() const { return sum_; } - - HighPrecisionValueType squared_sum() const { return squared_sum_; } - - bool all_same() const { return (count_ == 0 || min_ == max_); } - - HighPrecisionValueType avg() const { - return empty() ? std::numeric_limits::quiet_NaN() - : static_cast(sum_) / count_; - } - - ValueType std_deviation() const { - return all_same() ? 0 : std::sqrt(squared_sum_ / count_ - avg() * avg()); - } - - void OutputToStream(std::ostream *stream) const { - if (empty()) { - *stream << "count=0"; - } else if (all_same()) { - *stream << "count=" << count_ << " curr=" << newest_; - if (count_ > 1) *stream << "(all same)"; - } else { - *stream << "count=" << count_ << " first=" << first_ - << " curr=" << newest_ << " min=" << min_ << " max=" << max_ - << " avg=" << avg() << " std=" << std_deviation(); - } - } - - friend std::ostream &operator<<(std::ostream &stream, - const Stat &stat) { - stat.OutputToStream(&stream); - return stream; - } - - private: - ValueType first_ = 0; - ValueType newest_ = 0; - ValueType max_ = std::numeric_limits::min(); - ValueType min_ = std::numeric_limits::max(); - int64_t count_ = 0; - ValueType sum_ = 0; - HighPrecisionValueType squared_sum_ = 0; -}; - -// Used to control the output of the statistics summarizer; -class StatSummarizerOptions { - public: - StatSummarizerOptions() - : show_run_order(true), - run_order_limit(0), - show_time(true), - time_limit(10), - show_memory(true), - memory_limit(10), - show_type(true), - show_summary(true) {} - - bool show_run_order; - int run_order_limit; - bool show_time; - int time_limit; - bool show_memory; - int memory_limit; - bool show_type; - bool show_summary; -}; - -// A StatSummarizer assists in performance analysis of Graph executions. -// -// It summarizes time spent executing (on GPU/CPU), memory used etc. across -// multiple executions of a single Graph from the StepStats collected during -// graph execution. -// -// See tensorflow/tools/benchmark/benchmark_model.cc for an example usage. -class StatSummarizer { - public: - enum SortingMetric { - BY_NAME, - BY_RUN_ORDER, - BY_TIME, - BY_MEMORY, - BY_TYPE, - }; - - explicit StatSummarizer(const StatSummarizerOptions &options); - - ~StatSummarizer(); - - // Adds another run's StepStats output to the aggregate counts. - void ProcessMetadata(const RunMetadata &run_metadata); - - // Returns a string detailing the accumulated runtime stats in a tab-separated - // format which can be pasted into a spreadsheet for further analysis. - std::string GetOutputString() const; - - std::string ShortSummary() const; - - // Prints the string returned by GetOutputString(). - void PrintOperatorStats() const; - - void ComputeStatsByType( - std::map *node_type_map_count, - std::map *node_type_map_time, - std::map *node_type_map_memory, - std::map *node_type_map_times_called, - int64_t *accumulated_us) const; - - std::string GetStatsByNodeType() const; - - std::string GetStatsByMetric(const std::string &title, - SortingMetric sorting_metric, - int num_stats) const; - - void Reset(); - - // Returns number of runs. - int num_runs() const { return run_total_us_.count(); } - - // Returns stats of total microseconds spent by all nodes in each run. - const Stat &run_total_us() const { return run_total_us_; } - - private: - struct Detail { - std::string name; - std::string type; - std::vector output_shape; - ConvPoolArgs args; - int64_t run_order; - Stat start_us; - Stat rel_end_us; - Stat mem_used; - int64_t times_called; - }; - - void OrderNodesByMetric(SortingMetric sorting_metric, - std::vector *details) const; - - std::string HeaderString(const std::string &title) const; - std::string ColumnString(const Detail &detail, - const int64_t cumulative_stat_on_node, - const Stat &stat) const; - Stat run_total_us_; - Stat memory_; - - std::map details_; - StatSummarizerOptions options_; -}; - -} // namespace benchmark -} // namespace mace - -#endif // MACE_BENCHMARK_STAT_SUMMARIZER_H_ diff --git a/mace/benchmark/statistics.cc b/mace/benchmark/statistics.cc new file mode 100644 index 0000000000000000000000000000000000000000..60ca2fd542f6d8651ea086edc22c951ed9f055ee --- /dev/null +++ b/mace/benchmark/statistics.cc @@ -0,0 +1,269 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/benchmark/statistics.h" + +#include + +#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/public/mace_types.h" +#include "mace/utils/logging.h" +#include "mace/utils/string_util.h" + +namespace mace { +namespace benchmark { + +namespace { +std::string MetricToString(const Metric metric) { + switch (metric) { + case NAME: + return "Name"; + case RUN_ORDER: + return "Run Order"; + case COMPUTATION_TIME: + return "Computation Time"; + default: + return ""; + } +} + +std::string PaddingTypeToString(int padding_type) { + std::stringstream stream; + Padding type = static_cast(padding_type); + switch (type) { + case VALID: stream << "VALID"; break; + case SAME: stream << "SAME"; break; + case FULL: stream << "FULL"; break; + default: stream << padding_type; break; + } + + return stream.str(); +} + +std::string ShapeToString(const std::vector &output_shape) { + if (output_shape.empty()) { + return ""; + } + + std::stringstream stream; + stream << "["; + for (int i = 0; i < output_shape.size(); ++i) { + const std::vector &dims = output_shape[i].dims(); + for (int j = 0; j < dims.size(); ++j) { + stream << dims[j]; + if (j != dims.size() - 1) { + stream << ","; + } + } + if (i != output_shape.size() - 1) { + stream << ":"; + } + } + stream << "]"; + + return stream.str(); +} + +template +std::string VectorToString(const std::vector &vec) { + if (vec.empty()) { + return ""; + } + + std::stringstream stream; + stream << "["; + for (int i = 0; i < vec.size(); ++i) { + stream << vec[i]; + if (i != vec.size() - 1) { + stream << ","; + } + } + stream << "]"; + + return stream.str(); +} + +} // namespace + +void OpStat::StatMetadata(const RunMetadata &meta_data) { + if (meta_data.op_stats.empty()) { + LOG(FATAL) << "Op metadata should not be empty"; + } + int64_t order_idx = 0; + int64_t total_time = 0; + + const int64_t first_op_start_time = meta_data.op_stats[0].stats.start_micros; + + for (auto &op_stat : meta_data.op_stats) { + auto result = records_.emplace(op_stat.operator_name, Record()); + Record *record = &(result.first->second); + + if (result.second) { + record->name = op_stat.operator_name; + record->type = op_stat.type; + record->args = op_stat.args; + record->output_shape = op_stat.output_shape; + record->order = order_idx; + order_idx += 1; + } + record->start.UpdateTime(op_stat.stats.start_micros - first_op_start_time); + int64_t run_time = op_stat.stats.end_micros - op_stat.stats.start_micros; + record->rel_end.UpdateTime(run_time); + record->called_times += 1; + total_time += run_time; + } + total_time_.UpdateTime(total_time); +} + +std::string OpStat::StatByMetric(const Metric metric, + const int top_limit) const { + if (records_.empty()) { + return ""; + } + // sort + std::vector records; + for (auto &record : records_) { + records.push_back(record.second); + } + std::sort(records.begin(), records.end(), + [=](const Record &lhs, const Record &rhs) { + if (metric == RUN_ORDER) { + return lhs.order < rhs.order; + } else if (metric == NAME) { + return lhs.name.compare(rhs.name) < 0; + } else { + return lhs.rel_end.avg() > rhs.rel_end.avg(); + } + }); + + // generate string + std::string title = "Sort by " + MetricToString(metric); + const std::vector header = { + "Node Type", "Start", "First", "Avg(ms)", "%", "cdf%", + "Stride", "Pad", "Filter Shape", "Output Shape", "Dilation", "name" + }; + std::vector> data; + int count = top_limit; + if (top_limit <= 0) count = static_cast(records.size()); + + int64_t accumulate_time = 0; + for (int i = 0; i < count; ++i) { + Record &record = records[i]; + accumulate_time += record.rel_end.sum(); + + std::vector tuple; + tuple.push_back(record.type); + tuple.push_back(FloatToString(record.start.avg() / 1000.0f, 3)); + tuple.push_back(FloatToString(record.rel_end.first() / 1000.0f, 3)); + tuple.push_back(FloatToString(record.rel_end.avg() / 1000.0f, 3)); + tuple.push_back( + FloatToString(record.rel_end.sum() * 100.f / total_time_.sum(), 3)); + tuple.push_back( + FloatToString(accumulate_time * 100.f / total_time_.sum(), 3)); + tuple.push_back(VectorToString(record.args.strides)); + if (record.args.padding_type != -1) { + tuple.push_back(PaddingTypeToString(record.args.padding_type)); + } else { + tuple.push_back(VectorToString(record.args.paddings)); + } + tuple.push_back(VectorToString(record.args.kernels)); + tuple.push_back(ShapeToString(record.output_shape)); + tuple.push_back(VectorToString(record.args.dilations)); + tuple.push_back(record.name); + data.emplace_back(tuple); + } + return mace::string_util::StringFormatter::Table(title, header, data); +} + +std::string OpStat::StatByNodeType() const { + if (records_.empty()) { + return ""; + } + const int64_t round = total_time_.round(); + int64_t total_time = 0; + std::map type_time_map; + std::map type_count_map; + std::map type_called_times_map; + std::set node_types_set; + for (auto &record : records_) { + std::string node_type = record.second.type; + node_types_set.insert(node_type); + + type_time_map[node_type] += record.second.rel_end.sum() / round; + total_time += record.second.rel_end.sum() / round; + type_count_map[node_type] += 1; + type_called_times_map[node_type] += record.second.called_times / round; + } + std::vector node_types(node_types_set.begin(), + node_types_set.end()); + std::sort(node_types.begin(), node_types.end(), + [&](const std::string &lhs, const std::string &rhs) { + return type_time_map[lhs] > type_time_map[rhs]; + }); + + std::string title = "Stat by node type"; + const std::vector header = { + "Node Type", "Count", "Avg(ms)", "%", "cdf%", "Called times" + }; + + float cdf = 0.0f; + std::vector> data; + for (auto type : node_types) { + const float avg_time = type_time_map[type] / 1000.0f; + const float percentage = type_time_map[type] * 100.0f / total_time; + cdf += percentage; + + std::vector tuple; + tuple.push_back(type); + tuple.push_back(IntToString(type_count_map[type])); + tuple.push_back(FloatToString(avg_time, 3)); + tuple.push_back(FloatToString(percentage, 3)); + tuple.push_back(FloatToString(cdf, 3)); + tuple.push_back(IntToString(type_called_times_map[type])); + data.emplace_back(tuple); + } + return mace::string_util::StringFormatter::Table(title, header, data); +} + +std::string OpStat::Summary() const { + std::stringstream stream; + if (!records_.empty()) { + stream << total_time_.ToString("Summary") << std::endl; + } + + stream << records_.size() << " ops total." << std::endl; + + return stream.str(); +} + +void OpStat::PrintStat() const { + std::stringstream stream; + if (!records_.empty()) { + // op stat by run order + stream << StatByMetric(Metric::RUN_ORDER, 0) << std::endl; + // top-10 op stat by time + stream << StatByMetric(Metric::COMPUTATION_TIME, 10) << std::endl; + // op stat by node type + stream << StatByNodeType() << std::endl; + } + // Print summary + stream << Summary(); + + for (std::string line; std::getline(stream, line);) { + LOG(INFO) << line; + } +} + +} // namespace benchmark +} // namespace mace diff --git a/mace/benchmark/statistics.h b/mace/benchmark/statistics.h new file mode 100644 index 0000000000000000000000000000000000000000..056df9f4cdf371063937fc3acce6eaa92f555985 --- /dev/null +++ b/mace/benchmark/statistics.h @@ -0,0 +1,159 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_BENCHMARK_STATISTICS_H_ +#define MACE_BENCHMARK_STATISTICS_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/utils/string_util.h" + +namespace mace { + +class RunMetadata; + +namespace benchmark { + +template +std::string IntToString(const IntType v) { + std::stringstream stream; + stream << v; + return stream.str(); +} + +template +std::string FloatToString(const FloatType v, const int32_t precision) { + std::stringstream stream; + stream << std::fixed << std::setprecision(precision) << v; + return stream.str(); +} +// microseconds +template +class TimeInfo { + public: + TimeInfo():round_(0), first_(0), curr_(0), + min_(std::numeric_limits::max()), max_(0), + sum_(0), square_sum(0) + {} + + const int64_t round() const { + return round_; + } + + const T first() const { + return first_; + } + + const T sum() const { + return sum_; + } + + const double avg() const { + return round_ == 0 ? std::numeric_limits::quiet_NaN() : + sum_ * 1.0f / round_; + } + + const double std_deviation() const { + if (round_ == 0 || min_ == max_) { + return 0; + } + const double avg_value = avg(); + return std::sqrt(square_sum / round_ - avg_value * avg_value); + } + + void UpdateTime(const T time) { + if (round_ == 0) { + first_ = time; + } + + curr_ = time; + min_ = std::min(min_, time); + max_ = std::max(max_, time); + + sum_ += time; + square_sum += static_cast(time) * time; + round_ += 1; + } + + std::string ToString(const std::string &title) const { + std::vector header = { + "round", "first(ms)", "curr(ms)", + "min(ms)", "max(ms)", + "avg(ms)", "std" + }; + std::vector> data(1); + data[0].push_back(IntToString(round_)); + data[0].push_back(FloatToString(first_ / 1000.0, 3)); + data[0].push_back(FloatToString(curr_ / 1000.0, 3)); + data[0].push_back(FloatToString(min_ / 1000.0, 3)); + data[0].push_back(FloatToString(max_ / 1000.0, 3)); + data[0].push_back(FloatToString(avg() / 1000.0, 3)); + data[0].push_back(FloatToString(std_deviation(), 3)); + return mace::string_util::StringFormatter::Table(title, header, data); + } + + private: + T first_; + T curr_; + T min_; + T max_; + T sum_; + int64_t round_; + double square_sum; +}; + +enum Metric { + NAME, + RUN_ORDER, + COMPUTATION_TIME, +}; + +class OpStat{ + public: + void StatMetadata(const RunMetadata &meta_data); + + void PrintStat() const; + + private: + std::string StatByMetric(const Metric metric, + const int top_limit) const; + std::string StatByNodeType() const; + std::string Summary() const; + + private: + struct Record{ + std::string name; + std::string type; + std::vector output_shape; + ConvPoolArgs args; + int64_t order; + TimeInfo start; + TimeInfo rel_end; + int64_t called_times; + }; + + std::map records_; + TimeInfo total_time_; +}; + +} // namespace benchmark +} // namespace mace +#endif // MACE_BENCHMARK_STATISTICS_H_ diff --git a/mace/utils/BUILD b/mace/utils/BUILD index 85e0647d2db75971ecca95f8e9a251befdfd5f26..57bec3d0ea8714ddb272a0117b96ae39567ced37 100644 --- a/mace/utils/BUILD +++ b/mace/utils/BUILD @@ -14,6 +14,7 @@ cc_library( srcs = [ "command_line_flags.cc", "logging.cc", + "string_util.cc", ], hdrs = [ "command_line_flags.h", diff --git a/mace/utils/string_util.cc b/mace/utils/string_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..e41ba52220f486c0e4ba07814dcdc875994b57f4 --- /dev/null +++ b/mace/utils/string_util.cc @@ -0,0 +1,85 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/utils/string_util.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace mace { +namespace string_util { + +std::ostream &FormatRow(std::ostream &stream, int width) { + stream << std::right << std::setw(width); + return stream; +} + +std::string StringFormatter::Table( + const std::string &title, + const std::vector &header, + const std::vector> &data) { + if (header.empty()) return ""; + const size_t column_size = header.size(); + const size_t data_size = data.size(); + std::vector max_column_len(header.size(), 0); + for (size_t col_idx = 0; col_idx < column_size; ++col_idx) { + max_column_len[col_idx] = std::max( + max_column_len[col_idx], static_cast(header[col_idx].size())); + for (size_t data_idx = 0; data_idx < data_size; ++data_idx) { + if (col_idx < data[data_idx].size()) { + max_column_len[col_idx] = std::max( + max_column_len[col_idx], + static_cast(data[data_idx][col_idx].size())); + } + } + } + const size_t row_length = + std::accumulate(max_column_len.begin(), max_column_len.end(), + 0, std::plus()) + + 2 * column_size + column_size + 1; + const std::string dash_line(row_length, '-'); + std::stringstream stream; + stream << dash_line << std::endl; + FormatRow(stream, static_cast(row_length / 2 + title.size() / 2)) + << title << std::endl; + stream << dash_line << std::endl; + // format header + stream << "|"; + for (size_t h_idx = 0; h_idx < column_size; ++h_idx) { + stream << " "; + FormatRow(stream, max_column_len[h_idx]) << header[h_idx]; + stream << " |"; + } + stream << std::endl << dash_line << std::endl; + // format data + for (size_t data_idx = 0; data_idx < data_size; ++data_idx) { + stream << "|"; + for (size_t h_idx = 0; h_idx < column_size; ++h_idx) { + stream << " "; + FormatRow(stream, max_column_len[h_idx]) << data[data_idx][h_idx]; + stream << " |"; + } + stream << std::endl; + } + stream << dash_line << std::endl; + return stream.str(); +} + +} // namespace string_util +} // namespace mace diff --git a/mace/utils/string_util.h b/mace/utils/string_util.h index 7727d24c92f1cff495f17b79a4e68969870b42ae..e95bd9024c4788aca9e3d07bf7f4afd45fd7bfcf 100644 --- a/mace/utils/string_util.h +++ b/mace/utils/string_util.h @@ -37,6 +37,13 @@ inline void MakeStringInternal(std::stringstream &ss, MakeStringInternal(ss, args...); } +class StringFormatter { + public: + static std::string Table(const std::string &title, + const std::vector &header, + const std::vector> &data); +}; + } // namespace string_util template