From 846ebc418b75f97633cae84d784be6d640431250 Mon Sep 17 00:00:00 2001
From: liuqi <liuqi10@xiaomi.com>
Date: Thu, 22 Feb 2018 17:54:26 +0800
Subject: [PATCH] Remove unused benchmark code.

---
 mace/tools/benchmark/BUILD              |  28 --
 mace/tools/benchmark/benchmark_model.cc | 349 ------------------------
 mace/tools/benchmark/stat_summarizer.cc | 316 ---------------------
 mace/tools/benchmark/stat_summarizer.h  | 199 --------------
 4 files changed, 892 deletions(-)
 delete mode 100644 mace/tools/benchmark/BUILD
 delete mode 100644 mace/tools/benchmark/benchmark_model.cc
 delete mode 100644 mace/tools/benchmark/stat_summarizer.cc
 delete mode 100644 mace/tools/benchmark/stat_summarizer.h
diff --git a/mace/tools/benchmark/BUILD b/mace/tools/benchmark/BUILD
deleted file mode 100644
index 5241146b..00000000
--- a/mace/tools/benchmark/BUILD
+++ /dev/null
@@ -1,28 +0,0 @@
-# Benchmark
-load("//mace:mace.bzl", "if_android")
-
-licenses(["notice"])  # Apache 2.0
-
-cc_library(
-    name = "stat_summarizer",
-    srcs = ["stat_summarizer.cc"],
-    hdrs = ["stat_summarizer.h"],
-    linkstatic = 1,
-    deps = [
-        "//mace/core",
-    ],
-)
-
-cc_binary(
-    name = "benchmark_model",
-    srcs = [
-        "benchmark_model.cc",
-    ],
-    linkstatic = 1,
-    deps = [
-        ":stat_summarizer",
-        "//mace/core",
-        "//mace/ops",
-        "//mace/utils:command_line_flags",
-    ],
-)
diff --git a/mace/tools/benchmark/benchmark_model.cc b/mace/tools/benchmark/benchmark_model.cc
deleted file mode 100644
index 06816386..00000000
--- a/mace/tools/benchmark/benchmark_model.cc
+++ /dev/null
@@ -1,349 +0,0 @@
-//
-// Copyright (c) 2017 XiaoMi All rights reserved.
-//
-
-#include "mace/core/net.h"
-#include "mace/core/runtime/opencl/opencl_runtime.h"
-#include "mace/tools/benchmark/stat_summarizer.h"
-#include "mace/utils/command_line_flags.h"
-#include "mace/utils/utils.h"
-
-#include <fstream>
-#include <thread>
-
-namespace mace {
-namespace str_util {
-
-std::vector<std::string> Split(const string &str, char delims) {
-  std::vector<std::string> result;
-  string tmp = str;
-  while (!tmp.empty()) {
-    result.push_back(tmp.data());
-    size_t next_offset = tmp.find(delims);
-    if (next_offset == string::npos) {
-      break;
-    } else {
-      tmp = tmp.substr(next_offset + 1);
-    }
-  }
-  return result;
-}
-
-bool SplitAndParseToInts(const string &str,
-                         char delims,
-                         std::vector<index_t> *result) {
-  string tmp = str;
-  while (!tmp.empty()) {
-    index_t dim = atoi(tmp.data());
-    result->push_back(dim);
-    size_t next_offset = tmp.find(delims);
-    if (next_offset == string::npos) {
-      break;
-    } else {
-      tmp = tmp.substr(next_offset + 1);
-    }
-  }
-  return true;
-}
-
-}  //  namespace str_util
-
-namespace benchmark {
-
-bool RunInference(NetBase *net,
-                  StatSummarizer *summarizer,
-                  int64_t *inference_time_us) {
-  RunMetadata run_metadata;
-  RunMetadata *run_metadata_ptr = nullptr;
-  if (summarizer) {
-    run_metadata_ptr = &run_metadata;
-  }
-  const int64_t start_time = NowInMicroSec();
-  bool s = net->Run(run_metadata_ptr);
-  const int64_t end_time = NowInMicroSec();
-
-  if (!s) {
-    LOG(ERROR) << "Error during inference.";
-    return s;
-  }
-  *inference_time_us = end_time - start_time;
-
-  if (summarizer != nullptr) {
-    summarizer->ProcessMetadata(run_metadata);
-  }
-
-  return true;
-}
-
-bool Run(NetBase *net,
-         StatSummarizer *summarizer,
-         int num_runs,
-         double max_time_sec,
-         int64_t sleep_sec,
-         int64_t *total_time_us,
-         int64_t *actual_num_runs) {
-  *total_time_us = 0;
-
-  LOG(INFO) << "Running benchmark for max " << num_runs << " iterators, max "
-            << max_time_sec << " seconds "
-            << (summarizer != nullptr ? "with " : "without ")
-            << "detailed stat logging, with " << sleep_sec
-            << "s sleep between inferences";
-
-  Stat<int64_t> stat;
-
-  bool util_max_time = (num_runs <= 0);
-  for (int i = 0; util_max_time || i < num_runs; ++i) {
-    int64_t inference_time_us = 0;
-    bool s = RunInference(net, summarizer, &inference_time_us);
-    stat.UpdateStat(inference_time_us);
-    (*total_time_us) += inference_time_us;
-    ++(*actual_num_runs);
-
-    if (max_time_sec > 0 && (*total_time_us / 1000000.0) > max_time_sec) {
-      break;
-    }
-
-    if (!s) {
-      LOG(INFO) << "Failed on run " << i;
-      return s;
-    }
-
-    if (sleep_sec > 0) {
-      std::this_thread::sleep_for(std::chrono::seconds(sleep_sec));
-    }
-  }
-
-  std::stringstream stream;
-  stat.OutputToStream(&stream);
-  LOG(INFO) << stream.str();
-
-  return true;
-}
-
-int Main(int argc, char **argv) {
-  std::string model_file = "/data/local/tmp/mobi_mace.pb";
-  std::string device = "CPU";
-  std::string input_layer_string = "input:0";
-  std::string input_layer_shape_string = "1,224,224,3";
-  std::string input_layer_type_string = "float";
-  std::string input_layer_files_string = "";
-  std::string output_layer_string = "output:0";
-  int max_num_runs = 10;
-  std::string max_time = "10.0";
-  std::string inference_delay = "-1";
-  std::string inter_benchmark_delay = "-1";
-  int num_threads = -1;
-  std::string benchmark_name = "";
-  std::string output_prefix = "";
-  bool show_sizes = false;
-  bool show_run_order = true;
-  int run_order_limit = 0;
-  bool show_time = true;
-  int time_limit = 10;
-  bool show_memory = true;
-  int memory_limit = 10;
-  bool show_type = true;
-  bool show_summary = true;
-  bool show_flops = false;
-  int warmup_runs = 2;
-
-  std::vector<Flag> flag_list = {
-      Flag("model_file", &model_file, "graph file name"),
-      Flag("device", &device, "CPU/NEON/OPENCL"),
-      Flag("input_layer", &input_layer_string, "input layer names"),
-      Flag("input_layer_shape", &input_layer_shape_string, "input layer shape"),
-      Flag("input_layer_type", &input_layer_type_string, "input layer type"),
-      Flag("input_layer_files", &input_layer_files_string,
-           "files to initialize the inputs with"),
-      Flag("output_layer", &output_layer_string, "output layer name"),
-      Flag("max_num_runs", &max_num_runs, "number of runs max"),
-      Flag("max_time", &max_time, "length to run max"),
-      Flag("inference_delay", &inference_delay,
-           "delay between runs in seconds"),
-      Flag("inter_benchmark_delay", &inter_benchmark_delay,
-           "delay between benchmarks in seconds"),
-      Flag("num_threads", &num_threads, "number of threads"),
-      Flag("benchmark_name", &benchmark_name, "benchmark name"),
-      Flag("output_prefix", &output_prefix, "benchmark output prefix"),
-      Flag("show_sizes", &show_sizes, "whether to show sizes"),
-      Flag("show_run_order", &show_run_order,
-           "whether to list stats by run order"),
-      Flag("run_order_limit", &run_order_limit,
-           "how many items to show by run order"),
-      Flag("show_time", &show_time, "whether to list stats by time taken"),
-      Flag("time_limit", &time_limit, "how many items to show by time taken"),
-      Flag("show_memory", &show_memory, "whether to list stats by memory used"),
-      Flag("memory_limit", &memory_limit,
-           "how many items to show by memory used"),
-      Flag("show_type", &show_type, "whether to list stats by op type"),
-      Flag("show_summary", &show_summary,
-           "whether to show a summary of the stats"),
-      Flag("show_flops", &show_flops, "whether to estimate the model's FLOPs"),
-      Flag("warmup_runs", &warmup_runs, "how many runs to initialize model"),
-  };
-  string usage = Flags::Usage(argv[0], flag_list);
-  const bool parse_result = Flags::Parse(&argc, argv, flag_list);
-
-  if (!parse_result) {
-    LOG(ERROR) << usage;
-    return -1;
-  }
-
-  std::vector<std::string> input_layers =
-      str_util::Split(input_layer_string, ',');
-  std::vector<std::string> input_layer_shapes =
-      str_util::Split(input_layer_shape_string, ':');
-  std::vector<string> input_layer_types =
-      str_util::Split(input_layer_type_string, ',');
-  std::vector<string> input_layer_files =
-      str_util::Split(input_layer_files_string, ':');
-  std::vector<string> output_layers = str_util::Split(output_layer_string, ',');
-  if ((input_layers.size() != input_layer_shapes.size()) ||
-      (input_layers.size() != input_layer_types.size())) {
-    LOG(ERROR) << "There must be the same number of items in --input_layer,"
-               << " --input_layer_shape, and --input_layer_type, for example"
-               << " --input_layer=input1,input2 --input_layer_type=float,float "
-               << " --input_layer_shape=1,224,224,4:1,20";
-    LOG(ERROR) << "--input_layer=" << input_layer_string << " ("
-               << input_layers.size() << " items)";
-    LOG(ERROR) << "--input_layer_type=" << input_layer_type_string << " ("
-               << input_layer_types.size() << " items)";
-    LOG(ERROR) << "--input_layer_shape=" << input_layer_shape_string << " ("
-               << input_layer_shapes.size() << " items)";
-    return -1;
-  }
-  const size_t inputs_count = input_layers.size();
-
-  if (argc > 1) {
-    LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
-    return -1;
-  }
-
-  LOG(INFO) << "Model file: [" << model_file << "]";
-  LOG(INFO) << "Device: [" << device << "]";
-  LOG(INFO) << "Input layers: [" << input_layer_string << "]";
-  LOG(INFO) << "Input shapes: [" << input_layer_shape_string << "]";
-  LOG(INFO) << "Input types: [" << input_layer_type_string << "]";
-  LOG(INFO) << "Output layers: [" << output_layer_string << "]";
-  LOG(INFO) << "Num runs: [" << max_num_runs << "]";
-  LOG(INFO) << "Inter-inference delay (seconds): [" << inference_delay << "]";
-  LOG(INFO) << "Inter-benchmark delay (seconds): [" << inter_benchmark_delay
-            << "]";
-  LOG(INFO) << "Num threads: [" << num_threads << "]";
-  LOG(INFO) << "Benchmark name: [" << benchmark_name << "]";
-  LOG(INFO) << "Output prefix: [" << output_prefix << "]";
-  LOG(INFO) << "Show sizes: [" << show_sizes << "]";
-  LOG(INFO) << "Warmup runs: [" << warmup_runs << "]";
-
-  const long int inter_inference_sleep_seconds =
-      std::strtol(inference_delay.c_str(), nullptr, 10);
-  const long int inter_benchmark_sleep_seconds =
-      std::strtol(inter_benchmark_delay.c_str(), nullptr, 10);
-  const double max_benchmark_time_seconds =
-      std::strtod(max_time.c_str(), nullptr);
-
-  std::unique_ptr<StatSummarizer> stats;
-
-  StatSummarizerOptions stats_options;
-  stats_options.show_run_order = show_run_order;
-  stats_options.run_order_limit = run_order_limit;
-  stats_options.show_time = show_time;
-  stats_options.time_limit = time_limit;
-  stats_options.show_memory = show_memory;
-  stats_options.memory_limit = memory_limit;
-  stats_options.show_type = show_type;
-  stats_options.show_summary = show_summary;
-  stats.reset(new StatSummarizer(stats_options));
-
-  DeviceType device_type;
-  DeviceType_Parse(device, &device_type);
-  VLOG(0) << device_type;
-
-
-  // load model
-  std::ifstream model_file_stream(model_file, std::ios::in | std::ios::binary);
-  if (!model_file_stream.is_open()) {
-    LOG(ERROR) << "model file open failed";
-    return -1;
-  }
-  NetDef net_def;
-  net_def.ParseFromIstream(&model_file_stream);
-  model_file_stream.close();
-
-  Workspace ws;
-  ws.LoadModelTensor(net_def, device_type);
-  // Load inputs
-  for (size_t i = 0; i < inputs_count; ++i) {
-    Tensor *input_tensor =
-        ws.CreateTensor(input_layers[i], GetDeviceAllocator(device_type), DT_FLOAT);
-    vector<index_t> shapes;
-    str_util::SplitAndParseToInts(input_layer_shapes[i], ',', &shapes);
-    input_tensor->Resize(shapes);
-    {
-      Tensor::MappingGuard input_guard(input_tensor);
-      float *input_data = input_tensor->mutable_data<float>();
-
-      // load input
-      if (i < input_layer_files.size()) {
-        std::ifstream in_file(input_layer_files[i],
-                              std::ios::in | std::ios::binary);
-        in_file.read(reinterpret_cast<char *>(input_data),
-                     input_tensor->size() * sizeof(float));
-        in_file.close();
-      }
-    }
-  }
-  auto net = CreateNet(net_def, &ws, device_type, NetMode::INIT);
-  net->Run();
-
-  // create net
-  net = CreateNet(net_def, &ws, device_type);
-
-  int64_t warmup_time_us = 0;
-  int64_t num_warmup_runs = 0;
-  if (warmup_runs > 0) {
-    bool status =
-        Run(net.get(), nullptr, warmup_runs, -1.0,
-            inter_inference_sleep_seconds, &warmup_time_us, &num_warmup_runs);
-    if (!status) {
-      LOG(ERROR) << "Failed at warm up run";
-    }
-  }
-
-  if (inter_benchmark_sleep_seconds > 0) {
-    std::this_thread::sleep_for(
-        std::chrono::seconds(inter_benchmark_sleep_seconds));
-  }
-  int64_t no_stat_time_us = 0;
-  int64_t no_stat_runs = 0;
-  bool status =
-      Run(net.get(), nullptr, max_num_runs, max_benchmark_time_seconds,
-          inter_inference_sleep_seconds, &no_stat_time_us, &no_stat_runs);
-  if (!status) {
-    LOG(ERROR) << "Failed at normal no-stat run";
-  }
-
-  int64_t stat_time_us = 0;
-  int64_t stat_runs = 0;
-  status = Run(net.get(), stats.get(), max_num_runs, max_benchmark_time_seconds,
-               inter_inference_sleep_seconds, &stat_time_us, &stat_runs);
-  if (!status) {
-    LOG(ERROR) << "Failed at normal stat run";
-  }
-
-  LOG(INFO) << "Average inference timings in us: "
-            << "Warmup: "
-            << (warmup_runs > 0 ? warmup_time_us / warmup_runs : 0) << ", "
-            << "no stats: " << no_stat_time_us / no_stat_runs << ", "
-            << "with stats: " << stat_time_us / stat_runs;
-
-  stats->PrintOperatorStats();
-
-  return 0;
-}
-
-}  //  namespace benchmark
-}  //  namespace mace
-
-int main(int argc, char **argv) { mace::benchmark::Main(argc, argv); }
diff --git a/mace/tools/benchmark/stat_summarizer.cc b/mace/tools/benchmark/stat_summarizer.cc
deleted file mode 100644
index 0b0d8e5a..00000000
--- a/mace/tools/benchmark/stat_summarizer.cc
+++ /dev/null
@@ -1,316 +0,0 @@
-//
-// Copyright (c) 2017 XiaoMi All rights reserved.
-//
-
-#include "mace/tools/benchmark/stat_summarizer.h"
-#include "mace/core/common.h"
-#include "mace/core/operator.h"
-
-#include <iomanip>
-#include <queue>
-
-namespace mace {
-
-StatSummarizer::StatSummarizer(const StatSummarizerOptions &options)
-    : options_(options) {}
-
-StatSummarizer::~StatSummarizer() {}
-
-void StatSummarizer::Reset() {
-  run_total_us_.Reset();
-  memory_.Reset();
-  details_.clear();
-}
-
-void StatSummarizer::ProcessMetadata(const RunMetadata &run_metadata) {
-  int64_t curr_total_us = 0;
-  int64_t mem_total = 0;
-
-  MACE_CHECK(!run_metadata.op_stats.empty());
-  int64_t first_node_start_us = run_metadata.op_stats[0].stats.start_micros;
-
-  int node_num = 0;
-  for (const auto &ops : run_metadata.op_stats) {
-    std::string name = ops.operator_name;
-    std::string op_type = ops.type;
-
-    ++node_num;
-    const int64_t curr_time = ops.stats.end_micros - ops.stats.start_micros;
-    curr_total_us += curr_time;
-    auto result = details_.emplace(name, Detail());
-    Detail *detail = &(result.first->second);
-
-    detail->start_us.UpdateStat(ops.stats.start_micros - first_node_start_us);
-    detail->rel_end_us.UpdateStat(curr_time);
-
-    // If this is the first pass, initialize some values.
-    if (result.second) {
-      detail->name = name;
-      detail->type = op_type;
-
-      detail->run_order = node_num;
-
-      detail->times_called = 0;
-    }
-
-    ++detail->times_called;
-  }
-
-  run_total_us_.UpdateStat(curr_total_us);
-  memory_.UpdateStat(mem_total);
-}
-
-std::string StatSummarizer::ShortSummary() const {
-  std::stringstream stream;
-  stream << "Timings (microseconds): ";
-  run_total_us_.OutputToStream(&stream);
-  stream << std::endl;
-
-  stream << "Memory (bytes): ";
-  memory_.OutputToStream(&stream);
-  stream << std::endl;
-
-  stream << details_.size() << " nodes observed" << std::endl;
-  return stream.str();
-}
-
-std::ostream &InitField(std::ostream &stream, int width) {
-  stream << "\t" << std::right << std::setw(width) << std::fixed
-         << std::setprecision(3);
-  return stream;
-}
-
-std::string StatSummarizer::HeaderString(const std::string &title) const {
-  std::stringstream stream;
-
-  stream << "============================== " << title
-         << " ==============================" << std::endl;
-
-  InitField(stream, 24) << "[node type]";
-  InitField(stream, 9) << "[start]";
-  InitField(stream, 9) << "[first]";
-  InitField(stream, 9) << "[avg ms]";
-  InitField(stream, 8) << "[%]";
-  InitField(stream, 8) << "[cdf%]";
-  InitField(stream, 10) << "[mem KB]";
-  InitField(stream, 9) << "[times called]";
-  stream << "\t"
-         << "[Name]";
-  return stream.str();
-}
-
-std::string StatSummarizer::ColumnString(const StatSummarizer::Detail &detail,
-                                         const int64_t cumulative_stat_on_node,
-                                         const Stat<int64_t> &stat) const {
-  const double start_ms = detail.start_us.avg() / 1000.0;
-  const double first_time_ms = detail.rel_end_us.first() / 1000.0;
-  const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
-  const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
-  const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
-  const int64_t times_called = detail.times_called / num_runs();
-
-  std::stringstream stream;
-  InitField(stream, 24) << detail.type;
-  InitField(stream, 9) << start_ms;
-  InitField(stream, 9) << first_time_ms;
-  InitField(stream, 9) << avg_time_ms;
-  InitField(stream, 7) << percentage << "%";
-  InitField(stream, 7) << cdf_percentage << "%";
-  InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
-  InitField(stream, 9) << times_called;
-  stream << "\t" << detail.name;
-
-  return stream.str();
-}
-
-void StatSummarizer::OrderNodesByMetric(
-    SortingMetric metric, std::vector<const Detail *> *details) const {
-  std::priority_queue<std::pair<std::string, const Detail *>> sorted_list;
-  const int num_nodes = details_.size();
-
-  for (const auto &det : details_) {
-    const Detail *detail = &(det.second);
-    std::stringstream stream;
-    stream << std::setw(20) << std::right << std::setprecision(10)
-           << std::fixed;
-
-    switch (metric) {
-      case BY_NAME:
-        stream << detail->name;
-        break;
-      case BY_RUN_ORDER:
-        stream << num_nodes - detail->run_order;
-        break;
-      case BY_TIME:
-        stream << detail->rel_end_us.avg();
-        break;
-      case BY_MEMORY:
-        stream << detail->mem_used.avg();
-        break;
-      case BY_TYPE:
-        stream << detail->type;
-        break;
-      default:
-        stream << "";
-        break;
-    }
-
-    sorted_list.emplace(stream.str(), detail);
-  }
-
-  while (!sorted_list.empty()) {
-    auto entry = sorted_list.top();
-    sorted_list.pop();
-    details->push_back(entry.second);
-  }
-}
-
-void StatSummarizer::ComputeStatsByType(
-    std::map<std::string, int64_t> *node_type_map_count,
-    std::map<std::string, int64_t> *node_type_map_time,
-    std::map<std::string, int64_t> *node_type_map_memory,
-    std::map<std::string, int64_t> *node_type_map_times_called,
-    int64_t *accumulated_us) const {
-  int64_t run_count = run_total_us_.count();
-
-  for (const auto &det : details_) {
-    const std::string node_name = det.first;
-    const Detail &detail = det.second;
-
-    int64_t curr_time_val =
-        static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
-    *accumulated_us += curr_time_val;
-
-    int64_t curr_memory_val = detail.mem_used.newest();
-
-    const std::string &node_type = detail.type;
-
-    (*node_type_map_count)[node_type] += 1;
-    (*node_type_map_time)[node_type] += curr_time_val;
-    (*node_type_map_memory)[node_type] += curr_memory_val;
-    (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
-  }
-}
-
-std::string StatSummarizer::GetStatsByNodeType() const {
-  std::stringstream stream;
-
-  stream << "============================== Summary by node type "
-            "=============================="
-         << std::endl;
-
-  LOG(INFO) << "Number of nodes executed: " << details_.size();
-
-  std::map<std::string, int64_t> node_type_map_count;
-  std::map<std::string, int64_t> node_type_map_time;
-  std::map<std::string, int64_t> node_type_map_memory;
-  std::map<std::string, int64_t> node_type_map_times_called;
-  int64_t accumulated_us = 0;
-
-  ComputeStatsByType(&node_type_map_count, &node_type_map_time,
-                     &node_type_map_memory, &node_type_map_times_called,
-                     &accumulated_us);
-
-  // Sort them.
-  std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
-      timings;
-  for (const auto &node_type : node_type_map_time) {
-    const int64_t mem_used = node_type_map_memory[node_type.first];
-    timings.emplace(node_type.second,
-                    std::pair<std::string, int64_t>(node_type.first, mem_used));
-  }
-
-  InitField(stream, 24) << "[Node type]";
-  InitField(stream, 9) << "[count]";
-  InitField(stream, 10) << "[avg ms]";
-  InitField(stream, 11) << "[avg %]";
-  InitField(stream, 11) << "[cdf %]";
-  InitField(stream, 10) << "[mem KB]";
-  InitField(stream, 10) << "[times called]";
-  stream << std::endl;
-
-  float cdf = 0.0f;
-  while (!timings.empty()) {
-    auto entry = timings.top();
-    timings.pop();
-
-    const std::string node_type = entry.second.first;
-    const float memory = entry.second.second / 1000.0f;
-
-    const int64_t node_type_total_us = entry.first;
-    const float time_per_run_ms = node_type_total_us / 1000.0f;
-
-    const float percentage =
-        ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
-    cdf += percentage;
-
-    InitField(stream, 24) << node_type;
-    InitField(stream, 9) << node_type_map_count[node_type];
-    InitField(stream, 10) << time_per_run_ms;
-    InitField(stream, 10) << percentage << "%";
-    InitField(stream, 10) << cdf << "%";
-    InitField(stream, 10) << memory;
-    InitField(stream, 9) << node_type_map_times_called[node_type];
-    stream << std::endl;
-  }
-  stream << std::endl;
-  return stream.str();
-}
-
-std::string StatSummarizer::GetStatsByMetric(const std::string &title,
-                                             SortingMetric sorting_metric,
-                                             int num_stats) const {
-  std::vector<const Detail *> details;
-  OrderNodesByMetric(sorting_metric, &details);
-
-  double cumulative_stat_on_node = 0;
-
-  std::stringstream stream;
-  stream << HeaderString(title) << std::endl;
-  int stat_num = 0;
-  for (auto detail : details) {
-    ++stat_num;
-    if (num_stats > 0 && stat_num > num_stats) {
-      break;
-    }
-
-    cumulative_stat_on_node += detail->rel_end_us.sum();
-    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
-           << std::endl;
-  }
-  stream << std::endl;
-  return stream.str();
-}
-
-std::string StatSummarizer::GetOutputString() const {
-  std::stringstream stream;
-  if (options_.show_run_order) {
-    stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
-                               options_.run_order_limit);
-  }
-  if (options_.show_time) {
-    stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
-                               options_.time_limit);
-  }
-  if (options_.show_memory) {
-    stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
-                               options_.memory_limit);
-  }
-  if (options_.show_type) {
-    stream << GetStatsByNodeType();
-  }
-  if (options_.show_summary) {
-    stream << ShortSummary() << std::endl;
-  }
-  return stream.str();
-}
-
-void StatSummarizer::PrintOperatorStats() const {
-  std::string output = GetOutputString();
-  std::istringstream iss(output);
-  for (std::string line; std::getline(iss, line);) {
-    LOG(INFO) << line;
-  }
-}
-
-}  // namespace mace
diff --git a/mace/tools/benchmark/stat_summarizer.h b/mace/tools/benchmark/stat_summarizer.h
deleted file mode 100644
index 368b1e30..00000000
--- a/mace/tools/benchmark/stat_summarizer.h
+++ /dev/null
@@ -1,199 +0,0 @@
-//
-// Copyright (c) 2017 XiaoMi All rights reserved.
-//
-
-#ifndef MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_
-#define MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_
-
-#include <stdlib.h>
-#include <cmath>
-#include <limits>
-#include <map>
-#include <sstream>
-#include <string>
-#include <vector>
-
-namespace mace {
-
-class RunMetadata;
-
-template <typename ValueType, typename HighPrecisionValueType = double>
-class Stat {
- public:
-  void UpdateStat(ValueType v) {
-    if (count_ == 0) {
-      first_ = v;
-    }
-
-    newest_ = v;
-    max_ = std::max(v, max_);
-    min_ = std::min(v, min_);
-    ++count_;
-    sum_ += v;
-    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
-  }
-
-  void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); }
-
-  bool empty() const { return count_ == 0; }
-
-  ValueType first() const { return first_; }
-
-  ValueType newest() const { return newest_; }
-
-  ValueType max() const { return max_; }
-
-  ValueType min() const { return min_; }
-
-  int64_t count() const { return count_; }
-
-  ValueType sum() const { return sum_; }
-
-  HighPrecisionValueType squared_sum() const { return squared_sum_; }
-
-  bool all_same() const { return (count_ == 0 || min_ == max_); }
-
-  HighPrecisionValueType avg() const {
-    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
-                   : static_cast<HighPrecisionValueType>(sum_) / count_;
-  }
-
-  ValueType std_deviation() const {
-    return all_same() ? 0 : std::sqrt(squared_sum_ / count_ - avg() * avg());
-  }
-
-  void OutputToStream(std::ostream *stream) const {
-    if (empty()) {
-      *stream << "count=0";
-    } else if (all_same()) {
-      *stream << "count=" << count_ << " curr=" << newest_;
-      if (count_ > 1) *stream << "(all same)";
-    } else {
-      *stream << "count=" << count_ << " first=" << first_
-              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
-              << " avg=" << avg() << " std=" << std_deviation();
-    }
-  }
-
-  friend std::ostream &operator<<(std::ostream &stream,
-                                  const Stat<ValueType> &stat) {
-    stat.OutputToStream(&stream);
-    return stream;
-  }
-
- private:
-  ValueType first_ = 0;
-  ValueType newest_ = 0;
-  ValueType max_ = std::numeric_limits<ValueType>::min();
-  ValueType min_ = std::numeric_limits<ValueType>::max();
-  int64_t count_ = 0;
-  ValueType sum_ = 0;
-  HighPrecisionValueType squared_sum_ = 0;
-};
-
-// Used to control the output of the statistics summarizer;
-class StatSummarizerOptions {
- public:
-  StatSummarizerOptions()
-      : show_run_order(true),
-        run_order_limit(0),
-        show_time(true),
-        time_limit(10),
-        show_memory(true),
-        memory_limit(10),
-        show_type(true),
-        show_summary(true) {}
-
-  bool show_run_order;
-  int run_order_limit;
-  bool show_time;
-  int time_limit;
-  bool show_memory;
-  int memory_limit;
-  bool show_type;
-  bool show_summary;
-};
-
-// A StatSummarizer assists in performance analysis of Graph executions.
-//
-// It summarizes time spent executing (on GPU/CPU), memory used etc. across
-// multiple executions of a single Graph from the StepStats collected during
-// graph execution.
-//
-// See tensorflow/tools/benchmark/benchmark_model.cc for an example usage.
-class StatSummarizer {
- public:
-  enum SortingMetric {
-    BY_NAME,
-    BY_RUN_ORDER,
-    BY_TIME,
-    BY_MEMORY,
-    BY_TYPE,
-  };
-
-  explicit StatSummarizer(const StatSummarizerOptions &options);
-
-  ~StatSummarizer();
-
-  // Adds another run's StepStats output to the aggregate counts.
-  void ProcessMetadata(const RunMetadata &run_metadata);
-
-  // Returns a string detailing the accumulated runtime stats in a tab-separated
-  // format which can be pasted into a spreadsheet for further analysis.
-  std::string GetOutputString() const;
-
-  std::string ShortSummary() const;
-
-  // Prints the string returned by GetOutputString().
-  void PrintOperatorStats() const;
-
-  void ComputeStatsByType(
-      std::map<std::string, int64_t> *node_type_map_count,
-      std::map<std::string, int64_t> *node_type_map_time,
-      std::map<std::string, int64_t> *node_type_map_memory,
-      std::map<std::string, int64_t> *node_type_map_times_called,
-      int64_t *accumulated_us) const;
-
-  std::string GetStatsByNodeType() const;
-
-  std::string GetStatsByMetric(const std::string &title,
-                               SortingMetric sorting_metric,
-                               int num_stats) const;
-
-  void Reset();
-
-  // Returns number of runs.
-  int num_runs() const { return run_total_us_.count(); }
-
-  // Returns stats of total microseconds spent by all nodes in each run.
-  const Stat<int64_t> &run_total_us() const { return run_total_us_; }
-
- private:
-  struct Detail {
-    std::string name;
-    std::string type;
-    int64_t run_order;
-    Stat<int64_t> start_us;
-    Stat<int64_t> rel_end_us;
-    Stat<int64_t> mem_used;
-    int64_t times_called;
-  };
-
-  void OrderNodesByMetric(SortingMetric sorting_metric,
-                          std::vector<const Detail *> *details) const;
-
-  std::string HeaderString(const std::string &title) const;
-  std::string ColumnString(const Detail &detail,
-                           const int64_t cumulative_stat_on_node,
-                           const Stat<int64_t> &stat) const;
-
-  Stat<int64_t> run_total_us_;
-  Stat<int64_t> memory_;
-
-  std::map<std::string, Detail> details_;
-  StatSummarizerOptions options_;
-};
-
-}  //  namespace mace
-
-#endif  // MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_
-- 
GitLab