未验证 提交 6da07de9 编写于 作者: T TANMAY DAS 提交者: GitHub

Generic benchmarking. (#2125)

BUG=https://b.corp.google.com/277097397
上级 c3fddae7
......@@ -65,8 +65,13 @@ void MicroProfiler::LogCsv() const {
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
MicroPrintf("\"Event\",\"Tag\",\"Ticks\"");
for (int i = 0; i < num_events_; ++i) {
#if defined(HEXAGON) || defined(CMSIS_NN)
int ticks = end_ticks_[i] - start_ticks_[i];
MicroPrintf("%d,%s,%d", i, tags_[i], ticks);
#else
uint32_t ticks = end_ticks_[i] - start_ticks_[i];
MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks);
#endif
}
#endif
}
......
# Build targets for the TFLM generic model benchmark
# (tensorflow/lite/micro/tools/benchmarking).

# Header-only helper that registers the benchmark's supported ops on a
# MicroMutableOpResolver.
cc_library(
    name = "op_resolver",
    hdrs = ["op_resolver.h"],
    deps = ["//tensorflow/lite/micro:op_resolvers"],
)

# Logs arena/allocation metrics recorded by the RecordingMicroAllocator.
cc_library(
    name = "metrics",
    srcs = ["metrics.cc"],
    hdrs = ["metrics.h"],
    deps = [
        ":log_utils",
        "//tensorflow/lite/micro:micro_profiler",
        "//tensorflow/lite/micro:recording_allocators",
        "//tensorflow/lite/micro/arena_allocator:recording_simple_memory_allocator",
    ],
)

# String/table/CSV formatting helpers used by the benchmark's log output.
cc_library(
    name = "log_utils",
    srcs = ["log_utils.cc"],
    hdrs = ["log_utils.h"],
    deps = [
        "//tensorflow/lite/micro:micro_log",
    ],
)

# Benchmark implementation: loads a .tflite model, invokes it with random
# input, and reports timing and memory metrics.
cc_library(
    name = "generic_benchmark_lib",
    srcs = ["generic_model_benchmark.cc"],
    deps = [
        ":log_utils",
        ":metrics",
        ":op_resolver",
        "//tensorflow/lite/c:c_api_types",
        "//tensorflow/lite/c:common",
        "//tensorflow/lite/micro:micro_log",
        "//tensorflow/lite/micro:micro_profiler",
        "//tensorflow/lite/micro:op_resolvers",
        "//tensorflow/lite/micro:recording_allocators",
        "//tensorflow/lite/micro:system_setup",
        "//tensorflow/lite/schema:schema_fbs",
    ],
)

# Standalone benchmark binary.
cc_binary(
    name = "tflm_benchmark",
    deps = [":generic_benchmark_lib"],
)
# Makefile integration for the generic TFLM model benchmark.
MICROLITE_BENCHMARK_ROOT_DIR := $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/benchmarking

# Sources compiled into the benchmark target.
GENERIC_BENCHMARK_SRCS := \
  $(MICROLITE_BENCHMARK_ROOT_DIR)/generic_model_benchmark.cc \
  $(MICROLITE_BENCHMARK_ROOT_DIR)/log_utils.cc \
  $(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.cc

GENERIC_BENCHMARK_HDRS := \
  $(MICROLITE_BENCHMARK_ROOT_DIR)/op_resolver.h \
  $(MICROLITE_BENCHMARK_ROOT_DIR)/log_utils.h \
  $(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.h

# The benchmark target is excluded on bluepill and on the hifi5/hifimini
# Xtensa architectures.
ifneq ($(TARGET),bluepill)
ifneq ($(TARGET_ARCH), $(filter $(TARGET_ARCH), hifi5 hifimini))
$(eval $(call microlite_test,tflm_benchmark,\
$(GENERIC_BENCHMARK_SRCS),$(GENERIC_BENCHMARK_HDRS),))
endif
endif
\ No newline at end of file
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <sys/stat.h>
#include <sys/types.h>
#include <memory>
#include <random>
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_log.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"
#include "tensorflow/lite/micro/recording_micro_interpreter.h"
#include "tensorflow/lite/micro/system_setup.h"
#include "tensorflow/lite/micro/tools/benchmarking/log_utils.h"
#include "tensorflow/lite/micro/tools/benchmarking/metrics.h"
#include "tensorflow/lite/micro/tools/benchmarking/op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
/*
* Generic model benchmark. Evaluates runtime performance of a provided model
* with random inputs.
*/
namespace tflite {
namespace {

// Convenience aliases: the TFLM profiler and an op resolver sized to hold
// every operator registered in CreateOpResolver().
using Profiler = ::tflite::MicroProfiler;
using TflmOpResolver = tflite::MicroMutableOpResolver<96>;

// Status returned by streaming models whose interpreter needs more input
// before it can produce a result (see the invocation loop in Benchmark());
// not part of the TfLiteStatus enum, hence a plain int.
constexpr int kTfLiteAbort = -9;

// Seed used for the random input. Input data shouldn't affect invocation timing
// so randomness isn't really needed.
constexpr uint32_t kRandomSeed = 0xFB;

// Which format should be used to output debug information.
constexpr PrettyPrintType kPrintType = PrettyPrintType::kTable;

// 1 MiB arena backing all interpreter allocations.
constexpr size_t kTensorArenaSize = 1024 * 1024;
constexpr int kNumResourceVariable = 100;

// Size of the buffer the model file is copied into; larger model files are
// rejected by ReadFile(). NOTE(review): this value looks tailored to one
// particular model — confirm before benchmarking other models.
constexpr size_t kModelSize = 511408;
void SetRandomInput(const uint32_t random_seed,
tflite::MicroInterpreter& interpreter) {
std::mt19937 eng(random_seed);
std::uniform_int_distribution<uint32_t> dist(0, 255);
for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
TfLiteTensor* input = interpreter.input_tensor(i);
// Pre-populate input tensor with random values.
int8_t* input_values = tflite::GetTensorData<int8_t>(input);
for (size_t j = 0; j < input->bytes; ++j) {
input_values[j] = dist(eng);
}
}
}
// Reads the file at `file_name` into `buffer` (of capacity `buffer_size`).
// Returns true on success; on failure logs a diagnostic and returns false.
bool ReadFile(const char* file_name, void* buffer, size_t buffer_size) {
  // RAII: the FILE* is closed automatically on every return path.
  std::unique_ptr<FILE, decltype(&fclose)> file(fopen(file_name, "rb"), fclose);
  // Bug fix: the original code never checked fopen's result, so a missing
  // file caused fileno(nullptr) / a null FILE* dereference below.
  if (file == nullptr) {
    MicroPrintf("Failed to open file: %s\n", file_name);
    return false;
  }
  struct stat sb;

// For CMSIS_NN, the compilation is failing with the following error
// 'fileno' was not declared in this scope
// TODO(b/290988791): Investigate why fileno is not defined in arm toolchain.
#if defined(CMSIS_NN)
  if (fstat(file.get()->_file, &sb) != 0) {
#else
  if (fstat(fileno(file.get()), &sb) != 0) {
#endif
    MicroPrintf("Failed to get file size of: %s\n", file_name);
    return false;
  }
  if (!buffer) {
    MicroPrintf("Malloc of buffer to hold copy of '%s' failed\n", file_name);
    return false;
  }
  // Reject regular files that cannot fit in the caller's buffer. The
  // arguments are cast to match %lu — the original passed a size_t and an
  // off_t to %ld, a format/argument mismatch.
  if (S_ISREG(sb.st_mode) != 0 &&
      (sb.st_size < 0 || (static_cast<size_t>(sb.st_size) > buffer_size))) {
    MicroPrintf(
        "Buffer size (%lu) to hold the model is less than required %lu.\n",
        static_cast<unsigned long>(buffer_size),
        static_cast<unsigned long>(sb.st_size));
    return false;
  }
  // Require the entire file to be read: a short count means a truncated or
  // failed read, and a zero-byte file is not a valid model. (The original
  // only checked for a zero return, accepting partial reads.)
  const size_t file_size = static_cast<size_t>(sb.st_size);
  if (file_size == 0 ||
      fread(buffer, sizeof(char), file_size, file.get()) != file_size) {
    MicroPrintf("Unable to read the model file or the model file is empty.\n");
    return false;
  }
  return true;
}
// Runs the benchmark on the model at `model_file_name`: loads the model,
// logs allocation/initialization ticks, then invokes it with random inputs
// (repeatedly, to support streaming models) while logging per-event and
// per-tag tick profiles. Returns 0 on success, non-zero on failure.
int Benchmark(const char* model_file_name) {
  Profiler profiler;
  // Both large buffers are static: together they total ~1.5 MiB, far larger
  // than typical embedded stacks. Bug fix: the model buffer was previously a
  // plain local, risking a stack overflow on the target.
  alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
  alignas(16) static unsigned char model_file_content[kModelSize];

  if (!ReadFile(model_file_name, model_file_content, kModelSize)) {
    return -1;
  }

  // Time the flatbuffer-mapping step as its own profiler event.
  uint32_t event_handle = profiler.BeginEvent("TfliteGetModel");
  const tflite::Model* model = tflite::GetModel(model_file_content);
  profiler.EndEvent(event_handle);

  TflmOpResolver op_resolver;
  TF_LITE_ENSURE_STATUS(CreateOpResolver(op_resolver));

  tflite::RecordingMicroAllocator* allocator(
      tflite::RecordingMicroAllocator::Create(tensor_arena, kTensorArenaSize));
  tflite::RecordingMicroInterpreter interpreter(
      model, op_resolver, allocator,
      tflite::MicroResourceVariables::Create(allocator, kNumResourceVariable),
      &profiler);
  TF_LITE_ENSURE_STATUS(interpreter.AllocateTensors());
  // Log the initialization/allocation events, then clear so the loop below
  // profiles only inference.
  profiler.Log();
  profiler.ClearEvents();
  MicroPrintf("");  // null MicroPrintf serves as a newline.

  // For streaming models, the interpreter will return kTfLiteAbort if the
  // model does not yet have enough data to make an inference. As such, we
  // need to invoke the interpreter multiple times until we either receive an
  // error or kTfLiteOk. This loop also works for non-streaming models, as
  // they'll just return kTfLiteOk after the first invocation.
  uint32_t seed = kRandomSeed;
  while (true) {
    SetRandomInput(seed++, interpreter);
    TfLiteStatus status = interpreter.Invoke();
    if ((status != kTfLiteOk) && (static_cast<int>(status) != kTfLiteAbort)) {
      MicroPrintf("Model interpreter invocation failed: %d\n", status);
      return -1;
    }

    profiler.Log();
    MicroPrintf("");  // null MicroPrintf serves as a newline.
    profiler.LogTicksPerTagCsv();
    MicroPrintf("");  // null MicroPrintf serves as a newline.
    profiler.ClearEvents();

    if (status == kTfLiteOk) {
      break;
    }
  }

  LogAllocatorEvents(*allocator, kPrintType);
  return 0;
}
} // namespace
} // namespace tflite
int main(int argc, char** argv) { return tflite::Benchmark(argv[1]); }
\ No newline at end of file
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/tools/benchmarking/log_utils.h"
#include <stdio.h>
#include <stdlib.h>
namespace tflite {
// Returns the length of the longest string in the first `count` entries of
// `strings`.
int GetLongestStringLength(const char strings[][kMaxStringLength],
                           const int count) {
  // Scan every entry, tracking the widest string seen so far.
  int longest = 0;
  for (int idx = 0; idx < count; ++idx) {
    const int length = static_cast<int>(strlen(strings[idx]));
    longest = std::max(longest, length);
  }
  return longest;
}
// Writes `padding` spaces plus enough extra to pad an entry of length `size`
// out to the widest entry (`max_size`) in its column.
void FillColumnPadding(char* string, const int size, const int max_size,
                       const int padding) {
  const int pad_length = (max_size - size) + padding;
  FillString(string, pad_length, kMaxStringLength);
}
// Writes `size` copies of `value` into `string`, then zero-fills the rest of
// the buffer (so the result is always NUL-terminated when size < buffer_size).
// Bug fix: the original guarded on `buffer_size <= strlen(string)`, reading
// the buffer's previous contents — callers (e.g. PrintTable's separator) pass
// uninitialized arrays, making that strlen undefined behavior, and the guard
// usually skipped the fill entirely. The fill is now unconditional and never
// inspects the existing contents; writes are bounded by buffer_size.
void FillString(char* string, const int size, const int buffer_size,
                const char value) {
  for (int i = 0; i < buffer_size; ++i) {
    string[i] = (i < size) ? value : 0;
  }
}
// Appends `input` onto `output`. A non-negative `size` limits how many
// characters are appended; a negative `size` appends the whole string.
void MicroStrcat(char* output, const char* input, const int size) {
  if (size >= 0) {
    strncat(output, input, size);  // NOLINT: no dynamic memory available.
  } else {
    strcat(output, input);  // NOLINT: strcat required due to no dynamic memory.
  }
}
// Copies `input` (including its NUL terminator) into `output`. The caller
// guarantees `output` is large enough; no dynamic memory is available.
void MicroStrcpy(char* output, const char* input) {
  const size_t length = strlen(input);
  memcpy(output, input, length + 1);  // +1 copies the terminator too.
}
// Formats numerator/denominator as fixed-point decimal text with
// `decimal_places` digits after the point (printf float support is not
// assumed on these targets).
// Bug fixes vs. the original:
//  - the fractional part is zero-padded (105/100 @ 2 places is "1.05", the
//    old "%d.%d" printed "1.5");
//  - negative results no longer embed a '-' in the fractional digits, and a
//    -1 < result < 0 value keeps its sign;
//  - a zero denominator emits "NaN" instead of dividing by zero (UB).
void FormatIntegerDivide(char* output, const int64_t numerator,
                         const int64_t denominator, const int decimal_places) {
  int64_t multiplier = 1;
  for (int i = 0; i < decimal_places; ++i) {
    multiplier *= 10;
  }
  if (denominator == 0) {
    sprintf(output, "NaN");  // NOLINT: sprintf is required.
    return;
  }
  const int64_t total = numerator * multiplier / denominator;
  const int whole = static_cast<int>(total / multiplier);
  int fractional = static_cast<int>(total % multiplier);
  if (fractional < 0) {
    fractional = -fractional;  // Sign is carried by the whole part / prefix.
  }
  // Preserve the sign when the whole part truncates to 0 (e.g. -0.50).
  const char* sign = (total < 0 && whole == 0) ? "-" : "";
  sprintf(output, "%s%d.%0*d", sign, whole, decimal_places,
          fractional);  // NOLINT: sprintf is required.
}
// Formats numerator/denominator as a percentage with `decimal_places` digits:
// a percentage is just the ratio scaled by 100 before fixed-point division.
void FormatAsPercentage(char* output, const int64_t numerator,
                        const int64_t denominator, const int decimal_places) {
  const int64_t scaled_numerator = numerator * 100;
  FormatIntegerDivide(output, scaled_numerator, denominator, decimal_places);
}
// Prints the "[[ FORMAT ]]: name" banner identifying which output format
// (CSV or table) follows.
void PrettyPrintTableHeader(PrettyPrintType type, const char* table_name) {
  if (type == PrettyPrintType::kCsv) {
    MicroPrintf("[[ CSV ]]: %s", table_name);
  } else if (type == PrettyPrintType::kTable) {
    MicroPrintf("[[ TABLE ]]: %s", table_name);
  }
}
// Formats a 32-bit signed integer into `output` as decimal text.
template <>
void FormatNumber<int32_t>(char* output, int32_t value) {
// %ld under HEXAGON/CMSIS_NN — presumably int32_t is `long` on those
// toolchains, making %d a format mismatch there. TODO confirm.
#if defined(HEXAGON) || defined(CMSIS_NN)
  sprintf(output, "%ld", value);  // NOLINT: sprintf required.
#else
  sprintf(output, "%d", value);  // NOLINT: sprintf required.
#endif
}

// Formats a size_t into `output` using the dedicated %zu conversion.
template <>
void FormatNumber<size_t>(char* output, size_t value) {
  sprintf(output, "%zu", value);  // NOLINT: sprintf required.
}

// Formats a float with three fixed decimal places: scales by 1000 and reuses
// the integer fixed-point formatter (avoids printf float support).
template <>
void FormatNumber<float>(char* output, float value) {
  constexpr int64_t kDenominator = 1000;
  FormatIntegerDivide(output, static_cast<int64_t>(value * kDenominator),
                      kDenominator, 3);
}

// Formats a double with three fixed decimal places, same scheme as float.
template <>
void FormatNumber<double>(char* output, double value) {
  constexpr int64_t kDenominator = 1000;
  FormatIntegerDivide(output, static_cast<int64_t>(value * kDenominator),
                      kDenominator, 3);
}
} // namespace tflite
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TFLM_BENCHMARK_INTERNAL_LOG_UTILS_H_
#define TFLM_BENCHMARK_INTERNAL_LOG_UTILS_H_
#include <algorithm>
#include <cstdarg>
#include <cstdint>
#include <cstring>
#include "tensorflow/lite/micro/micro_log.h"
namespace tflite {
// The maximum length of a string.
static constexpr int kMaxStringLength = 32;
// The maximum length of a table row, applies to the header as well.
static constexpr int kMaxRowLength = 100;
// The default padding between columns in a table.
static constexpr int kDefaultColumnPadding = 4;
// Defines how formatted data is printed to stdout.
enum class PrettyPrintType {
// Prints as a CSV file.
kCsv,
// Prints as a formatted table.
kTable,
};
// Returns the length of the longest string in an array.
// Args:
// - strings: An array of strings.
// - count: The number of strings in the array.
int GetLongestStringLength(const char strings[][kMaxStringLength], int count);
// Adds padding between two columns in a table.
// ex) "hello" is being inserted into a column. The largest value in that column
// is 10, and there's a global padding of 4 spaces. Therefore, 9 spaces (10
// - 5 + 4) are added as padding.
// Args:
// - string: The input padding string.
// - size: The size of the string that's being inserted into a column.
// - max_size: The size of the largest string in the column.
// - padding: The amount of padding to add to each column regardless of its
// size.
void FillColumnPadding(char* string, int size, int max_size,
int padding = kDefaultColumnPadding);
// Fills a string with a specified value.
// Args:
// - string: The input string. This is filled in with the specified value.
// - size: The size of the string after being filled in. This must be less than
// the allocated space for the string.
// - buffer_size: The size of the string's buffer.
// - value: The value to insert into the string. Defaults to a space.
void FillString(char* string, int size, int buffer_size, char value = ' ');
// Concatenates the input string onto the first.
// Args:
// - output: The destination string for where to append input.
// - input: The input string to concatenate.
// - size: The number of characters to concatenate from the first string. If
// negative, the whole input string will be concatenated.
void MicroStrcat(char* output, const char* input, int size = -1);
// Copies the input string into the output.
void MicroStrcpy(char* output, const char* input);
// Formats a division operation to have a specified number of decimal places.
// Args:
// - output: The output string to be formatted.
// - numerator: The numerator in the division operation.
// - denominator: The denominator in the division operation.
// - decimal places: The number of decimal places to print to.
void FormatIntegerDivide(char* output, int64_t numerator, int64_t denominator,
int decimal_places);
// Formats a division operation as a percentage.
// Args:
// - output: The output string to be formatted.
// - numerator: The numerator in the division operation.
// - denominator: The denominator in the division operation.
// - decimal places: The number of decimal places to print to.
void FormatAsPercentage(char* output, int64_t numerator, int64_t denominator,
int decimal_places);
void PrettyPrintTableHeader(PrettyPrintType type, const char* table_name);
// Formats a number as a string.
// Args:
// - output: The location of where to write the formatted number.
// - value: The value to write to a string.
template <typename T>
void FormatNumber(char* output, T value);
// Pretty prints a table to stdout.
// Note: kMaxRows and kColumns should describe the allocated size of the table,
// not the amount of data that is populated. It is required that all
// columns are filled out, but not all rows.
//
// ex) PrintTable<3, 25>(headers, data, 4);
// This will print a table with 3 columns and 4 rows. In this example, it
// is required that data is defined as char[3][25][kMaxStringLength] to
// properly print.
//
// op cycles cpu %
// -------------------------
// foo | 1000 | 10
// bar | 2500 | 25
// baz | 1000 | 10
// lorem | 2000 | 20
//
// Args:
// - headers: A 1D array of strings containing the headers of the table. This
// must be equal in size to kColumns.
// - data: A 2D array of string data organized in [columns, rows]. As stated
// above, it is required that all columns are populated, but not all rows.
// - rows: The number of populated rows in `data`.
template <int kMaxRows, int kColumns>
void PrintTable(const char headers[kColumns][kMaxStringLength],
                const char data[kColumns][kMaxRows][kMaxStringLength],
                const int rows) {
  // Get the maximum width for each column: the widest of the column's data
  // entries and its header.
  int max_column_width[kColumns];
  for (int i = 0; i < kColumns; ++i) {
    max_column_width[i] = std::max(GetLongestStringLength(data[i], rows),
                                   static_cast<int>(strlen(headers[i])));
  }

  // Add padding between each item in the header so it can be printed on one
  // line.
  char header_spaces[kColumns][kMaxStringLength];
  for (int i = 0; i < kColumns; ++i) {
    FillColumnPadding(header_spaces[i], strlen(headers[i]), max_column_width[i],
                      kDefaultColumnPadding + 2);
  }

  // Print the header.
  char header[kMaxRowLength];
  memset(header, 0, kMaxRowLength);
  for (int i = 0; i < kColumns; ++i) {
    MicroStrcat(header, headers[i]);
    MicroStrcat(header, header_spaces[i]);
  }
  MicroPrintf("%s", header);

  // Print a separator line between the header and the data.
  // NOTE(review): "- 1" makes the separator one char shorter than the header;
  // presumably to drop trailing padding — confirm intended.
  char separator[kMaxRowLength];
  FillString(separator, strlen(header) - 1, kMaxRowLength, '-');
  MicroPrintf("%s", separator);

  for (int i = 0; i < rows; ++i) {
    char spaces[kColumns][kMaxStringLength];
    for (int j = 0; j < kColumns; ++j) {
      FillColumnPadding(spaces[j], strlen(data[j][i]), max_column_width[j]);
    }
    char row[kMaxRowLength];
    memset(row, 0, kMaxRowLength);
    // Concatenate each column in a row with the format "[data][padding]| "
    for (int j = 0; j < kColumns; ++j) {
      MicroStrcat(row, data[j][i]);
      MicroStrcat(row, spaces[j]);
      MicroStrcat(row, "| ");
    }
    MicroPrintf("%s", row);
  }
  // Bug fix: pass the separator as an argument, never as the format string
  // (the original `MicroPrintf(separator)` would misinterpret any '%' as a
  // conversion specifier), matching the "%s" usage above.
  MicroPrintf("%s", separator);
  MicroPrintf("");
}
// Pretty prints a csv to stdout.
// Note: kMaxRows and kColumns should describe the allocated size of the table,
// not the amount of data that is populated. It is required that all
// columns are filled out, but not all rows.
//
// ex)
// op,cycles,%cpu
// foo,1000,10
// bar,2500,25
// baz,1000,10
//
// Args:
// - headers: A 1D array of strings containing the headers of the table. This
// must be equal in size to kColumns.
// - data: A 2D array of string data organized in [columns, rows]. As stated
// above, it is required that all columns are populated, but not all rows.
// - rows: The number of populated rows in `data`.
template <int kMaxRows, int kColumns>
void PrintCsv(const char headers[kColumns][kMaxStringLength],
              const char data[kColumns][kMaxRows][kMaxStringLength],
              const int rows) {
  // Emit the comma-separated header line first, reusing a single line buffer
  // for the header and every data row.
  char line[kMaxRowLength];
  memset(line, 0, kMaxRowLength);
  for (int col = 0; col < kColumns; ++col) {
    MicroStrcat(line, headers[col]);
    if (col + 1 < kColumns) {
      MicroStrcat(line, ",");
    }
  }
  MicroPrintf("%s", line);

  // Then one comma-separated line per populated row; data is [column][row].
  for (int row = 0; row < rows; ++row) {
    memset(line, 0, kMaxRowLength);
    for (int col = 0; col < kColumns; ++col) {
      MicroStrcat(line, data[col][row]);
      if (col + 1 < kColumns) {
        MicroStrcat(line, ",");
      }
    }
    MicroPrintf("%s", line);
  }

  MicroPrintf("");  // Serves as a new line.
}
// Prints a 2D array of strings in a formatted manner along with a table name
// that includes the table type.
//
// Note: kMaxRows and kColumns should describe the allocated size of the table,
// not the amount of data that is populated. It is required that all
// columns are filled out, but not all rows.
//
// ex) PrettyPrint::kCsv will print a csv with a [[ CSV ]]: table_name header.
//
// Args:
// - headers: A 1D array of strings containing the headers of the table. This
// must be equal in size to kColumns.
// - data: A 2D array of string data organized in [columns, rows]. As stated
// above, it is required that all columns are populated, but not all rows.
// - rows: The number of populated rows in `data`.
// - type: The format type that should be used to pretty print.
// - table_name: The name of the table to be printed alongside the format type.
template <int kMaxRows, int kColumns>
void PrintFormattedData(const char headers[kColumns][kMaxStringLength],
                        const char data[kColumns][kMaxRows][kMaxStringLength],
                        const int rows, const PrettyPrintType type,
                        const char* table_name) {
  // Print the "[[ FORMAT ]]: name" banner, then dispatch on the format type.
  PrettyPrintTableHeader(type, table_name);
  if (type == PrettyPrintType::kCsv) {
    PrintCsv<kMaxRows, kColumns>(headers, data, rows);
  } else if (type == PrettyPrintType::kTable) {
    PrintTable<kMaxRows, kColumns>(headers, data, rows);
  }
}
} // namespace tflite
#endif // TFLM_BENCHMARK_INTERNAL_LOG_UTILS_H_
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/tools/benchmarking/metrics.h"
#include <sys/types.h>
#include <cstddef>
#include "tensorflow/lite/micro/tools/benchmarking/log_utils.h"
namespace tflite {
// Logs a 3x3 table/CSV summarizing arena usage: total bytes, non-persistent
// ("Head") bytes, and persistent ("Tail") bytes, each with its percentage of
// the total.
void LogArenaAllocations(
    const tflite::RecordingSingleArenaBufferAllocator* allocator,
    const PrettyPrintType type) {
  constexpr int kArenaRows = 3;
  constexpr int kArenaCols = 3;

  const size_t total_bytes = allocator->GetUsedBytes();
  // Row order must line up with `titles` below.
  size_t allocations[kArenaRows] = {total_bytes,
                                    allocator->GetNonPersistentUsedBytes(),
                                    allocator->GetPersistentUsedBytes()};
  char titles[kArenaRows][kMaxStringLength] = {"Total", "Head", "Tail"};
  char headers[kArenaRows][kMaxStringLength] = {"Arena", "Bytes", "% Arena"};

  // data is indexed [column][row]: title, byte count, percent of total.
  char data[kArenaCols][kArenaRows][kMaxStringLength];
  for (int i = 0; i < kArenaRows; ++i) {
    MicroStrcpy(data[0][i], titles[i]);
    // NOTE(review): byte counts are size_t but formatted via the int32_t
    // specialization — truncates for arenas over INT32_MAX bytes; confirm.
    FormatNumber<int32_t>(data[1][i], allocations[i]);
    FormatAsPercentage(data[2][i], static_cast<int64_t>(allocations[i]),
                       static_cast<int64_t>(total_bytes), 2);
  }

  PrintFormattedData<kArenaRows, kArenaCols>(headers, data, kArenaRows, type,
                                             "Arena");
}
// Logs one table/CSV row per recorded TFLM allocation type: its enum id,
// bytes used, bytes requested, allocation count, and share of total arena
// usage.
void LogAllocations(const tflite::RecordingMicroAllocator& allocator,
                    const PrettyPrintType type) {
  constexpr int kAllocationTypes = 7;
  // Order here must line up with the human-readable `titles` below.
  tflite::RecordedAllocationType types[kAllocationTypes] = {
      tflite::RecordedAllocationType::kTfLiteEvalTensorData,
      tflite::RecordedAllocationType::kPersistentTfLiteTensorData,
      tflite::RecordedAllocationType::kPersistentTfLiteTensorQuantizationData,
      tflite::RecordedAllocationType::kPersistentBufferData,
      tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData,
      tflite::RecordedAllocationType::kNodeAndRegistrationArray,
      tflite::RecordedAllocationType::kOpData};
  char titles[kAllocationTypes][kMaxStringLength] = {
      "Eval tensor data",
      "Persistent tensor data",
      "Persistent quantization data",
      "Persistent buffer data",
      "Tensor variable buffer data",
      "Node and registration array",
      "Operation data"};
  constexpr int kColumns = 6;
  const char headers[kColumns][kMaxStringLength] = {
      "Allocation", "Id", "Used", "Requested", "Count", "% Memory"};

  // Percentages are relative to total bytes used in the arena.
  const size_t total_bytes =
      allocator.GetSimpleMemoryAllocator()->GetUsedBytes();

  // data is indexed [column][row].
  char data[kColumns][kAllocationTypes][kMaxStringLength];
  for (int i = 0; i < kAllocationTypes; ++i) {
    tflite::RecordedAllocation allocation =
        allocator.GetRecordedAllocation(types[i]);
    MicroStrcpy(data[0][i], titles[i]);
    FormatNumber<int32_t>(data[1][i], static_cast<int>(types[i]));
    FormatNumber<int32_t>(data[2][i], allocation.used_bytes);
    FormatNumber<int32_t>(data[3][i], allocation.requested_bytes);
    FormatNumber<int32_t>(data[4][i], allocation.count);
    FormatAsPercentage(data[5][i], static_cast<int64_t>(allocation.used_bytes),
                       static_cast<int64_t>(total_bytes), 2);
  }

  PrintFormattedData<kAllocationTypes, kColumns>(
      headers, data, kAllocationTypes, type, "Allocations");
}
// Logs both allocation summaries for a benchmark run: the arena head/tail
// usage table followed by the per-allocation-type table.
void LogAllocatorEvents(const tflite::RecordingMicroAllocator& allocator,
                        const PrettyPrintType type) {
  LogArenaAllocations(allocator.GetSimpleMemoryAllocator(), type);
  LogAllocations(allocator, type);
}
} // namespace tflite
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TFLM_BENCHMARK_INTERNAL_METRICS_H_
#define TFLM_BENCHMARK_INTERNAL_METRICS_H_
#include <stdio.h>
#include <cmath>
#include <cstdint>
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"
#include "tensorflow/lite/micro/tools/benchmarking/log_utils.h"
namespace tflite {
// Logs the allocation events. Prints out two tables, one for the arena
// allocations, and one for each type of TFLM allocation type.
// Args:
// - allocator: The recording micro allocator used during the invocation
// process.
// - type: Which print format should be used to output the allocation data to
// stdout.
void LogAllocatorEvents(const tflite::RecordingMicroAllocator& allocator,
PrettyPrintType type);
} // namespace tflite
#endif // TFLM_BENCHMARK_INTERNAL_METRICS_H_
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TFLM_BENCHMARK_OP_RESOLVER_H_
#define TFLM_BENCHMARK_OP_RESOLVER_H_
#include <memory>
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
namespace tflite {
// Registers every TFLM operator the generic benchmark supports on the given
// resolver. The resolver's capacity (96) must be at least the number of
// Add*() calls below. Returns kTfLiteOk on success, or the first failing
// registration status (via TF_LITE_ENSURE_STATUS).
inline TfLiteStatus CreateOpResolver(
    tflite::MicroMutableOpResolver<96>& op_resolver) {
  TF_LITE_ENSURE_STATUS(op_resolver.AddFullyConnected());
  TF_LITE_ENSURE_STATUS(op_resolver.AddAdd());
  TF_LITE_ENSURE_STATUS(op_resolver.AddAbs());
  TF_LITE_ENSURE_STATUS(op_resolver.AddAddN());
  TF_LITE_ENSURE_STATUS(op_resolver.AddArgMax());
  TF_LITE_ENSURE_STATUS(op_resolver.AddArgMin());
  TF_LITE_ENSURE_STATUS(op_resolver.AddAssignVariable());
  TF_LITE_ENSURE_STATUS(op_resolver.AddBatchToSpaceNd());
  TF_LITE_ENSURE_STATUS(op_resolver.AddBroadcastArgs());
  TF_LITE_ENSURE_STATUS(op_resolver.AddBroadcastTo());
  TF_LITE_ENSURE_STATUS(op_resolver.AddCallOnce());
  TF_LITE_ENSURE_STATUS(op_resolver.AddCast());
  TF_LITE_ENSURE_STATUS(op_resolver.AddCeil());
  TF_LITE_ENSURE_STATUS(op_resolver.AddCircularBuffer());
  TF_LITE_ENSURE_STATUS(op_resolver.AddConcatenation());
  TF_LITE_ENSURE_STATUS(op_resolver.AddCos());
  TF_LITE_ENSURE_STATUS(op_resolver.AddCumSum());
  TF_LITE_ENSURE_STATUS(op_resolver.AddDepthToSpace());
  TF_LITE_ENSURE_STATUS(op_resolver.AddDequantize());
  TF_LITE_ENSURE_STATUS(op_resolver.AddDiv());
  TF_LITE_ENSURE_STATUS(op_resolver.AddElu());
  TF_LITE_ENSURE_STATUS(op_resolver.AddEqual());
  TF_LITE_ENSURE_STATUS(op_resolver.AddEthosU());
  TF_LITE_ENSURE_STATUS(op_resolver.AddExp());
  TF_LITE_ENSURE_STATUS(op_resolver.AddExpandDims());
  TF_LITE_ENSURE_STATUS(op_resolver.AddFill());
  TF_LITE_ENSURE_STATUS(op_resolver.AddFloor());
  TF_LITE_ENSURE_STATUS(op_resolver.AddFloorDiv());
  TF_LITE_ENSURE_STATUS(op_resolver.AddFloorMod());
  TF_LITE_ENSURE_STATUS(op_resolver.AddGather());
  TF_LITE_ENSURE_STATUS(op_resolver.AddGatherNd());
  TF_LITE_ENSURE_STATUS(op_resolver.AddGreater());
  TF_LITE_ENSURE_STATUS(op_resolver.AddGreaterEqual());
  TF_LITE_ENSURE_STATUS(op_resolver.AddHardSwish());
  TF_LITE_ENSURE_STATUS(op_resolver.AddIf());
  TF_LITE_ENSURE_STATUS(op_resolver.AddL2Normalization());
  TF_LITE_ENSURE_STATUS(op_resolver.AddL2Pool2D());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLeakyRelu());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLess());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLessEqual());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLog());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLogicalAnd());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLogicalNot());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLogicalOr());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLogistic());
  TF_LITE_ENSURE_STATUS(op_resolver.AddLogSoftmax());
  TF_LITE_ENSURE_STATUS(op_resolver.AddMaximum());
  TF_LITE_ENSURE_STATUS(op_resolver.AddMirrorPad());
  TF_LITE_ENSURE_STATUS(op_resolver.AddMean());
  TF_LITE_ENSURE_STATUS(op_resolver.AddMinimum());
  TF_LITE_ENSURE_STATUS(op_resolver.AddNeg());
  TF_LITE_ENSURE_STATUS(op_resolver.AddNotEqual());
  TF_LITE_ENSURE_STATUS(op_resolver.AddPack());
  TF_LITE_ENSURE_STATUS(op_resolver.AddPadV2());
  TF_LITE_ENSURE_STATUS(op_resolver.AddPrelu());
  TF_LITE_ENSURE_STATUS(op_resolver.AddQuantize());
  TF_LITE_ENSURE_STATUS(op_resolver.AddReadVariable());
  TF_LITE_ENSURE_STATUS(op_resolver.AddReduceMax());
  TF_LITE_ENSURE_STATUS(op_resolver.AddRelu());
  TF_LITE_ENSURE_STATUS(op_resolver.AddRelu6());
  TF_LITE_ENSURE_STATUS(op_resolver.AddReshape());
  TF_LITE_ENSURE_STATUS(op_resolver.AddResizeBilinear());
  TF_LITE_ENSURE_STATUS(op_resolver.AddResizeNearestNeighbor());
  TF_LITE_ENSURE_STATUS(op_resolver.AddRound());
  TF_LITE_ENSURE_STATUS(op_resolver.AddRsqrt());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSelectV2());
  TF_LITE_ENSURE_STATUS(op_resolver.AddShape());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSin());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSlice());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSoftmax());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSpaceToBatchNd());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSpaceToDepth());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSplit());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSplitV());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSqueeze());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSqrt());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSquare());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSquaredDifference());
  TF_LITE_ENSURE_STATUS(op_resolver.AddStridedSlice());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSub());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSum());
  TF_LITE_ENSURE_STATUS(op_resolver.AddSvdf());
  TF_LITE_ENSURE_STATUS(op_resolver.AddTanh());
  TF_LITE_ENSURE_STATUS(op_resolver.AddTransposeConv());
  TF_LITE_ENSURE_STATUS(op_resolver.AddTranspose());
  TF_LITE_ENSURE_STATUS(op_resolver.AddUnpack());
  TF_LITE_ENSURE_STATUS(op_resolver.AddUnidirectionalSequenceLSTM());
  TF_LITE_ENSURE_STATUS(op_resolver.AddVarHandle());
  TF_LITE_ENSURE_STATUS(op_resolver.AddWhile());
  TF_LITE_ENSURE_STATUS(op_resolver.AddZerosLike());
  TF_LITE_ENSURE_STATUS(op_resolver.AddDepthwiseConv2D());
  TF_LITE_ENSURE_STATUS(op_resolver.AddConv2D());
  TF_LITE_ENSURE_STATUS(op_resolver.AddAveragePool2D());
  TF_LITE_ENSURE_STATUS(op_resolver.AddPad());
  TF_LITE_ENSURE_STATUS(op_resolver.AddMaxPool2D());
  TF_LITE_ENSURE_STATUS(op_resolver.AddMul());
  return kTfLiteOk;
}
} // namespace tflite
#endif // TFLM_BENCHMARK_OP_RESOLVER_H_
......@@ -281,11 +281,11 @@ MICRO_LITE_INTEGRATION_TESTS += $(shell find $(TENSORFLOW_ROOT)tensorflow/lite/m
MICRO_LITE_GEN_MUTABLE_OP_RESOLVER_TEST += \
$(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/gen_micro_mutable_op_resolver_test/person_detect/Makefile.inc)
MICRO_LITE_BENCHMARKS := $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/benchmarks/Makefile.inc)
MICRO_LITE_BENCHMARKS := $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/benchmarking/Makefile.inc)
# TODO(b/152645559): move all benchmarks to benchmarks directory.
MICROLITE_BENCHMARK_SRCS := \
$(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/benchmarks/*benchmark.cc)
$(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/benchmarking/*benchmark.cc)
MICROLITE_TEST_SRCS := \
$(TENSORFLOW_ROOT)tensorflow/lite/micro/fake_micro_context_test.cc \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册