Python script to track size (#850)

* Replace the body of create_size_log_x86.sh with a more readable Python script. BUG=https://issuetracker.google.com/211673155 * Detect size increase of specific binaries. This can be used in a GitHub Action to keep a size history and file an issue if the size increase exceeds a threshold. BUG=https://issuetracker.google.com/211673155 Co-authored-by: N mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

Python script to track size (#850)
* Replace the body of create_size_log_x86.sh with a more readable Python script. BUG=https://issuetracker.google.com/211673155 * Detect size increase of specific binaries. This can be used in a GitHub Action to keep a size history and file an issue if the size increase exceeds a threshold. BUG=https://issuetracker.google.com/211673155 Co-authored-by: N mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
76ebfa57 · deqiangc · GitHub · f6888768 · 76ebfa57 · 76ebfa57
9 changed files
--- a/data/continuous_builds/size_profiling/linux_x86_64_release/baseline_memory_footprint.csv
+++ b/data/continuous_builds/size_profiling/linux_x86_64_release/baseline_memory_footprint.csv
-date, sha, text, data, bss, totoal
+date,sha,text,data,bss,total
-Fri Jan  7 05:50:20 PM PST 2022, 98dec0f847b64e96052e0de155568a406409db36, 1394, 520, 8, 1922
+2022-01-10 23:04:44.309131,c53927869b2ce15345c6c1751164ca6e4aa47c02,1394,520,8,1922
+2022-01-10 23:10:04.748906,3aab209ca2654013356ba96cd1480ec4b0b76010,1394,520,8,1922
+2022-01-11 09:32:16.986632,b252173284b23744a78d9a599106e75297b9ff81,1394,520,8,1922
+2022-01-11 14:59:16.261895,e2eed0b46142015e972cfd301d0e514eb0cf131e,1394,520,8,1922
+2022-01-11 15:03:24.298672,e2eed0b46142015e972cfd301d0e514eb0cf131e,1394,520,8,1922
--- a/data/continuous_builds/size_profiling/linux_x86_64_release/baseline_memory_footprint.png
+++ b/data/continuous_builds/size_profiling/linux_x86_64_release/baseline_memory_footprint.png
--- a/data/continuous_builds/size_profiling/linux_x86_64_release/interpreter_memory_footprint.csv
+++ b/data/continuous_builds/size_profiling/linux_x86_64_release/interpreter_memory_footprint.csv
-date, sha, text, data, bss, totoal
+date,sha,text,data,bss,total
-Fri Jan  7 05:50:20 PM PST 2022, 98dec0f847b64e96052e0de155568a406409db36, 22031, 1408, 24, 23463
+2022-01-10 23:04:44.309131,c53927869b2ce15345c6c1751164ca6e4aa47c02,22599,1464,24,24087
+2022-01-10 23:10:04.748906,3aab209ca2654013356ba96cd1480ec4b0b76010,22599,1464,24,24087
+2022-01-11 09:32:16.986632,b252173284b23744a78d9a599106e75297b9ff81,22599,1464,24,24087
+2022-01-11 14:59:16.261895,e2eed0b46142015e972cfd301d0e514eb0cf131e,22599,1464,24,24087
+2022-01-11 15:03:24.298672,e2eed0b46142015e972cfd301d0e514eb0cf131e,22599,1464,24,24087
--- a/data/continuous_builds/size_profiling/linux_x86_64_release/interpreter_memory_footprint.png
+++ b/data/continuous_builds/size_profiling/linux_x86_64_release/interpreter_memory_footprint.png
--- a/data/continuous_builds/size_profiling/linux_x86_64_release/keyword_benchmark.csv
+++ b/data/continuous_builds/size_profiling/linux_x86_64_release/keyword_benchmark.csv
-date, sha, text, data, bss, totoal
+date,sha,text,data,bss,total
-Fri Jan  7 05:44:44 PM PST 2022, 98dec0f847b64e96052e0de155568a406409db36, 81105, 1512, 22400, 105017
+2022-01-10 22:39:00.301510,c53927869b2ce15345c6c1751164ca6e4aa47c02,81657,1568,22400,105625
-Fri Jan  7 05:50:20 PM PST 2022, 98dec0f847b64e96052e0de155568a406409db36, 81105, 1512, 22400, 105017
+2022-01-10 23:03:56.696004,c53927869b2ce15345c6c1751164ca6e4aa47c02,81657,1568,22400,105625
+2022-01-10 23:04:44.309131,c53927869b2ce15345c6c1751164ca6e4aa47c02,81657,1568,22400,105625
+2022-01-10 23:10:04.748906,3aab209ca2654013356ba96cd1480ec4b0b76010,81657,1568,22400,105625
+2022-01-11 09:32:16.986632,b252173284b23744a78d9a599106e75297b9ff81,81657,1568,22448,105673
+2022-01-11 14:59:16.261895,e2eed0b46142015e972cfd301d0e514eb0cf131e,81657,1568,22448,105673
+2022-01-11 15:03:24.298672,e2eed0b46142015e972cfd301d0e514eb0cf131e,81657,1568,22448,105673
--- a/data/continuous_builds/size_profiling/linux_x86_64_release/keyword_benchmark.png
+++ b/data/continuous_builds/size_profiling/linux_x86_64_release/keyword_benchmark.png
--- a/tensorflow/lite/micro/tools/metrics/create_size_log.py
+++ b/tensorflow/lite/micro/tools/metrics/create_size_log.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Script to build the required binaries, profile their size and generate log.
+"""
+import argparse
+import datetime
+import os
+import pandas as pd
+import subprocess
+def _build_a_binary(root_dir, binary_name, makefile_options):
+  """Builds one binary with the tensorflow/lite/micro Makefile.
+
+  Args:
+    root_dir: Directory to change into before invoking make.
+    binary_name: Name of the make target to build.
+    makefile_options: Dict of Makefile variables; each entry is passed to make
+      as KEY=VALUE.
+
+  Raises:
+    RuntimeError: If make exits with a non-zero return code. The message
+      contains the full command line and the captured stderr.
+  """
+  os.chdir(root_dir)
+  make_command = [
+      "make", "-f", "tensorflow/lite/micro/tools/make/Makefile", binary_name
+  ]
+  for option_name, option_value in makefile_options.items():
+    make_command.append("%s=%s" % (option_name, option_value))
+  # Both streams are captured so that stderr can be reported on failure.
+  result = subprocess.run(make_command,
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
+  if result.returncode == 0:
+    return
+  raise RuntimeError("Building %s failed with \n\n %s" %
+                     (" ".join(make_command), result.stderr.decode()))
+def _profile_a_binary(root_dir, binary_name, makefile_options, build_info):
+  """Runs `size` on a built binary and appends one row to its CSV size log.
+
+  Args:
+    root_dir: Root directory under which both the generated binaries and the
+      size logs are located.
+    binary_name: Name of the binary; also used as the CSV file base name.
+    makefile_options: Dict providing "TARGET", "TARGET_ARCH" and "BUILD_TYPE",
+      which together name the per-configuration sub-directory.
+    build_info: Dict providing the "date" and "sha" values of the new row.
+
+  Raises:
+    RuntimeError: If the `size` command exits with a non-zero return code.
+  """
+  target_dir = "%s_%s_%s" % (makefile_options["TARGET"],
+                             makefile_options["TARGET_ARCH"],
+                             makefile_options["BUILD_TYPE"])
+  binary_path = os.path.join(root_dir, 'tensorflow/lite/micro/tools/make/gen/',
+                             target_dir, 'bin', binary_name)
+  csv_path = os.path.join(root_dir, 'data/continuous_builds/size_profiling',
+                          target_dir, "%s.csv" % binary_name)
+  size_result = subprocess.run(["size", binary_path],
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+  if size_result.returncode != 0:
+    raise RuntimeError("size %s failed with \n\n %s" %
+                       (binary_name, size_result.stderr.decode()))
+  # The first output line holds the column titles, the rest the values.
+  output_lines = size_result.stdout.decode().split('\n')
+  column_titles = output_lines[0].split()
+  size_rows = [line.split() for line in output_lines[1:]]
+  sizes = pd.DataFrame(size_rows, columns=column_titles)
+  # Only the header of the log is loaded, so the frame holds just the new row
+  # and can be appended to the file without repeating the header.
+  report = _create_or_read_csv(csv_path)
+  new_row = [build_info["date"], build_info['sha']]
+  new_row += [sizes[section][0] for section in ('text', 'data', 'bss', 'dec')]
+  report.loc[len(report.index)] = new_row
+  report.to_csv(csv_path, index=False, header=False, mode='a')
+def _create_or_read_csv(csv_file_name):
+  """Returns an empty DataFrame carrying the columns of the CSV size log.
+
+  If the file does not exist yet it is created, together with any missing
+  parent directories, containing only the header row.
+
+  Args:
+    csv_file_name: Path of the CSV size log.
+
+  Returns:
+    A DataFrame with zero rows whose columns are read from the file's header.
+  """
+  if not os.path.exists(csv_file_name):
+    # The log directory may not exist yet (e.g. first run for a new build
+    # configuration); create it so that writing the header does not fail.
+    parent_dir = os.path.dirname(csv_file_name)
+    if parent_dir:
+      os.makedirs(parent_dir, exist_ok=True)
+    csv_df = pd.DataFrame(
+        columns=['date', 'sha', 'text', 'data', 'bss', 'total'])
+    csv_df.to_csv(csv_file_name, index=False, mode='w')
+  # nrows=0 parses the header only; existing rows are intentionally not loaded.
+  return pd.read_csv(csv_file_name, index_col=False, nrows=0)
+def _get_build_info(root_dir):
+  """Collects the current time and the git HEAD sha of the repository.
+
+  Args:
+    root_dir: Directory of the git checkout; the process also changes its
+      working directory to it.
+
+  Returns:
+    A dict with 'date' (string form of datetime.datetime.now()) and 'sha'
+    (output of `git rev-parse HEAD` without the trailing newline).
+
+  Raises:
+    RuntimeError: If git exits with a non-zero return code.
+  """
+  # Kept for backward compatibility: the original changed the working
+  # directory here, although git itself is already run with cwd=root_dir.
+  os.chdir(root_dir)
+  current_time = str(datetime.datetime.now())
+  # stderr must be piped as well; otherwise there is nothing to decode for the
+  # error message and the failure path itself would crash.
+  git_result = subprocess.run(["git", "rev-parse", "HEAD"],
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE,
+                              cwd=root_dir)
+  if git_result.returncode != 0:
+    raise RuntimeError("Git failed with %s" % git_result.stderr.decode())
+  return {'date': current_time, 'sha': git_result.stdout.decode().strip('\n')}
+def _build_and_profile(root_dir, makefile_options, binary_names):
+  """Builds each requested binary and logs its size.
+
+  Args:
+    root_dir: Root directory of the repository.
+    makefile_options: Dict of Makefile variables used for every build.
+    binary_names: Iterable of binary (make target) names.
+  """
+  # Date and sha are captured once so that all rows of one run share them.
+  run_info = _get_build_info(root_dir)
+  for name in binary_names:
+    _build_a_binary(root_dir, name, makefile_options)
+    _profile_a_binary(root_dir, name, makefile_options, run_info)
+if __name__ == '__main__':
+  # Command line entry point: parse the build configuration and the list of
+  # binaries, then build and profile each of them.
+  default_binary_list_string = 'keyword_benchmark,baseline_memory_footprint,interpreter_memory_footprint'
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--binary_list',
+      nargs='?',
+      const=default_binary_list_string,
+      default=default_binary_list_string,
+      help=
+      'binary list separated by comma (e.g. keyword_benchmark,baseline_memory_footprint)'
+  )
+  # Each entry: (flag, value used as both const and default, help text).
+  for flag, value, help_text in (
+      ('--build_type', 'release', 'build type (e.g. release)'),
+      ('--target', 'linux', 'host target (e.g. linux)'),
+      ('--target_arch', 'x86_64', 'target architecture (e.g x86_64)'),
+  ):
+    parser.add_argument(flag,
+                        nargs='?',
+                        const=value,
+                        default=value,
+                        help=help_text)
+  args = parser.parse_args()
+  makefile_options = {
+      "BUILD_TYPE": args.build_type,
+      "TARGET": args.target,
+      "TARGET_ARCH": args.target_arch
+  }
+  binary_names = args.binary_list.split(',')
+  # The repository root is five directory levels above this script.
+  script_path = os.path.dirname(os.path.realpath(__file__))
+  root_dir = os.path.join(script_path, '../../../../..')
+  _build_and_profile(root_dir, makefile_options, binary_names)
--- a/tensorflow/lite/micro/tools/metrics/create_size_log_x86.sh
+++ b/tensorflow/lite/micro/tools/metrics/create_size_log_x86.sh
@@ -16,116 +16,40 @@
 #
 # Measures the size of specified binaries and append the report to a log.
-# Utility function to build a target.
-# Parameters:
-# ${1}: binary target name such as keyworkd_benchmark
-# ${2}: build type such as RELEASE, DEFAULT
-# ${3}: target such as linux
-# ${4}: target architecture such as x86_64
-function build_target() {
-  local binary_target=$1
-  local build_type=$2
-  local target=$3
-  local target_arch=$4
-  readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile build BUILD_TYPE=${build_type} TARGET=${target} TARGET_ARCH=${target_arch} ${binary_target}
-}
-# Utility function to profile a binary and report its size
-#Parameters:
-# ${1}: binary target path
-# ${2}: size log file name
-function profile_a_binary() {
-  local binary=${1}
-  local log=${2}
-  raw_size=$(size ${binary})
-  # Skip the title row
-  sizes=$(echo "${raw_size}" | sed -n '2 p')
-  text_size=$(echo "$sizes" | awk '{print $1}')
-  data_size=$(echo "$sizes" | awk '{print $2}')
-  bss_size=$(echo "$sizes" | awk '{print $3}')
-  total_size=$(echo "$sizes" | awk '{print $4}')
-  echo "${BUILD_TIME}, ${HEAD_SHA}, ${text_size}, ${data_size}, ${bss_size}, ${total_size}" >> ${log}
-}
-# Parameters:
-# ${1} - size log file name
-function start_size_report() {
-  local log=${1}  
-  if [[ ! -f ${log} ]]
-  then
-    echo "${CSV_HEADER}" >> ${log}
-  fi
-}
-# Parameters:
-# ${1}: binary target name such as keyworkd_benchmark
-# ${2}: build type such as RELEASE, DEFAULT
-# ${3}: target such as linux
-# ${4}: target architecture such as x86_64
-function report_size() {
-  local binary=$1
-  local build_type=$2
-  local target=$3
-  local target_arch=$4
-  local log="${LOG_ROOT_DIR}/${binary}.csv"
-  start_size_report ${log}
-  build_target ${binary} ${build_type} ${target} ${target_arch}
-  local build_result=$?
-  if [[ ${build_result} != 0 ]]
-  then
-    # Here release build failed so mark failures and return appropriate error
-    # code.
-    echo "${binary} fail to build,">> ${log}
-    return ${build_result}
-  fi
-  # If build is successful, profile the size.
-  local binary_path="${GEN_FILES_DIR}/${target}_${target_arch}_${build_type}/bin/${binary}"
-  profile_a_binary ${binary_path} ${log}
-}
-###################################################
-### Start of main
-###################################################
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 ROOT_DIR=${SCRIPT_DIR}/../../../../..
-GEN_FILES_DIR=${ROOT_DIR}/tensorflow/lite/micro/tools/make/gen/
 cd "${ROOT_DIR}"
 source tensorflow/lite/micro/tools/ci_build/helper_functions.sh
-CSV_HEADER="date, sha, text, data, bss, totoal"
-HEAD_SHA=`git rev-parse HEAD`
-BUILD_TIME=`date`
 TARGET="linux"
 TARGET_ARCH="x86_64"
 BUILD_TYPE="release"
-LOG_ROOT_DIR=${ROOT_DIR}/data/continuous_builds/size_profiling/${TARGET}_${TARGET_ARCH}_${BUILD_TYPE}
 # Clean the own build and download third party
 readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean clean_downloads
 readable_run make -f tensorflow/lite/micro/tools/make/Makefile third_party_downloads
-report_size keyword_benchmark ${BUILD_TYPE} ${TARGET} ${TARGET_ARCH}
+BINARY_LIST="keyword_benchmark,baseline_memory_footprint,interpreter_memory_footprint"
-KEYWORD_BENCHMARK_STATUS=$?
+# Build the binaries and append their sizes to the CSV logs.
+python3 tensorflow/lite/micro/tools/metrics/create_size_log.py \
+  --build_type="${BUILD_TYPE}" \
+  --target="${TARGET}" \
+  --target_arch="${TARGET_ARCH}" \
+  --binary_list="${BINARY_LIST}"
+LOG_GENERATION_STATUS=$?
+if [[ ${LOG_GENERATION_STATUS} -ne 0 ]]; then
+  echo "Failure in profiling."
+  exit -1
+fi
-report_size baseline_memory_footprint ${BUILD_TYPE} ${TARGET} ${TARGET_ARCH}
+echo "Success in size log generation"
-BASELINE_MEMORY_FOOTPRINT_STATUS=$?
-report_size interpreter_memory_footprint ${BUILD_TYPE} ${TARGET} ${TARGET_ARCH}
+LOG_DIR="${ROOT_DIR}/data/continuous_builds/size_profiling/${TARGET}_${TARGET_ARCH}_${BUILD_TYPE}"
-INTERPRETER_MEMORY_FOOTPRINT_STATUS=$?
+# LOG_DIR is derived from the location of this script, so it is quoted to keep
+# a path containing spaces from being split into several arguments.
+python3 tensorflow/lite/micro/tools/metrics/detect_size_increase_and_plot_history.py \
+  --input_dir="${LOG_DIR}" \
+  --output_dir="${LOG_DIR}" \
+  --binary_list="${BINARY_LIST}"
+SIZE_ALERT_STATUS=$?
-if [[ ${KEYWORD_BENCHMARK_STATUS} != 0 || ${BASELINE_MEMORY_FOOTPRINT_STATUS} != 0 || ${INTERPRETER_MEMORY_FOOTPRINT_STATUS} != 0 ]]
+if [[ ${SIZE_ALERT_STATUS} != 0 ]]
 then
-  echo "Failure in profiling."
+  echo "Size increase may exceed threshold"
  exit -1
 fi
-## TODO(b/213646558): run difference detection and also return error code if detecting large increase
+echo "Size does not increase or size increase does not exceed threshold"
-echo "Profiling succeed"
\ No newline at end of file
\ No newline at end of file
--- a/tensorflow/lite/micro/tools/metrics/detect_size_increase_and_plot_history.py
+++ b/tensorflow/lite/micro/tools/metrics/detect_size_increase_and_plot_history.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Script to detect whether the size increase exceeds a threshold and also plot the size history as a graph.
+"""
+import argparse
+import pandas as pd
+from matplotlib import pyplot as plt
+# Limit the size history check to at most 60 entries (rows) of the size log
+SIZE_HISTORY_DEPTH = 60
+# If the size increase of a section listed below exceeds its threshold (in bytes), an error will be raised
+SIZE_THRESHOLD_SETTING = {
+    "text": 512,
+    "total": 512,
+}
+def _plot_and_detect_size_increase_for_binary(input_dir, output_dir,
+                                              binary_name, threshold):
+  """Plots the size history of one binary and checks it against thresholds.
+
+  Reads <input_dir>/<binary_name>.csv, draws the absolute size and the
+  entry-to-entry increase of the 'text', 'data' and 'total' columns, and saves
+  the figure as <output_dir>/<binary_name>.png.
+
+  Args:
+    input_dir: Directory containing the CSV size log.
+    output_dir: Directory the PNG plot is written to.
+    binary_name: Base name of both the CSV and the PNG file.
+    threshold: Dict mapping a column name to its maximum allowed increase.
+
+  Returns:
+    A list of messages, one per column whose increase exceeds its threshold;
+    empty if none does.
+  """
+  csv_path = '%s/%s.csv' % (input_dir, binary_name)
+  # New entries are appended at the end of the log, so the history window must
+  # be the LAST SIZE_HISTORY_DEPTH rows (nrows= would keep the oldest ones).
+  # The index is renumbered from 0 so positional and label access agree.
+  size_log = pd.read_csv(csv_path, index_col=False).tail(
+      SIZE_HISTORY_DEPTH).reset_index(drop=True)
+  fig, axs = plt.subplots(3, 2)
+  fig.suptitle('Source: %s' % binary_name)
+  threshold_messages = []
+  for index, name in enumerate(['text', 'data', 'total']):
+    err_msg_or_none = _subplot_and_detect_size_increase(
+        axs, size_log, name, index, threshold)
+    if err_msg_or_none is not None:
+      threshold_messages.append('%s failure: %s' %
+                                (binary_name, err_msg_or_none))
+  fig_path = '%s/%s.png' % (output_dir, binary_name)
+  fig.tight_layout()
+  fig.savefig(fig_path)
+  # Close instead of clf(): a cleared figure stays open, so one figure per
+  # binary would otherwise accumulate.
+  plt.close(fig)
+  return threshold_messages
+def _subplot_and_detect_size_increase(subplot_axs, size_log, section_name, row,
+                                      threshold):
+  """Plots one section's size history and checks its most recent increase.
+
+  Args:
+    subplot_axs: 2-D grid of axes, indexed as [row, column]; column 0 receives
+      the absolute size and column 1 the entry-to-entry increase.
+    size_log: DataFrame of the size log with one column per section, ordered
+      from oldest to newest entry.
+    section_name: Name of the column to plot and check (e.g. 'text').
+    row: Row of subplot_axs to draw into.
+    threshold: Dict mapping a section name to its maximum allowed increase.
+      Sections that are not listed are plotted but not checked.
+
+  Returns:
+    An error message if the latest increase exceeds the threshold, else None.
+  """
+  subplot_axs[row, 0].set_title(section_name)
+  subplot_axs[row, 0].plot(size_log[section_name], 'o-')
+  subplot_axs[row, 0].set_ylabel('Abs Sz(bytes)')
+  increased_size = size_log[section_name].diff()
+  subplot_axs[row, 1].plot(increased_size, 'o-')
+  subplot_axs[row, 1].set_ylabel('Incr Sz (bytes)')
+  # At least two entries are needed to have an increase at all.
+  if section_name not in threshold or len(increased_size) < 2:
+    return None
+  # Check the newest entry against its predecessor. Position-based access is
+  # used so the check does not depend on how the log is indexed.
+  latest_increase = increased_size.iloc[-1]
+  if latest_increase > threshold[section_name]:
+    return '%s size increases by %d and exceeds threshold %d' % (
+        section_name, latest_increase, threshold[section_name])
+  # By default there is no size increase that exceeds the threshold
+  return None
+def _detect_size_increase_and_plot_history(input_dir, output_dir, binary_list,
+                                           threshold_setting):
+  """Plots the size history of every binary and fails on excessive increase.
+
+  Args:
+    input_dir: Directory containing the CSV size logs.
+    output_dir: Directory the plots are written to.
+    binary_list: Iterable of binary names to process.
+    threshold_setting: Dict mapping a section name to its maximum allowed
+      increase.
+
+  Raises:
+    RuntimeError: If any binary reports a threshold violation. All binaries
+      are processed first; the message is the string form of the collected
+      list of messages.
+  """
+  all_messages = []
+  for binary_name in binary_list:
+    all_messages.extend(
+        _plot_and_detect_size_increase_for_binary(input_dir, output_dir,
+                                                  binary_name,
+                                                  threshold_setting))
+  if all_messages:
+    raise RuntimeError(str(all_messages))
+if __name__ == '__main__':
+  # Command line entry point: plot the size history of each listed binary and
+  # raise if a size increase exceeds SIZE_THRESHOLD_SETTING.
+  parser = argparse.ArgumentParser()
+  default_binary_list_string = 'keyword_benchmark,baseline_memory_footprint,interpreter_memory_footprint'
+  parser.add_argument(
+      '--binary_list',
+      nargs='?',
+      const=default_binary_list_string,
+      default=default_binary_list_string,
+      help=
+      'binary list separated by comma (e.g. keyword_benchmark,baseline_memory_footprint)'
+  )
+  # Both directories are required: without them the file paths would be built
+  # from None and fail later with a misleading "file not found" error.
+  parser.add_argument('--input_dir',
+                      required=True,
+                      help='Path to the size log file (e.g. ~/size_log)')
+  parser.add_argument('--output_dir',
+                      required=True,
+                      help='Path to save plot to (e.g. /tmp/)')
+  args = parser.parse_args()
+  binary_names = args.binary_list.split(',')
+  _detect_size_increase_and_plot_history(args.input_dir, args.output_dir,
+                                         binary_names, SIZE_THRESHOLD_SETTING)