未验证 提交 76ebfa57 编写于 作者: D deqiangc 提交者: GitHub

Python script to track size (#850)

* Replace the body of create_size_log_x86.sh with a more readable python.

BUG=https://issuetracker.google.com/211673155

* Detect size increase of specific binaries

This can be used in Github action to keep a size history and
file an issue if size increase exceeds threshold

BUG=https://issuetracker.google.com/211673155

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
上级 f6888768
date, sha, text, data, bss, totoal date,sha,text,data,bss,total
Fri Jan 7 05:50:20 PM PST 2022, 98dec0f847b64e96052e0de155568a406409db36, 1394, 520, 8, 1922 2022-01-10 23:04:44.309131,c53927869b2ce15345c6c1751164ca6e4aa47c02,1394,520,8,1922
2022-01-10 23:10:04.748906,3aab209ca2654013356ba96cd1480ec4b0b76010,1394,520,8,1922
2022-01-11 09:32:16.986632,b252173284b23744a78d9a599106e75297b9ff81,1394,520,8,1922
2022-01-11 14:59:16.261895,e2eed0b46142015e972cfd301d0e514eb0cf131e,1394,520,8,1922
2022-01-11 15:03:24.298672,e2eed0b46142015e972cfd301d0e514eb0cf131e,1394,520,8,1922
date, sha, text, data, bss, totoal date,sha,text,data,bss,total
Fri Jan 7 05:50:20 PM PST 2022, 98dec0f847b64e96052e0de155568a406409db36, 22031, 1408, 24, 23463 2022-01-10 23:04:44.309131,c53927869b2ce15345c6c1751164ca6e4aa47c02,22599,1464,24,24087
2022-01-10 23:10:04.748906,3aab209ca2654013356ba96cd1480ec4b0b76010,22599,1464,24,24087
2022-01-11 09:32:16.986632,b252173284b23744a78d9a599106e75297b9ff81,22599,1464,24,24087
2022-01-11 14:59:16.261895,e2eed0b46142015e972cfd301d0e514eb0cf131e,22599,1464,24,24087
2022-01-11 15:03:24.298672,e2eed0b46142015e972cfd301d0e514eb0cf131e,22599,1464,24,24087
date, sha, text, data, bss, totoal date,sha,text,data,bss,total
Fri Jan 7 05:44:44 PM PST 2022, 98dec0f847b64e96052e0de155568a406409db36, 81105, 1512, 22400, 105017 2022-01-10 22:39:00.301510,c53927869b2ce15345c6c1751164ca6e4aa47c02,81657,1568,22400,105625
Fri Jan 7 05:50:20 PM PST 2022, 98dec0f847b64e96052e0de155568a406409db36, 81105, 1512, 22400, 105017 2022-01-10 23:03:56.696004,c53927869b2ce15345c6c1751164ca6e4aa47c02,81657,1568,22400,105625
2022-01-10 23:04:44.309131,c53927869b2ce15345c6c1751164ca6e4aa47c02,81657,1568,22400,105625
2022-01-10 23:10:04.748906,3aab209ca2654013356ba96cd1480ec4b0b76010,81657,1568,22400,105625
2022-01-11 09:32:16.986632,b252173284b23744a78d9a599106e75297b9ff81,81657,1568,22448,105673
2022-01-11 14:59:16.261895,e2eed0b46142015e972cfd301d0e514eb0cf131e,81657,1568,22448,105673
2022-01-11 15:03:24.298672,e2eed0b46142015e972cfd301d0e514eb0cf131e,81657,1568,22448,105673
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to build the required binaries, profile their size and generate log.
"""
import argparse
import datetime
import os
import pandas as pd
import subprocess
def _build_a_binary(root_dir, binary_name, makefile_options):
  """Build a single make target.

  Args:
    root_dir: directory to change into before running make; the Makefile path
      passed to make is relative to it.
    binary_name: name of the make target to build.
    makefile_options: dict of make variables, forwarded as KEY=VALUE arguments.

  Raises:
    RuntimeError: if make exits with a non-zero status. The message contains
      the full command line and whatever make wrote to stderr.
  """
  os.chdir(root_dir)

  command = [
      "make", "-f", "tensorflow/lite/micro/tools/make/Makefile", binary_name
  ]
  command.extend("%s=%s" % item for item in makefile_options.items())

  # Both streams are captured; only stderr is used, for the error report.
  completed = subprocess.run(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
  if completed.returncode == 0:
    return

  raise RuntimeError("Building %s failed with \n\n %s" %
                     (" ".join(command), completed.stderr.decode()))
def _profile_a_binary(root_dir, binary_name, makefile_options, build_info):
  """Measure a built binary with `size` and append one record to its CSV log.

  The binary is looked up under
  <root_dir>/tensorflow/lite/micro/tools/make/gen/<target dir>/bin/ and the log
  under <root_dir>/data/continuous_builds/size_profiling/<target dir>/, where
  <target dir> is "<TARGET>_<TARGET_ARCH>_<BUILD_TYPE>".

  Args:
    root_dir: base directory for both the binary and the log path.
    binary_name: file name of the binary; also the stem of the CSV file name.
    makefile_options: dict providing the "TARGET", "TARGET_ARCH" and
      "BUILD_TYPE" entries.
    build_info: dict providing the "date" and "sha" values for the new record.

  Raises:
    RuntimeError: if `size` exits with a non-zero status or does not print a
      title row followed by a value row.
    KeyError: if the title row lacks one of the text/data/bss/dec columns.
  """
  target_dir = "%s_%s_%s" % (makefile_options["TARGET"],
                             makefile_options["TARGET_ARCH"],
                             makefile_options["BUILD_TYPE"])
  binary_path = os.path.join(root_dir, 'tensorflow/lite/micro/tools/make/gen/',
                             target_dir, 'bin', binary_name)
  csv_path = os.path.join(root_dir, 'data/continuous_builds/size_profiling',
                          target_dir, "%s.csv" % binary_name)

  # Run size command and extract the output
  process = subprocess.Popen(["size", binary_path],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
  stdout, stderr = process.communicate()
  if process.returncode != 0:
    raise RuntimeError("size %s failed with \n\n %s" %
                       (binary_name, stderr.decode()))

  output_str = stdout.decode()
  rows = [line for line in output_str.splitlines() if line.strip()]
  if len(rows) < 2:
    raise RuntimeError("size %s produced unexpected output \n\n %s" %
                       (binary_name, output_str))
  # Pair each title with the value in the same position. zip() stops at the
  # shorter row, so a file name containing whitespace (which splits into
  # extra trailing fields) cannot break the parsing of the leading columns.
  sizes = dict(zip(rows[0].split(), rows[1].split()))

  # Append the output from the size to the CSV file. The frame returned by
  # _create_or_read_csv has the log's columns but no rows, so only the new
  # record is written, without repeating the header.
  report = _create_or_read_csv(csv_path)
  report.loc[len(report.index)] = [
      build_info["date"], build_info['sha'], sizes['text'], sizes['data'],
      sizes['bss'], sizes['dec']
  ]
  report.to_csv(csv_path, index=False, header=False, mode='a')
def _create_or_read_csv(csv_file_name):
  """Return the header of a size log as an empty frame, creating the log first.

  If the file does not exist yet, it is created (together with any missing
  parent directories) containing only the header row
  date,sha,text,data,bss,total.

  Args:
    csv_file_name: path of the CSV size log.

  Returns:
    A pandas DataFrame with the log's columns and no rows.
  """
  if not os.path.exists(csv_file_name):
    # A log for a new target/arch/build-type combination lives in a directory
    # that may not exist yet; without this the initial write fails.
    parent_dir = os.path.dirname(csv_file_name)
    if parent_dir:
      os.makedirs(parent_dir, exist_ok=True)
    csv_df = pd.DataFrame(
        columns=['date', 'sha', 'text', 'data', 'bss', 'total'])
    csv_df.to_csv(csv_file_name, index=False, mode='w')

  # nrows=0 reads the header only, however long the log has grown.
  return pd.read_csv(csv_file_name, index_col=False, nrows=0)
def _get_build_info(root_dir):
  """Collect the timestamp and git commit that identify the current build.

  Args:
    root_dir: directory in which `git rev-parse HEAD` is run. The process also
      changes its working directory to it.

  Returns:
    A dict with 'date' (the current local time as a string) and 'sha' (the
    output of `git rev-parse HEAD` without the trailing newline).

  Raises:
    RuntimeError: if git exits with a non-zero status; the message carries
      git's stderr.
  """
  os.chdir(root_dir)
  current_time = str(datetime.datetime.now())
  # stderr must be piped as well: otherwise communicate() returns None for it
  # and building the error message below fails with AttributeError.
  git_process = subprocess.Popen(["git", "rev-parse", "HEAD"],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 cwd=root_dir)
  sha, err = git_process.communicate()
  if git_process.returncode != 0:
    raise RuntimeError("Git failed with %s" % err.decode())
  return {'date': current_time, 'sha': sha.decode().strip('\n')}
def _build_and_profile(root_dir, makefile_options, binary_names):
  """Build every listed binary and log its size.

  The build information is gathered once, before the first build, and shared
  by all records written during this call. Each binary is built and then
  profiled before the next one is started.

  Args:
    root_dir: directory handed to the build and profile steps.
    makefile_options: dict of make variables handed to both steps.
    binary_names: iterable of binary names to process, in order.
  """
  info = _get_build_info(root_dir)
  for name in binary_names:
    _build_a_binary(root_dir, name, makefile_options)
    _profile_a_binary(root_dir, name, makefile_options, info)
if __name__ == '__main__':
  default_binary_list_string = 'keyword_benchmark,baseline_memory_footprint,interpreter_memory_footprint'

  # One (flag, default value, help text) entry per command line option. Every
  # option may be omitted or given without a value; both yield the default.
  option_table = (
      ('--binary_list', default_binary_list_string,
       'binary list separated by comma (e.g. keyword_benchmark,baseline_memory_footprint)'
       ),
      ('--build_type', 'release', 'build type (e.g. release)'),
      ('--target', 'linux', 'host target (e.g. linux)'),
      ('--target_arch', 'x86_64', 'target architecture (e.g x86_64)'),
  )

  parser = argparse.ArgumentParser()
  for flag, fallback, description in option_table:
    parser.add_argument(flag,
                        nargs='?',
                        const=fallback,
                        default=fallback,
                        help=description)
  args = parser.parse_args()

  # The root directory is five levels above the directory holding this script
  # (presumably the repository root -- confirm against the checkout layout).
  script_path = os.path.dirname(os.path.realpath(__file__))
  root_dir = os.path.join(script_path, '../../../../..')

  _build_and_profile(
      root_dir,
      {
          "BUILD_TYPE": args.build_type,
          "TARGET": args.target,
          "TARGET_ARCH": args.target_arch
      },
      args.binary_list.split(','),
  )
...@@ -16,116 +16,40 @@ ...@@ -16,116 +16,40 @@
# #
# Measures the size of specified binaries and append the report to a log. # Measures the size of specified binaries and append the report to a log.
# Utility function to build a target.
# Parameters:
#   ${1}: binary target name such as keyword_benchmark
#   ${2}: build type such as RELEASE, DEFAULT
#   ${3}: target such as linux
#   ${4}: target architecture such as x86_64
# The exit status is that of the readable_run invocation.
function build_target() {
  local binary_target="${1}"
  local build_type="${2}"
  local target="${3}"
  local target_arch="${4}"
  # Expansions are quoted so that values are passed to make as single words.
  readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile build \
    BUILD_TYPE="${build_type}" TARGET="${target}" TARGET_ARCH="${target_arch}" \
    "${binary_target}"
}
# Utility function to profile a binary and report its size.
# Appends one "date, sha, text, data, bss, total" row to the log.
# Parameters:
#   ${1}: binary target path
#   ${2}: size log file name
# Reads the globals BUILD_TIME and HEAD_SHA for the first two fields.
function profile_a_binary() {
  local binary="${1}"
  local log="${2}"
  # Declared local so the temporaries do not leak into the caller's scope.
  local raw_size sizes text_size data_size bss_size total_size

  raw_size=$(size "${binary}")
  # Skip the title row
  sizes=$(echo "${raw_size}" | sed -n '2 p')
  # Split the value row into its first four whitespace-separated columns;
  # the remaining columns are discarded into "_".
  read -r text_size data_size bss_size total_size _ <<< "${sizes}"

  echo "${BUILD_TIME}, ${HEAD_SHA}, ${text_size}, ${data_size}, ${bss_size}, ${total_size}" >> "${log}"
}
# Starts a size log: writes the CSV header row unless the log file already
# exists.
# Parameters:
#   ${1} - size log file name
# Reads the global CSV_HEADER.
function start_size_report() {
  local log="${1}"
  # The redirect target is quoted so a path with whitespace stays one word.
  if [[ ! -f "${log}" ]]; then
    echo "${CSV_HEADER}" >> "${log}"
  fi
}
# Builds one binary and appends its size, or a build-failure marker, to the
# binary's size log.
# Parameters:
#   ${1}: binary target name such as keyword_benchmark
#   ${2}: build type such as RELEASE, DEFAULT
#   ${3}: target such as linux
#   ${4}: target architecture such as x86_64
# Reads the globals LOG_ROOT_DIR and GEN_FILES_DIR.
# Returns the build's exit status when the build fails.
function report_size() {
  local binary="${1}"
  local build_type="${2}"
  local target="${3}"
  local target_arch="${4}"
  local log="${LOG_ROOT_DIR}/${binary}.csv"

  start_size_report "${log}"

  build_target "${binary}" "${build_type}" "${target}" "${target_arch}"
  local build_result=$?
  if [[ ${build_result} != 0 ]]; then
    # Here release build failed so mark failures and return appropriate error
    # code.
    echo "${binary} fail to build,">> "${log}"
    return ${build_result}
  fi

  # If build is successful, profile the size.
  local binary_path="${GEN_FILES_DIR}/${target}_${target_arch}_${build_type}/bin/${binary}"
  profile_a_binary "${binary_path}" "${log}"
}
###################################################
### Start of main
###################################################
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR=${SCRIPT_DIR}/../../../../.. ROOT_DIR=${SCRIPT_DIR}/../../../../..
GEN_FILES_DIR=${ROOT_DIR}/tensorflow/lite/micro/tools/make/gen/
cd "${ROOT_DIR}" cd "${ROOT_DIR}"
source tensorflow/lite/micro/tools/ci_build/helper_functions.sh source tensorflow/lite/micro/tools/ci_build/helper_functions.sh
CSV_HEADER="date, sha, text, data, bss, totoal"
HEAD_SHA=`git rev-parse HEAD`
BUILD_TIME=`date`
TARGET="linux" TARGET="linux"
TARGET_ARCH="x86_64" TARGET_ARCH="x86_64"
BUILD_TYPE="release" BUILD_TYPE="release"
LOG_ROOT_DIR=${ROOT_DIR}/data/continuous_builds/size_profiling/${TARGET}_${TARGET_ARCH}_${BUILD_TYPE}
# Clean the own build and download third party # Clean the own build and download third party
readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean clean_downloads readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean clean_downloads
readable_run make -f tensorflow/lite/micro/tools/make/Makefile third_party_downloads readable_run make -f tensorflow/lite/micro/tools/make/Makefile third_party_downloads
report_size keyword_benchmark ${BUILD_TYPE} ${TARGET} ${TARGET_ARCH} BINARY_LIST="keyword_benchmark,baseline_memory_footprint,interpreter_memory_footprint"
KEYWORD_BENCHMARK_STATUS=$? python3 tensorflow/lite/micro/tools/metrics/create_size_log.py --build_type=${BUILD_TYPE} --target=${TARGET} --target_arch=${TARGET_ARCH} --binary_list=${BINARY_LIST}
LOG_GENERATION_STATUS=$?
if [[ ${LOG_GENERATION_STATUS} != 0 ]]
then
echo "Failure in profiling."
exit -1
fi
report_size baseline_memory_footprint ${BUILD_TYPE} ${TARGET} ${TARGET_ARCH} echo "Success in size log generation"
BASELINE_MEMORY_FOOTPRINT_STATUS=$?
report_size interpreter_memory_footprint ${BUILD_TYPE} ${TARGET} ${TARGET_ARCH} LOG_DIR="${ROOT_DIR}/data/continuous_builds/size_profiling/${TARGET}_${TARGET_ARCH}_${BUILD_TYPE}"
INTERPRETER_MEMORY_FOOTPRINT_STATUS=$? python3 tensorflow/lite/micro/tools/metrics/detect_size_increase_and_plot_history.py --input_dir=${LOG_DIR} --output_dir=${LOG_DIR} --binary_list=${BINARY_LIST}
SIZE_ALERT_STATUS=$?
if [[ ${KEYWORD_BENCHMARK_STATUS} != 0 || ${BASELINE_MEMORY_FOOTPRINT_STATUS} != 0 || ${INTERPRETER_MEMORY_FOOTPRINT_STATUS} != 0 ]] if [[ ${SIZE_ALERT_STATUS} != 0 ]]
then then
echo "Failure in profiling." echo "Size increase may exceed threshold"
exit -1 exit -1
fi fi
## TODO(b/213646558): run difference detection and also return error code if detecting large increase echo "Size does not increase or size increase does not exceed threshold"
echo "Profiling succeed" \ No newline at end of file
\ No newline at end of file
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to whether the size exceeds a threshold and also plot size history as a graph.
"""
import argparse
import pandas as pd
from matplotlib import pyplot as plt
# Maximum number of size log records (CSV rows, not days) that are loaded for
# the history plot and the size increase check.
SIZE_HISTORY_DEPTH = 60
# Largest allowed increase, in bytes, between consecutive log records for each
# listed section. Sections that are not listed here are plotted but not
# checked; exceeding a threshold is reported as an error.
SIZE_THRESHOLD_SETTING = {
"text": 512,
"total": 512,
}
def _plot_and_detect_size_increase_for_binary(input_dir, output_dir,
                                              binary_name, threshold):
  """Plot the recent size history of one binary and check it for increases.

  Reads <input_dir>/<binary_name>.csv, draws the 'text', 'data' and 'total'
  sections into a 3x2 grid of subplots and saves the figure as
  <output_dir>/<binary_name>.png.

  Args:
    input_dir: directory containing the CSV size log.
    output_dir: directory the PNG plot is written to.
    binary_name: stem of both the CSV and the PNG file name.
    threshold: dict mapping a section name to its maximum allowed increase.

  Returns:
    A list of '<binary_name> failure: ...' messages, one per section whose
    check failed; empty if none did.
  """
  csv_path = '%s/%s.csv' % (input_dir, binary_name)
  # The size log is written in append mode, so the newest records are the last
  # rows. Keep that tail (nrows= would keep the oldest rows instead) and
  # renumber it from 0 so positional and label-based access agree.
  size_log = pd.read_csv(csv_path, index_col=False)
  size_log = size_log.tail(SIZE_HISTORY_DEPTH).reset_index(drop=True)

  fig, axs = plt.subplots(3, 2)
  try:
    fig.suptitle('Source: %s' % binary_name)

    threshold_messages = []
    for index, name in enumerate(['text', 'data', 'total']):
      err_msg_or_none = _subplot_and_detect_size_increase(
          axs, size_log, name, index, threshold)
      if err_msg_or_none is not None:
        threshold_messages.append('%s failure: %s' %
                                  (binary_name, err_msg_or_none))

    fig_path = '%s/%s.png' % (output_dir, binary_name)
    fig.tight_layout()
    fig.savefig(fig_path)
  finally:
    # clf() only empties a figure; close() releases it, so figures do not pile
    # up when several binaries are processed in one run.
    plt.close(fig)

  return threshold_messages
def _subplot_and_detect_size_increase(subplot_axs, size_log, section_name, row,
                                      threshold):
  """Plot one section's size history and check its most recent increase.

  Column 0 of the given subplot row shows the absolute size per record,
  column 1 the difference between each record and the one before it.

  Args:
    subplot_axs: grid of axes, indexed as [row, column].
    size_log: table with one column per section, indexable by section name.
      Records are assumed to be ordered oldest to newest (the log is written
      in append mode).
    section_name: column to plot and check, e.g. 'text'.
    row: subplot row to draw into.
    threshold: dict mapping a section name to its maximum allowed increase;
      sections without an entry are plotted but not checked.

  Returns:
    An error message if the increase from the second-newest to the newest
    record exceeds the section's threshold, otherwise None.
  """
  history = size_log[section_name]
  subplot_axs[row, 0].set_title(section_name)
  subplot_axs[row, 0].plot(history, 'o-')
  subplot_axs[row, 0].set_ylabel('Abs Sz(bytes)')

  increased_size = history.diff()
  subplot_axs[row, 1].plot(increased_size, 'o-')
  subplot_axs[row, 1].set_ylabel('Incr Sz (bytes)')

  # At least two records are needed for a difference to exist.
  if section_name in threshold and len(increased_size) > 1:
    # The last difference is the one introduced by the newest record; index 1
    # would be the difference between the two oldest records.
    latest_increase = increased_size.iloc[-1]
    if latest_increase > threshold[section_name]:
      return '%s size increases by %d and exceeds threshold %d' % (
          section_name, latest_increase, threshold[section_name])

  # By default there is no size increase that exceeds the threshold
  return None
def _detect_size_increase_and_plot_history(input_dir, output_dir, binary_list,
                                           threshold_setting):
  """Plot and check every listed binary, then report all failures at once.

  All binaries are processed before anything is raised, so each one gets its
  plot even when an earlier one failed its check.

  Args:
    input_dir: directory containing the CSV size logs.
    output_dir: directory the plots are written to.
    binary_list: iterable of binary names to process, in order.
    threshold_setting: dict mapping a section name to its maximum allowed
      increase.

  Raises:
    RuntimeError: if any check failed; the message is the string form of the
      list of failure messages.
  """
  failures = []
  for binary_name in binary_list:
    failures.extend(
        _plot_and_detect_size_increase_for_binary(input_dir, output_dir,
                                                  binary_name,
                                                  threshold_setting))
  if not failures:
    return
  raise RuntimeError(str(failures))
if __name__ == '__main__':
  parser = argparse.ArgumentParser()

  default_binary_list_string = 'keyword_benchmark,baseline_memory_footprint,interpreter_memory_footprint'
  parser.add_argument(
      '--binary_list',
      nargs='?',
      const=default_binary_list_string,
      default=default_binary_list_string,
      help=
      'binary list separated by comma (e.g. keyword_benchmark,baseline_memory_footprint)'
  )
  # Both directories are interpolated into file paths. They are required so a
  # forgotten option is rejected by argparse instead of turning into a path
  # that starts with the literal text "None".
  parser.add_argument('--input_dir',
                      required=True,
                      help='Path to the size log file (e.g. ~/size_log)')
  parser.add_argument('--output_dir',
                      required=True,
                      help='Path to save plot to (e.g. /tmp/)')

  args = parser.parse_args()

  binary_names = args.binary_list.split(',')

  _detect_size_increase_and_plot_history(args.input_dir, args.output_dir,
                                         binary_names, SIZE_THRESHOLD_SETTING)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册