未验证 提交 9b1835c0 编写于 作者: H Hans Zeller 提交者: GitHub

Adding options to generate diff files to plan comparison script (#7349)

The script used in pipelines that search for explain plan changes
lists those queries that have cost, row or plan changes. In many
cases the user will want to investigate those changes further.

A new set of options generates two directories that are easy to
compare, one contains the baseline plans, one file per plan, and
the other contains the changed plans of the test.

$ ~/workspace/gpdb/concourse/scripts/perfsummary.py --help
usage: perfsummary.py [-h] [--baseLog BASELOG] [--diffDir DIFFDIR]
                      [--diffThreshold DIFFTHRESHOLD] [--diffLevel DIFFLEVEL]
                      [log_file]

Summarize the test suite execute and explain log

positional arguments:
  log_file              log file with explain/execute output

optional arguments:
  -h, --help            show this help message and exit
  --baseLog BASELOG     specify a log file from a base version to compare to
  --diffDir DIFFDIR     request diff files to be created and specify a
                        directory to place diffs into
  --diffThreshold DIFFTHRESHOLD
                        specify a numerical threshold to record plan diffs
                        with a performance regression of more than n percent
  --diffLevel DIFFLEVEL
                        specify which diff files to generate: 1 = all diffs, 2
                        = ignore cost diffs, 3 = plan diffs only
上级 24ea1af7
......@@ -24,6 +24,14 @@
import sys
import re
import argparse
import os
# types of diffs in plans, by increasing severity
NO_CHANGES = 0
COST_CHANGES = 1
ROWS_CHANGES = 2
PLAN_CHANGES = 3
planDiffText = { NO_CHANGES: "", COST_CHANGES: "cost change found", ROWS_CHANGES: "row change found", PLAN_CHANGES: "plan change found" }
# the state of multiple test suite queries executed in a log file
class FileState:
......@@ -84,7 +92,11 @@ class FileState:
self.query_comment_map[query_id] = self.comment1
else:
self.query_comment_map[query_id] = self.comment2
self.query_explain_plan_map[query_id] = self.plans[seq_num]
if len(self.plans) > seq_num:
self.query_explain_plan_map[query_id] = self.plans[seq_num]
else:
self.query_explain_plan_map[query_id] = "error - no plan found"
self.query_comment_map[query_id] += "error - no plan found"
# process a single line of a log file
def processLogFileLine(self, line_lf):
......@@ -164,7 +176,7 @@ class FileState:
if len(myPlan) != len(basePlan):
# plans are different (different number of lines)
return "plan change found"
return PLAN_CHANGES
for l in range(len(myPlan)):
myLine = myPlan[l]
......@@ -187,14 +199,12 @@ class FileState:
plan_change_found = True
if plan_change_found:
return "plan change found"
elif cost_change_found and rows_change_found:
return "cost and cardinality change found"
return PLAN_CHANGES
elif rows_change_found:
return "row change found"
return ROWS_CHANGES
elif cost_change_found:
return "cost change found"
return ""
return COST_CHANGES
return NO_CHANGES
# print header for CSV file
def printHeader(self, numFiles):
......@@ -209,11 +219,24 @@ class FileState:
print "%s, %s, %s, %s" % (q, self.query_explain_time_map[q], self.query_exe_time_map[q], self.query_comment_map[q])
# print a CSV file with a comparison between a base file and a test file
def printComparison(self, base):
def printComparison(self, base, diffDir, diffThreshold, diffLevel):
for q in self.query_id_list:
planDiffs = self.comparePlans(base, q)
print "%s, %s, %s, %s, %s, %s, %s, %s" % (q, base.query_explain_time_map[q], self.query_explain_time_map[q], base.query_exe_time_map[q], self.query_exe_time_map[q], planDiffs, base.query_comment_map[q], self.query_comment_map[q])
print "%s, %s, %s, %s, %s, %s, %s, %s" % (q, base.query_explain_time_map[q], self.query_explain_time_map[q], base.query_exe_time_map[q], self.query_exe_time_map[q], planDiffText[planDiffs], base.query_comment_map[q], self.query_comment_map[q])
if int(diffLevel) <= int(planDiffs):
baseTime = float(base.query_exe_time_map[q])
testTime = float(self.query_exe_time_map[q])
if testTime > baseTime * (1+float(diffThreshold)/100.0) or testTime < 0:
baseFileName = diffDir + "/base/" + q + ".plan"
testFileName = diffDir + "/test/" + q + ".plan"
with open(baseFileName, 'w') as fb:
for line in base.query_explain_plan_map[q]:
fb.write(line)
fb.write("Execution time: %s\n" % base.query_exe_time_map[q])
with open(testFileName, 'w') as ft:
for line in self.query_explain_plan_map[q]:
ft.write(line)
ft.write("Execution time: %s\n" % self.query_exe_time_map[q])
def main():
......@@ -221,11 +244,42 @@ def main():
parser.add_argument('log_file', nargs = '?', help='log file with explain/execute output')
parser.add_argument('--baseLog',
help='specify a log file from a base version to compare to')
parser.add_argument('--diffDir',
help='request diff files to be created and specify a directory to place diffs into')
parser.add_argument('--diffThreshold',
help='specify a numerical threshold to record plan diffs with a performance regression of more than n percent')
parser.add_argument('--diffLevel',
help='specify which diff files to generate: 1 = all diffs, 2 = ignore cost diffs, 3 = plan diffs only')
args = parser.parse_args()
inputfile = args.log_file
basefile = args.baseLog
makeDiffs = (args.diffDir != None)
diffDir = ""
diffThreshold = -100
diffLevel = 4
if makeDiffs:
# remove trailing slash, if it exists
diffDir = re.sub(r'(.*)/$','\1', args.diffDir)
try:
os.mkdir(diffDir)
os.mkdir(diffDir + "/base")
os.mkdir(diffDir + "/test")
except:
print "Unable to create diff directory %s" % diffDir
exit(1)
if args.diffThreshold != None:
diffThreshold = args.diffThreshold
if args.diffLevel == None:
# if only diffThreshold is specified, then default diffLevel to COST_CHANGES
args.diffLevel = COST_CHANGES
if args.diffLevel != None:
diffLevel = args.diffLevel
else:
if (args.diffThreshold != None or args.diffLevel != None):
print "Please specify the --diffDir option with a directory name to request diff files\n"
exit(1)
if inputfile is None:
print "Expected the name of a log file with test suite queries\n"
......@@ -245,7 +299,7 @@ def main():
testState.printme()
else:
testState.printHeader(2)
testState.printComparison(baseState)
testState.printComparison(baseState, diffDir, diffThreshold, diffLevel)
if __name__== "__main__":
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册