diff --git a/src/test/tinc/ext/atmsort.pl b/src/test/tinc/ext/atmsort.pl deleted file mode 100755 index eb06e4f750d2eb626637baff55381b5c6d0b3392..0000000000000000000000000000000000000000 --- a/src/test/tinc/ext/atmsort.pl +++ /dev/null @@ -1,1970 +0,0 @@ -#!/usr/bin/env perl -# -# $Header: //cdb2/feature/PARISTX/ORCA/cdb-pg/src/test/regress/atmsort.pl#1 $ -# -# copyright (c) 2007, 2008, 2009 GreenPlum. All rights reserved. -# Author: Jeffrey I Cohen -# -# -use Pod::Usage; -use Getopt::Long; -use Data::Dumper; -use strict; -use warnings; - -=head1 NAME - -B - [A] [T]est [M]echanism Sort: sort the contents of SQL log files to aid diff comparison - -=head1 SYNOPSIS - -B [options] logfile [logfile...] - -Options: - - -help brief help message - -man full documentation - -ignore_headers ignore header lines in query output - -ignore_plans ignore explain plan content in query output - -init load initialization file - -do_equiv construct or compare equivalent query regions - -=head1 OPTIONS - -=over 8 - -=item B<-help> - - Print a brief help message and exits. - -=item B<-man> - - Prints the manual page and exits. - -=item B<-ignore_headers> - -gpdiff/atmsort expect Postgresql "psql-style" output for SELECT -statements, with a two line header composed of the column names, -separated by vertical bars (|), and a "separator" line of dashes and -pluses beneath, followed by the row output. The psql utility performs -some formatting to adjust the column widths to match the size of the -row output. Setting this parameter causes gpdiff to ignore any -differences in the column naming and format widths globally. - -=item B<-ignore_plans> - -Specify this option to ignore any explain plan diffs between the -input files. This will completely ignore any plan content in -the input files thus masking differences in plans between the input files. - -For example, for the following plan: -explain select i from foo where i > 10; - QUERY PLAN ------------------------------------------------------------------------------ - Gather Motion 2:1 (slice1; segments: 2) (cost=0.00..2.72 rows=45 width=4) - -> Table Scan on foo (cost=0.00..1.55 rows=45 width=4) - Filter: i > 10 - Settings: optimizer=on -(4 rows) - -atmsort.pl -ignore_plans will reduce this to: - -explain select i from foo where i > 10; -QUERY PLAN -___________ -GP_IGNORE:{ -GP_IGNORE: 'child' => [ -GP_IGNORE: { -GP_IGNORE: 'id' => 2, -GP_IGNORE: 'parent' => 1, -GP_IGNORE: 'short' => 'Table Scan on foo' -GP_IGNORE: } -GP_IGNORE: ], -GP_IGNORE: 'id' => 1, -GP_IGNORE: 'short' => 'Gather Motion' -GP_IGNORE:} -GP_IGNORE:(4 rows) - - -=item B<-init> - -Specify an initialization file containing a series of directives -(mainly for match_subs) that get applied to the input files. To -specify multiple initialization files, use multiple init arguments, -eg: - - -init file1 -init file2 - - -=item B<-do_equiv> - - Choose one of the following options - -=over 12 - -=item ignore_all: - -(default) ignore all content in a start_equiv/end_equiv block. - -=item make: - -replace the query output of all queries in a start_equiv/end_equiv -block with the output of the first query, processed according to any -formatting directives for each query. - - -=item compare: - -process the query output of all queries in a start_equiv/end_equiv -block (versus prefixing the entire block with GP_IGNORE). 
- -=back - -=back - -=head1 DESCRIPTION - -atmsort reads sql log files from STDIN and sorts the query output for -all SELECT statements that do *not* have an ORDER BY, writing the -result to STDOUT. This change to the log facilitates diff comparison, -since unORDERed query output does not have a guaranteed order. Note -that for diff to work correctly, statements that do use ORDER BY must -have a fully-specified order. The utility gpdiff.pl invokes atmsort -in order to compare the Greenplum test results against standard -Postgresql. - -The log content must look something like: - - SELECT a, b, c, d - from foo - ORDER BY 1,2,3,4; - a | b | c | d - ------------+-----------------+-----------+--------------- - 1 | 1 | 1 | 1 - 1 | 1 | 1 | 2 - 3 | 2 | 2 | 5 - (3 rows) - -The log file must contain SELECT statements, followed by the query -output in the standard Postgresql format, ie a set of named columns, a -separator line constructed of dashes and plus signs, and the rows, -followed by an "(N rows)" row count. The SELECT statement must be -unambiguous, eg no embedded SQL keywords like INSERT, UPDATE, or -DELETE, and it must be terminated with a semicolon. Normally, the -query output is sorted, but if the statement contains an ORDER BY -clause the query output for that query is not sorted. - -=head2 EXPLAIN PLAN - -atmsort can also use explain.pl to process EXPLAIN and EXPLAIN ANALYZE -output in a configuration-independent way. It strips out all timing, -segment, and slice information, reducing the plan to a simple nested -perl structure. For example, for the following plan: - -explain analyze select * from customer; - - QUERY PLAN ------------------------------------------------------------------------- - Gather Motion 2:1 (slice1) (cost=0.00..698.88 rows=25088 width=550) - Rows out: 150000 rows at destination with 0.230 ms to first row, - 386 ms to end, start offset by 8.254 ms. - -> Seq Scan on customer (cost=0.00..698.88 rows=25088 width=550) - Rows out: Avg 75000.0 rows x 2 workers. Max 75001 rows (seg0) - with 0.056 ms to first row, 26 ms to end, start offset by 7.332 ms. - Slice statistics: - (slice0) Executor memory: 186K bytes. - (slice1) Executor memory: 130K bytes avg x 2 workers, - 130K bytes max (seg0). - Total runtime: 413.401 ms -(8 rows) - -atmsort reduces the plan to: - - QUERY PLAN ------------------------------------------------------------------------- -{ - 'child' => [ - { - 'id' => 2, - 'parent' => 1, - 'short' => 'Seq Scan on customer' - } - ], - 'id' => 1, - 'short' => 'Gather Motion' - } -(8 rows) - - -=head2 Advanced Usage - -atmsort supports several "commands" that allow finer-grained control -over the comparison process for SELECT queries. These commands are -specified in comments in the following form: - - -- - -- order 1 - -- - SELECT a, b, c, d - from foo - ORDER BY 1; - -or - - SELECT a, b, c, d - from foo - ORDER BY 1; -- order 1 - -The supported commands are: - -=over 12 - -=item -- order column number[, column number...] - - The order directive is used to compare - "partially-ordered" query - output. The specified columns are assumed - to be ordered, and the remaining columns are - sorted to allow for deterministic comparison. - -=item -- ignore - -The ignore directive prefixes the SELECT output with GP_IGNORE. The -diff command can use the -I flag to ignore lines with this prefix. - -=item -- mvd colnum[, colnum...] -> colnum[, colnum...] [; ] - -mvd is designed to support Multi-Value Dependencies for OLAP queries. 
-The syntax "col1,col2->col3,col4" indicates that the col1 and col2 -values determine the col3, col4 result order. - -=item -- start_ignore - -Ignore all results until the next "end_ignore" directive. The -start_ignore directive prefixes all subsequent output with GP_IGNORE, -and all other formatting directives are ignored as well. The diff -command can use the -I flag to ignore lines with this prefix. - -=item -- end_ignore - - Ends the ignored region that started with "start_ignore" - -=item -- start_headers_ignore - -Similar to the command-line "ignore_headers", ignore differences in -column naming and format widths. - -=item -- end_headers_ignore - - Ends the "headers ignored" region that started with "start_headers_ignore" - -=item -- start_equiv - -Begin an "equivalent" region, and treat contents according to the -specified --do_equiv option. Normally, the results are ignored. The -"--do_equiv=make" option replaces the contents of all queries in the -equivalent region with the results of the first query. If -"--do_equiv=compare" option is specified, the region is processed -according to the standard query formatting rules. - -=item -- end_equiv - - Ends the equivalent region that started with "start_equiv" - -=item -- copy_stdout - -Marks the start of a "copy" command that writes to stdout. In this -case, the directive must be on the same line as the command, e.g: - - copy mystuff to stdout ; -- copy_stdout - -=item -- start_matchsubs - -Starts a list of match/substitution expressions, where the match and -substitution are specified as perl "m" and "s" operators for a single -line of input. atmsort will compile the expressions and use them to -process the current input file. The format is: - - -- start_matchsubs - -- - -- # first, a match expression - -- m/match this/ - -- # next, a substitute expression - -- s/match this/substitute this/ - -- - -- # and can have more matchsubs after this... - -- - -- end_matchsubs - - Blank lines are ignored, and comments may be used if they are - prefixed with "#", the perl comment character, eg: - - -- # this is a comment - - Multiple match and substitute pairs may be specified. See "man - perlre" for more information on perl regular expressions. - -=item -- end_matchsubs - - Ends the match/substitution region that started with "start_matchsubs" - -=item -- start_matchignore - -Similar to matchsubs, starts a list of match/ignore expressions as a -set of perl match operators. Each line that matches one of the -specified expressions is elided from the atmsort output. Note that -there isn't an "ignore" expression -- just a list of individual match -operators. - -=item -- end_matchignore - - Ends the match/ignore region that started with "start_matchignore" - -=item -- force_explain - -Normally, atmsort can detect that a SQL query is being EXPLAINed, and -the expain processing will happen automatically. However, if the -query is complex, you may need to tag it with a comment to force the -explain. Using this command for non-EXPLAIN statements is -inadvisable. - -=back - -Note that you can combine the directives for a single query, but each -directive must be on a separate line. Multiple mvd specifications -must be on a single mvd line, separated by semicolons. Note that -start_ignore overrides all directives until the next end_ignore. 
- -=head1 CAVEATS/LIMITATIONS - -atmsort cannot handle "unsorted" SELECT queries where the output has -strings with embedded newlines or pipe ("|") characters due to -limitations with the parser in the "tablelizer" function. Queries -with these characteristics must have an ORDER BY clause to avoid -potential erroneous comparison. - -=head1 AUTHORS - -Jeffrey I Cohen - -Copyright (c) 2007, 2008, 2009 GreenPlum. All rights reserved. - -Address bug reports and comments to: jcohen@greenplum.com - - -=cut - -my $glob_id = ""; - -# optional set of prefixes to identify sql statements, query output, -# and sorted lines (for testing purposes) -#my $apref = 'a: '; -#my $bpref = 'b: '; -#my $cpref = 'c: '; -#my $dpref = 'S: '; -my $apref = ''; -my $bpref = ''; -my $cpref = ''; -my $dpref = ''; - -my $glob_compare_equiv; -my $glob_make_equiv_expected; -my $glob_ignore_headers; -my $glob_ignore_plans; -my $glob_ignore_whitespace; -my $glob_init; - -my $glob_orderwarn; -my $glob_verbose; -my $glob_fqo; - -# array of "expected" rows from first query of equiv region -my $equiv_expected_rows; - -BEGIN -{ - $glob_compare_equiv = 0; - $glob_make_equiv_expected = 0; - $glob_ignore_headers = 0; - $glob_ignore_plans = 0; - $glob_ignore_whitespace = 0; - $glob_init = []; - - $glob_orderwarn = 0; - $glob_verbose = 0; - $glob_fqo = {count => 0}; -} - -BEGIN { - my $man = 0; - my $help = 0; - my $compare_equiv = 0; - my $make_equiv_expected = 0; - my $do_equiv; - my $ignore_headers; - my $ignore_plans; - my @init_file; - my $verbose; - my $orderwarn; - - GetOptions( - 'help|?' => \$help, man => \$man, - 'gpd_ignore_headers|gp_ignore_headers|ignore_headers' => \$ignore_headers, - 'gpd_ignore_plans|gp_ignore_plans|ignore_plans' => \$ignore_plans, - 'gpd_init|gp_init|init:s' => \@init_file, - 'do_equiv:s' => \$do_equiv, - 'order_warn|orderwarn' => \$orderwarn, - 'verbose' => \$verbose - ) - or pod2usage(2); - - if (defined($do_equiv)) - { - if ($do_equiv =~ m/^(ignore)/i) - { - # ignore all - default - } - elsif ($do_equiv =~ m/^(compare)/i) - { - # compare equiv region - $compare_equiv = 1; - } - elsif ($do_equiv =~ m/^(make)/i) - { - # make equiv expected output - $make_equiv_expected = 1; - } - else - { - $glob_id = "unknown do_equiv option: $do_equiv\nvalid options are:\n\tdo_equiv=compare\n\tdo_equiv=make"; - $help = 1; - } - - } - - pod2usage(-msg => $glob_id, -exitstatus => 1) if $help; - pod2usage(-msg => $glob_id, -exitstatus => 0, -verbose => 2) if $man; - - $glob_compare_equiv = $compare_equiv; - $glob_make_equiv_expected = $make_equiv_expected; - $glob_ignore_headers = $ignore_headers; - $glob_ignore_plans = $ignore_plans; - - $glob_ignore_whitespace = $ignore_headers; # XXX XXX: for now - - # ENGINF-200: allow multiple init files - push @{$glob_init}, @init_file; - - $glob_orderwarn = $orderwarn; - $glob_verbose = $verbose; - -} - -my $glob_match_then_sub_fnlist; - -sub _build_match_subs -{ - my ($here_matchsubs, $whomatch) = @_; - - my $stat = [1]; - - # filter out the comments and blank lines - $here_matchsubs =~ s/^\s*\#.*$//gm; - $here_matchsubs =~ s/^\s+$//gm; - -# print $here_matchsubs; - - # split up the document into separate lines - my @foo = split(/\n/, $here_matchsubs); - - my $ii = 0; - - my $matchsubs_arr = []; - my $msa; - - # build an array of arrays of match/subs pairs - while ($ii < scalar(@foo)) - { - my $lin = $foo[$ii]; - - if ($lin =~ m/^\s*$/) # skip blanks - { - $ii++; - next; - } - - if (defined($msa)) - { - push @{$msa}, $lin; - - push @{$matchsubs_arr}, $msa; - - undef $msa; - } 
- else - { - $msa = [$lin]; - } - $ii++; - next; - } # end while - -# print Data::Dumper->Dump($matchsubs_arr); - - my $bigdef; - - my $fn1; - - # build a lambda function for each expression, and load it into an - # array - my $mscount = 1; - - for my $defi (@{$matchsubs_arr}) - { - unless (2 == scalar(@{$defi})) - { - my $err1 = "bad definition: " . Data::Dumper->Dump([$defi]); - $stat->[0] = 1; - $stat->[1] = $err1; - return $stat; - } - - $bigdef = '$fn1 = sub { my $ini = shift; '. "\n"; - $bigdef .= 'if ($ini =~ ' . $defi->[0]; - $bigdef .= ') { ' . "\n"; -# $bigdef .= 'print "match\n";' . "\n"; - $bigdef .= '$ini =~ ' . $defi->[1]; - $bigdef .= '; }' . "\n"; - $bigdef .= 'return $ini; }' . "\n"; - -# print $bigdef; - - if (eval $bigdef) - { - my $cmt = $whomatch . " matchsubs \#" . $mscount; - $mscount++; - - # store the function pointer and the text of the function - # definition - push @{$glob_match_then_sub_fnlist}, - [$fn1, $bigdef, $cmt, $defi->[0], $defi->[1]]; - - if ($glob_verbose) - { - print "GP_IGNORE: Defined $cmt\t$defi->[0]\t$defi->[1]\n" - } - } - else - { - my $err1 = "bad eval: $bigdef"; - $stat->[0] = 1; - $stat->[1] = $err1; - return $stat; - } - - } - -# print Data::Dumper->Dump($glob_match_then_sub_fnlist); - - return $stat; - -} # end _build_match_subs - -# list of all the match/substitution expressions -BEGIN -{ - my $here_matchsubs; - - -# construct a "HERE" document of match expressions followed by -# substitution expressions. Embedded comments and blank lines are ok -# (they get filtered out). - - $here_matchsubs = << 'EOF_matchsubs'; - -# some cleanup of greenplum-specific messages -m/\s+(\W)?(\W)?\(seg.*pid.*\)/ -s/\s+(\W)?(\W)?\(seg.*pid.*\)// - -m/WARNING:\s+foreign key constraint \".*\" will require costly sequential scans/ -s/\".*\"/\"dummy key\"/ - -m/CONTEXT:.*\s+of this segment db input data/ -s/\s+of this segment db input data// - -# distributed transactions -m/(ERROR|WARNING|CONTEXT|NOTICE):.*gid\s+=\s+(\d+)/ -s/gid.*/gid DUMMY/ - -m/(ERROR|WARNING|CONTEXT|NOTICE):.*DTM error.*gathered (\d+) results from cmd.*/ -s/gathered.*results/gathered SOME_NUMBER_OF results/ - -# fix code locations eg "(xact.c:1458)" to "(xact.c:SOME_LINE)" -m/(ERROR|WARNING|CONTEXT|NOTICE):\s+Raise an error as directed by/ -s/\.c\:\d+\)/\.c\:SOME_LINE\)/ - -m/(DETAIL|ERROR|WARNING|CONTEXT|NOTICE):\s+Raise .* for debug_dtm_action\s*\=\s* \d+/ -s/\.c\:\d+\)/\.c\:SOME_LINE\)/ - -m/(ERROR|WARNING|CONTEXT|NOTICE):\s+Could not .* savepoint/ -s/\.c\:\d+\)/\.c\:SOME_LINE\)/ - -m/(ERROR|WARNING|CONTEXT|NOTICE):.*connection.*failed.*(http|gpfdist)/ -s/connection.*failed.*(http|gpfdist).*/connection failed dummy_protocol\:\/\/DUMMY_LOCATION/ - -# the EOF ends the HERE document -EOF_matchsubs - - $glob_match_then_sub_fnlist = []; - - my $stat = _build_match_subs($here_matchsubs, "DEFAULT"); - - if (scalar(@{$stat}) > 1) - { - die $stat->[1]; - } - -} - -sub match_then_subs -{ - my $ini = shift; - - for my $ff (@{$glob_match_then_sub_fnlist}) - { - - # get the function and execute it - my $fn1 = $ff->[0]; - if (!$glob_verbose) - { - $ini = &$fn1($ini); - } - else - { - my $subs = &$fn1($ini); - - unless ($subs eq $ini) - { - print "GP_IGNORE: was: $ini"; - print "GP_IGNORE: matched $ff->[-3]\t$ff->[-2]\t$ff->[-1]\n" - } - - $ini = &$fn1($ini); - } - - } - return $ini; -} - -my $glob_match_then_ignore_fnlist; - -sub _build_match_ignores -{ - my ($here_matchignores, $whomatch) = @_; - - my $stat = [1]; - - # filter out the comments and blank lines - $here_matchignores =~ s/^\s*\#.*$//gm; - 
$here_matchignores =~ s/^\s+$//gm; - -# print $here_matchignores; - - # split up the document into separate lines - my @foo = split(/\n/, $here_matchignores); - - my $matchignores_arr = []; - - # build an array of match expressions - for my $lin (@foo) - { - next - if ($lin =~ m/^\s*$/); # skip blanks - - push @{$matchignores_arr}, $lin; - } - -# print Data::Dumper->Dump($matchignores_arr); - - my $bigdef; - - my $fn1; - - # build a lambda function for each expression, and load it into an - # array - my $mscount = 1; - - for my $defi (@{$matchignores_arr}) - { - $bigdef = '$fn1 = sub { my $ini = shift; '. "\n"; - $bigdef .= 'return ($ini =~ ' . $defi; - $bigdef .= ') ; } ' . "\n"; -# print $bigdef; - - if (eval $bigdef) - { - my $cmt = $whomatch . " matchignores \#" . $mscount; - $mscount++; - - # store the function pointer and the text of the function - # definition - push @{$glob_match_then_ignore_fnlist}, - [$fn1, $bigdef, $cmt, $defi, "(ignore)"]; - if ($glob_verbose) - { - print "GP_IGNORE: Defined $cmt\t$defi\n" - } - - } - else - { - my $err1 = "bad eval: $bigdef"; - $stat->[0] = 1; - $stat->[1] = $err1; - return $stat; - } - - } - -# print Data::Dumper->Dump($glob_match_then_ignore_fnlist); - - return $stat; - -} # end _build_match_ignores - -# list of all the match/ignore expressions -BEGIN -{ - my $here_matchignores; - -# construct a "HERE" document of match expressions to ignore in input. -# Embedded comments and blank lines are ok (they get filtered out). - - $here_matchignores = << 'EOF_matchignores'; - - # XXX XXX: note the discrepancy in the NOTICE messages - # 'distributed by' vs 'DISTRIBUTED BY' -m/NOTICE:\s+Table doesn\'t have \'distributed by\' clause\, and no column type is suitable/i - -m/NOTICE:\s+Table doesn\'t have \'DISTRIBUTED BY\' clause/i - -m/NOTICE:\s+Dropping a column that is part of the distribution policy/ - -m/NOTICE:\s+Table has parent\, setting distribution columns to match parent table/ - -m/HINT:\s+The \'DISTRIBUTED BY\' clause determines the distribution of data/ - -m/WARNING:\s+Referential integrity \(.*\) constraints are not supported in Greenplum Database/ - - -m/^\s*Distributed by:\s+\(.*\)\s*$/ - - # ignore notices for DROP sqlobject IF EXISTS "objectname" - # eg NOTICE: table "foo" does not exist, skipping - # - # the NOTICE is different from the ERROR case, which does not - # end with "skipping" -m/^NOTICE:\s+\w+\s+\".*\"\s+does not exist\,\s+skipping\s*$/ - - -# the EOF ends the HERE document -EOF_matchignores - - $glob_match_then_ignore_fnlist = []; - - my $stat = _build_match_ignores($here_matchignores, "DEFAULT"); - - if (scalar(@{$stat}) > 1) - { - die $stat->[1]; - } - -} - -# if the input matches, return 1 (ignore), else return 0 (keep) -sub match_then_ignore -{ - my $ini = shift; - - for my $ff (@{$glob_match_then_ignore_fnlist}) - { - # get the function and execute it - my $fn1 = $ff->[0]; - - if (&$fn1($ini)) - { - if ($glob_verbose) - { - print "GP_IGNORE: matched $ff->[-3]\t$ff->[-2]\t$ff->[-1]\n" - } - return 1; # matched - } - } - return 0; # no match -} - -# convert a postgresql psql formatted table into an array of hashes -sub tablelizer -{ - my ($ini, $got_line1) = @_; - - # first, split into separate lines, the find all the column headings - - my @lines = split(/\n/, $ini); - - return undef - unless (scalar(@lines)); - - # if the first line is supplied, then it has the column headers, - # so don't try to find them (or the ---+---- separator) in - # "lines" - my $line1 = $got_line1; - $line1 = shift @lines - unless 
(defined($got_line1)); - - # look for | - my @colheads = split(/\s+\|\s+/, $line1); - - # fixup first, last column head (remove leading,trailing spaces) - - $colheads[0] =~ s/^\s+//; - $colheads[0] =~ s/\s+$//; - $colheads[-1] =~ s/^\s+//; - $colheads[-1] =~ s/\s+$//; - - return undef - unless (scalar(@lines)); - - shift @lines # skip dashed separator (unless it was skipped already) - unless (defined($got_line1)); - - my @rows; - - for my $lin (@lines) - { - my @cols = split(/\|/, $lin, scalar(@colheads)); - last - unless (scalar(@cols) == scalar(@colheads)); - - my $rowh = {}; - - for my $colhdcnt (0..(scalar(@colheads)-1)) - { - my $rawcol = shift @cols; - - $rawcol =~ s/^\s+//; - $rawcol =~ s/\s+$//; - - my $colhd = $colheads[$colhdcnt]; - $rowh->{($colhdcnt+1)} = $rawcol; - } - push @rows, $rowh; - } - - return \@rows; -} -# reformat the EXPLAIN output according to the directive hash -sub format_explain -{ - my ($outarr, $directive) = @_; - my $prefix = ""; - my $xopt = "perl"; # normal case - my $psuffix = ""; - - $directive = {} unless (defined($directive)); - - # Ignore plan content if its between start_ignore and end_ignore blocks - # or if -ignore_plans is specified. - $prefix = "GP_IGNORE:" - if (exists($directive->{ignore})) || ($glob_ignore_plans); - - { - use IO::File; - use POSIX qw(tmpnam); - - my ($tmpnam, $tmpfh); - - for (;;) { - $tmpnam = tmpnam(); - sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last; - } - - if (scalar(@{$outarr})) - { - print $tmpfh "QUERY PLAN\n"; - # explain.pl expects a long string of dashes - print $tmpfh "-" x 71, "\n"; - for my $lin (@{$outarr}) - { - print $tmpfh $lin; - } - print $tmpfh "(111 rows)\n"; - } - - close $tmpfh; - - if (exists($directive->{explain}) - && ($directive->{explain} =~ m/operator/i)) - { - $xopt = "operator"; - $psuffix = " | sort "; - } - - my $plantxt = "explain.pl -opt $xopt -prune heavily < $tmpnam $psuffix"; - - - my $xplan = `$plantxt`; - - - unlink $tmpnam; - - if (defined($prefix) && length($prefix)) - { - $xplan =~ s/^/$prefix/gm; - } - - - print $xplan; - - # for "force_explain operator", replace the outarr with the - # processed output (for equivalence regions ) - if (scalar(@{$outarr}) - && exists($directive->{explain}) - && ($directive->{explain} =~ m/operator/i)) - { - my @foo = split (/\n/, $xplan); - - # gross -- need to add the carriage return back! 
- for my $ii (0..(scalar(@foo)-1)) - { - $foo[$ii] .= "\n"; - } - - return \@foo; - } - - } -} - -# reformat the query output according to the directive hash -sub format_query_output -{ - my ($fqostate, $has_order, $outarr, $directive) = @_; - my $prefix = ""; - - $directive = {} unless (defined($directive)); - - $fqostate->{count} += 1; - - if ($glob_verbose) - { - print "GP_IGNORE: start fqo $fqostate->{count}\n"; - } - - if (exists($directive->{make_equiv_expected})) - { - # special case for EXPLAIN PLAN as first "query" - if (exists($directive->{explain})) - { - my $stat = format_explain($outarr, $directive); - - # save the first query output from equiv as "expected rows" - - if ($stat) - { - push @{$equiv_expected_rows}, @{$stat}; - } - else - { - push @{$equiv_expected_rows}, @{$outarr}; - } - - if ($glob_verbose) - { - print "GP_IGNORE: end fqo $fqostate->{count}\n"; - } - - return ; - - } - - # save the first query output from equiv as "expected rows" - push @{$equiv_expected_rows}, @{$outarr}; - } - elsif (defined($equiv_expected_rows) - && scalar(@{$equiv_expected_rows})) - { - # reuse equiv expected rows if you have them - $outarr = []; - push @{$outarr}, @{$equiv_expected_rows}; - } - - # explain (if not in an equivalence region) - if (exists($directive->{explain})) - { - format_explain($outarr, $directive); - if ($glob_verbose) - { - print "GP_IGNORE: end fqo $fqostate->{count}\n"; - } - return; - } - - $prefix = "GP_IGNORE:" - if (exists($directive->{ignore})); - - if (exists($directive->{sortlines})) - { - my $firstline = $directive->{firstline}; - my $ordercols = $directive->{order}; - my $mvdlist = $directive->{mvd}; - - # lines already have newline terminator, so just rejoin them. - my $lines = join ("", @{$outarr}); - - my $ah1 = tablelizer($lines, $firstline); - - unless (defined($ah1) && scalar(@{$ah1})) - { -# print "No tablelizer hash for $lines, $firstline\n"; -# print STDERR "No tablelizer hash for $lines, $firstline\n"; - - if ($glob_verbose) - { - print "GP_IGNORE: end fqo $fqostate->{count}\n"; - } - - return; - } - - my @allcols = sort (keys(%{$ah1->[0]})); - - my @presortcols; - if (defined($ordercols) && length($ordercols)) - { -# $ordercols =~ s/^.*order\s*//; - $ordercols =~ s/\n//gm; - $ordercols =~ s/\s//gm; - - @presortcols = split(/\s*\,\s*/, $ordercols); - } - - my @mvdcols; - my @mvd_deps; - my @mvd_nodeps; - my @mvdspec; - if (defined($mvdlist) && length($mvdlist)) - { - $mvdlist =~ s/\n//gm; - $mvdlist =~ s/\s//gm; - - # find all the mvd specifications (separated by semicolons) - my @allspecs = split(/\;/, $mvdlist); - -# print "allspecs:", Data::Dumper->Dump(\@allspecs); - - for my $item (@allspecs) - { - my $realspec; - # split the specification list, separating the - # specification columns on the left hand side (LHS) - # from the "dependent" columns on the right hand side (RHS) - my @colset = split(/\-\>/, $item, 2); - unless (scalar(@colset) == 2) - { - print "invalid colset for $item\n"; - print STDERR "invalid colset for $item\n"; - next; - } - # specification columns (LHS) - my @scols = split(/\,/, $colset[0]); - unless (scalar(@scols)) - { - print "invalid dependency specification: $colset[0]\n"; - print STDERR - "invalid dependency specification: $colset[0]\n"; - next; - } - # dependent columns (RHS) - my @dcols = split(/\,/, $colset[1]); - unless (scalar(@dcols)) - { - print "invalid specified dependency: $colset[1]\n"; - print STDERR "invalid specified dependency: $colset[1]\n"; - next; - } - $realspec = {}; - my $scol2 = []; - my $dcol2 
= []; - my $sdcol = []; - $realspec->{spec} = $item; - push @{$scol2}, @scols; - push @{$dcol2}, @dcols; - push @{$sdcol}, @scols, @dcols; - $realspec->{scol} = $scol2; - $realspec->{dcol} = $dcol2; - $realspec->{allcol} = $sdcol; - - push @mvdcols, @scols, @dcols; - # find all the dependent columns - push @mvd_deps, @dcols; - push @mvdspec, $realspec; - } - - # find all the mvd cols which are *not* dependent. Need - # to handle the case of self-dependency, eg "mvd 1->1", so - # must build set of all columns, then strip out the - # "dependent" cols. So this is the set of all LHS columns - # which are never on the RHS. - my %get_nodeps; - - for my $col (@mvdcols) - { - $get_nodeps{$col} = 1; - } - - # remove dependent cols - for my $col (@mvd_deps) - { - if (exists($get_nodeps{$col})) - { - delete $get_nodeps{$col}; - } - } - # now sorted and unique, with no dependents - @mvd_nodeps = sort (keys(%get_nodeps)); -# print "mvdspec:", Data::Dumper->Dump(\@mvdspec); -# print "mvd no deps:", Data::Dumper->Dump(\@mvd_nodeps); - } - - my %unsorth; - - for my $col (@allcols) - { - $unsorth{$col} = 1; - } - - # clear sorted column list if just "order 0" - if ((1 == scalar(@presortcols)) - && ($presortcols[0] eq "0")) - { - @presortcols = (); - } - - - for my $col (@presortcols) - { - if (exists($unsorth{$col})) - { - delete $unsorth{$col}; - } - } - for my $col (@mvdcols) - { - if (exists($unsorth{$col})) - { - delete $unsorth{$col}; - } - } - my @unsortcols = sort(keys(%unsorth)); - -# print Data::Dumper->Dump([$ah1]); - - if (scalar(@presortcols)) - { - my $hd1 = "sorted columns " . join(", ", @presortcols); - - print $hd1, "\n", "-"x(length($hd1)), "\n"; - - for my $h_row (@{$ah1}) - { - my @collist; - - @collist = (); - -# print "hrow:",Data::Dumper->Dump([$h_row]), "\n"; - - for my $col (@presortcols) - { -# print "col: ($col)\n"; - if (exists($h_row->{$col})) - { - push @collist, $h_row->{$col}; - } - else - { - my $maxcol = scalar(@allcols); - my $errstr = - "specified ORDER column out of range: $col vs $maxcol\n"; - print $errstr; - print STDERR $errstr; - last; - } - } - print $prefix, join(' | ', @collist), "\n"; - } - } - - if (scalar(@mvdspec)) - { - my @outi; - - my $hd1 = "multivalue dependency specifications"; - - print $hd1, "\n", "-"x(length($hd1)), "\n"; - - for my $mspec (@mvdspec) - { - $hd1 = $mspec->{spec}; - print $hd1, "\n", "-"x(length($hd1)), "\n"; - - for my $h_row (@{$ah1}) - { - my @collist; - - @collist = (); - -# print "hrow:",Data::Dumper->Dump([$h_row]), "\n"; - - for my $col (@{$mspec->{allcol}}) - { -# print "col: ($col)\n"; - if (exists($h_row->{$col})) - { - push @collist, $h_row->{$col}; - } - else - { - my $maxcol = scalar(@allcols); - my $errstr = - "specified MVD column out of range: $col vs $maxcol\n"; - print $errstr; - print STDERR $errstr; - last; - } - - } - push @outi, join(' | ', @collist); - } - my @ggg= sort @outi; - - for my $line (@ggg) - { - print $prefix, $line, "\n"; - } - @outi = (); - } - } - my $hd2 = "unsorted columns " . 
join(", ", @unsortcols); - - # the "unsorted" comparison must include all columns which are - # not sorted or part of an mvd specification, plus the sorted - # columns, plus the non-dependent mvd columns which aren't - # already in the list - if ((scalar(@presortcols)) - || scalar(@mvd_nodeps)) - { - if (scalar(@presortcols)) - { - if (scalar(@mvd_deps)) - { - my %get_presort; - - for my $col (@presortcols) - { - $get_presort{$col} = 1; - } - # remove "dependent" (RHS) columns - for my $col (@mvd_deps) - { - if (exists($get_presort{$col})) - { - delete $get_presort{$col}; - } - } - # now sorted and unique, minus all mvd dependent cols - @presortcols = sort (keys(%get_presort)); - - } - - if (scalar(@presortcols)) - { - $hd2 .= " ( " . join(", ", @presortcols) . ")"; - # have to compare all columns as unsorted - push @unsortcols, @presortcols; - } - } - if (scalar(@mvd_nodeps)) - { - my %get_nodeps; - - for my $col (@mvd_nodeps) - { - $get_nodeps{$col} = 1; - } - # remove "nodeps" which are already in the output list - for my $col (@unsortcols) - { - if (exists($get_nodeps{$col})) - { - delete $get_nodeps{$col}; - } - } - # now sorted and unique, minus all unsorted/sorted cols - @mvd_nodeps = sort (keys(%get_nodeps)); - if (scalar(@mvd_nodeps)) - { - $hd2 .= " (( " . join(", ", @mvd_nodeps) . "))"; - # have to compare all columns as unsorted - push @unsortcols, @mvd_nodeps; - } - - } - - } - - print $hd2, "\n", "-"x(length($hd2)), "\n"; - - my @finalunsort; - - if (scalar(@unsortcols)) - { - for my $h_row (@{$ah1}) - { - my @collist; - - @collist = (); - - for my $col (@unsortcols) - { - if (exists($h_row->{$col})) - { - push @collist, $h_row->{$col}; - } - else - { - my $maxcol = scalar(@allcols); - my $errstr = - "specified UNSORT column out of range: $col vs $maxcol\n"; - print $errstr; - print STDERR $errstr; - last; - } - - } - push @finalunsort, join(' | ', @collist); - } - my @ggg= sort @finalunsort; - - for my $line (@ggg) - { - print $prefix, $line, "\n"; - } - } - - if ($glob_verbose) - { - print "GP_IGNORE: end fqo $fqostate->{count}\n"; - } - - return; - } # end order - - - if ($has_order) - { - my @ggg= @{$outarr}; - - if ($glob_ignore_whitespace) - { - my @ggg2; - - for my $line (@ggg) - { - # remove all leading, trailing whitespace (changes sorting) - # and whitespace around column separators - $line =~ s/^\s+//; - $line =~ s/\s+$//; - $line =~ s/\|\s+/\|/gm; - $line =~ s/\s+\|/\|/gm; - - $line .= "\n" # replace linefeed if necessary - unless ($line =~ m/\n$/); - - push @ggg2, $line; - } - @ggg= @ggg2; - } - - if ($glob_orderwarn) - { - # if no ordering cols specified (no directive), and - # SELECT has ORDER BY, see if number of order - # by cols matches all cols in selected lists - if (exists($directive->{sql_statement}) - && (defined($directive->{sql_statement})) - && ($directive->{sql_statement} =~ m/select.*order.*by/is)) - { - my $fl2 = $directive->{firstline}; - my $sql_statement = $directive->{sql_statement}; - $sql_statement =~ s/\n/ /gm; - my @ocols = - ($sql_statement =~ m/select.*order.*by\s+(.*)\;/is); - -# print Data::Dumper->Dump(\@ocols); - - # lines already have newline terminator, so just rejoin them. - my $line2 = join ("", @{$outarr}); - - my $ah2 = tablelizer($line2, $fl2); - my @allcols2; - -# print Data::Dumper->Dump([$ah2]); - - @allcols2 = (keys(%{$ah2->[0]})) - if (defined($ah2) && scalar(@{$ah2})); - - # treat the order by cols as a column separated list, - # and count them. 
works ok for simple ORDER BY clauses - if (scalar(@ocols)) - { - my $ocolstr = shift @ocols; - my @ocols2 = split (/\,/, $ocolstr); - - if (scalar(@ocols2) < scalar(@allcols2)) - { - print "GP_IGNORE: ORDER_WARNING: OUTPUT ", - scalar(@allcols2), " columns, but ORDER BY on ", - scalar(@ocols2), " \n"; - } - } - } - } # end if $glob_orderwarn - - for my $line (@ggg) - { - print $dpref, $prefix, $line; - } - } - else - { - my @ggg= sort @{$outarr}; - - if ($glob_ignore_whitespace) - { - my @ggg2; - - for my $line (@ggg) - { - # remove all leading, trailing whitespace (changes sorting) - # and whitespace around column separators - $line =~ s/^\s+//; - $line =~ s/\s+$//; - $line =~ s/\|\s+/\|/gm; - $line =~ s/\s+\|/\|/gm; - - $line .= "\n" # replace linefeed if necessary - unless ($line =~ m/\n$/); - - push @ggg2, $line; - } - @ggg= sort @ggg2; - } - for my $line (@ggg) - { - print $bpref, $prefix, $line; - } - } - - if ($glob_verbose) - { - print "GP_IGNORE: end fqo $fqostate->{count}\n"; - } -} - - -sub bigloop -{ - my $sql_statement = ""; - my @outarr; - - my $getrows = 0; - my $getstatement = 0; - my $has_order = 0; - my $copy_select = 0; - my $directive = {}; - my $big_ignore = 0; - my $define_match_expression = undef; - my $error_detail_exttab_trifecta_skip = 0; # don't ask! - my $verzion = "unknown"; - - if (q$Revision: #1 $ =~ /\d+/) - { - $verzion = do { my @r = (q$Revision: #1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # must be all one line, for MakeMaker - } - my $format_fix = << "EOF_formatfix"; - ))} -EOF_formatfix - # NOTE: define $format_fix with HERE document just to fix emacs - # indenting due to comment char in Q expression... - - $verzion = $0 . " version " . $verzion; - print "GP_IGNORE: formatted by $verzion\n"; - - my $do_equiv = $glob_compare_equiv || $glob_make_equiv_expected; - - L_bigwhile: - while (<>) # big while - { - my $ini = $_; - - if ($error_detail_exttab_trifecta_skip) - { - $error_detail_exttab_trifecta_skip = 0; - next; - } - - # look for match/substitution or match/ignore expressions - if (defined($define_match_expression)) - { - unless (($ini =~ m/\-\-\s*end\_match(subs|ignore)\s*$/i)) - { - $define_match_expression .= $ini; - goto L_push_outarr; - } - - my @foo = split(/\n/, $define_match_expression, 2); - - unless (2 == scalar(@foo)) - { - $ini .= "GP_IGNORE: bad match definition\n"; - undef $define_match_expression; - goto L_push_outarr; - } - - my $stat; - - my $doc1 = $foo[1]; - - # strip off leading comment characters - $doc1 =~ s/^\s*\-\-//gm; - - if ($foo[0] =~ m/subs/) - { - $stat = _build_match_subs($doc1, "USER"); - } - else - { - $stat = _build_match_ignores($doc1, "USER"); - } - - if (scalar(@{$stat}) > 1) - { - my $outi = $stat->[1]; - - # print a message showing the error - $outi =~ s/^(.*)/GP_IGNORE: ($1)/gm; - $ini .= $outi; - } - else - { - $ini .= "GP_IGNORE: defined new match expression\n"; - } - - undef $define_match_expression; - goto L_push_outarr; - } # end defined match expression - - if ($big_ignore > 0) - { - if (($ini =~ m/\-\-\s*end\_equiv\s*$/i) && !($do_equiv)) - { - $big_ignore -= 1; - } - if ($ini =~ m/\-\-\s*end\_ignore\s*$/i) - { - $big_ignore -= 1; - } - print "GP_IGNORE:", $ini; - next; - } - elsif (($ini =~ m/\-\-\s*end\_equiv\s*$/i) && $do_equiv) - { - $equiv_expected_rows = undef; - } - - if ($ini =~ m/\-\-\s*end\_head(er|ers|ing|ings)\_ignore\s*$/i) - { - $glob_ignore_headers = 0; - } - - if ($getrows) # getting rows from SELECT output - { - # special case for copy select - if ($copy_select && - ($ini =~ 
m/(\-\-)|(ERROR)/)) - { - my @ggg= sort @outarr; - for my $line (@ggg) - { - print $bpref, $line; - } - - @outarr = (); - $getrows = 0; - $has_order = 0; - $copy_select = 0; - next; - } - - - # regex example: (5 rows) - if ($ini =~ m/^\s*\(\d+\s+row(s)*\)\s*$/) - { - - - format_query_output($glob_fqo, - $has_order, \@outarr, $directive); - - - # Always ignore the rowcount for explain plan out as the skeleton plans might be the - # same even if the row counts differ because of session level GUCs. - if (exists($directive->{explain})) - { - $ini = "GP_IGNORE:" . $ini; - } - - $directive = {}; - @outarr = (); - $getrows = 0; - $has_order = 0; - } - } - else # finding SQL statement or start of SELECT output - { - if (($ini =~ m/\-\-\s*start\_match(subs|ignore)\s*$/i)) - { - $define_match_expression = $ini; - goto L_push_outarr; - } - if (($ini =~ m/\-\-\s*start\_ignore\s*$/i) || - (($ini =~ m/\-\-\s*start\_equiv\s*$/i) && !($do_equiv))) - { - $big_ignore += 1; - - for my $line (@outarr) - { - print $apref, $line; - } - @outarr = (); - - print "GP_IGNORE:", $ini; - next; - } - elsif (($ini =~ m/\-\-\s*start\_equiv\s*$/i) && - $glob_make_equiv_expected) - { - $equiv_expected_rows = []; - $directive->{make_equiv_expected} = 1; - } - - if ($ini =~ m/\-\-\s*start\_head(er|ers|ing|ings)\_ignore\s*$/i) - { - $glob_ignore_headers = 1; - } - - # Note: \d is for the psql "describe" - if ($ini =~ m/(insert|update|delete|select|\\d|copy)/i) - { - $copy_select = 0; - $has_order = 0; - $sql_statement = ""; - - if ($ini =~ m/explain.*(insert|update|delete|select)/i) - { - $directive->{explain} = "normal"; - } - - } - - if ($ini =~ m/\-\-\s*force\_explain\s+operator.*$/i) - { - # ENGINF-137: force_explain - $directive->{explain} = "operator"; - } - if ($ini =~ m/\-\-\s*force\_explain\s*$/i) - { - # ENGINF-137: force_explain - $directive->{explain} = "normal"; - } - if ($ini =~ m/\-\-\s*ignore\s*$/i) - { - $directive->{ignore} = "ignore"; - } - if ($ini =~ m/\-\-\s*order\s+\d+.*$/i) - { - my $olist = $ini; - $olist =~ s/^.*\-\-\s*order//; - $directive->{order} = $olist; - } - if ($ini =~ m/\-\-\s*mvd\s+\d+.*$/i) - { - my $olist = $ini; - $olist =~ s/^.*\-\-\s*mvd//; - $directive->{mvd} = $olist; - } - - if ($ini =~ m/select/i) - { - $getstatement = 1; - } - if ($getstatement) - { - $sql_statement .= $ini; - } - if ($ini =~ m/\;/) # statement terminator - { - $getstatement = 0; - } - - # prune notices with segment info if they are duplicates -# if ($ini =~ m/^\s*(NOTICE|ERROR|HINT|DETAIL|WARNING)\:.*\s+\(seg.*pid.*\)/) - if ($ini =~ m/^\s*(NOTICE|ERROR|HINT|DETAIL|WARNING)\:/) - { - $ini =~ s/\s+(\W)?(\W)?\(seg.*pid.*\)//; - - # also remove line numbers from errors - $ini =~ s/\s+(\W)?(\W)?\(\w+\.[ch]:\d+\)/ (SOMEFILE:SOMEFUNC)/; - my $outsize = scalar(@outarr); - - my $lastguy = -1; - - L_checkfor: - for my $jj (1..$outsize) - { - my $checkstr = $outarr[$lastguy]; - - #remove trailing spaces for comparison - $checkstr =~ s/\s+$//; - - my $skinny = $ini; - $skinny =~ s/\s+$//; - - # stop when no more notices - last L_checkfor - if ($checkstr !~ m/^\s*(NOTICE|ERROR|HINT|DETAIL|WARNING)\:/); - - # discard this line if matches a previous notice - if ($skinny eq $checkstr) - { - if (0) # debug code - { - $ini = "DUP: " . $ini; - last L_checkfor; - } - next L_bigwhile; - } - $lastguy--; - } # end for - - - - } # end if pruning notices - - # MPP-1492 allow: - # copy (select ...) to stdout - # \copy (select ...) 
to stdout - # and special case these guys: - # copy test1 to stdout - # \copy test1 to stdout - # - # ENGINF-129: - # and "copy...; -- copy_stdout " for copy.out - my $copysel_regex = - '^\s*((.copy.*test1.*to stdout)|(copy.*test1.*to stdout\;)|(copy.*\;\s*\-\-\s*copy\_stdout))'; - - # regex example: ---- or ---+--- - # need at least 3 dashes to avoid confusion with "--" comments - if (($ini =~ m/^\s*((\-\-)(\-)+(\+(\-)+)*)+\s*$/) - # special case for copy select - || (($ini =~ m/$copysel_regex/i) - && ($ini !~ m/order\s+by/i))) - { # sort this region - - $directive->{firstline} = $outarr[-1]; - - if (exists($directive->{order}) || - exists($directive->{mvd})) - { - $directive->{sortlines} = $outarr[-1]; - } - - # special case for copy select - if ($ini =~ m/$copysel_regex/i) - { -# print "copy select: $ini\n"; - $copy_select = 1; - $sql_statement = ""; - } - # special case for explain - if (exists($directive->{explain}) && - ($ini =~ m/^\s*((\-\-)(\-)+(\+(\-)+)*)+\s*$/) && - ($outarr[-1] =~ m/QUERY PLAN/)) - { - # ENGINF-88: fixup explain headers - $outarr[-1] = "QUERY PLAN\n"; - $ini = ("_" x length($outarr[-1])) . "\n"; - - if ($glob_ignore_headers) - { - $ini = "GP_IGNORE:" . $ini; - } - } - - $getstatement = 0; - - # ENGINF-180: ignore header formatting - # the last line of the outarr is the first line of the header - if ($glob_ignore_headers && $outarr[-1]) - { - $outarr[-1] = "GP_IGNORE:" . $outarr[-1]; - } - - for my $line (@outarr) - { - print $apref, $line; - } - @outarr = (); - - # ENGINF-180: ignore header formatting - # the current line is the last line of the header - if ($glob_ignore_headers - && ($ini =~ m/^\s*((\-\-)(\-)+(\+(\-)+)*)+\s*$/)) - { - $ini = "GP_IGNORE:" . $ini; - } - - print $apref, $ini; - - if (defined($sql_statement) - && length($sql_statement) - # multiline match - && ($sql_statement =~ m/select.*order.*by/is)) - { - $has_order = 1; # so do *not* sort output - -# $sql_statement =~ s/\n/ /gm; -# print "has order: ", $sql_statement, "\n"; - $directive->{sql_statement} = $sql_statement; - } - else - { - $has_order = 0; # need to sort query output - -# $sql_statement =~ s/\n/ /gm; -# print "no order: ", $sql_statement, "\n"; - $directive->{sql_statement} = $sql_statement; - } - $sql_statement = ""; - - $getrows = 1; - next; - } # end sort this region - } # end finding SQL - - - # if MATCH then SUBSTITUTE - # see HERE document for definitions - $ini = match_then_subs($ini); - - if ($ini =~ m/External table .*line (\d)+/) - { - $ini =~ s/External table .*line (\d)+.*/External table DUMMY_EX, line DUMMY_LINE of DUMMY_LOCATION/; - $ini =~ s/\s+/ /; - # MPP-1557,AUTO-3: horrific ERROR DETAIL External Table trifecta - if ($glob_verbose) - { - print "GP_IGNORE: External Table ERROR DETAIL fixup\n"; - } - if ($ini !~ m/^DETAIL/) - { - # find a "blank" DETAIL tag preceding current line - if (scalar(@outarr) && ($outarr[-1] =~ m/^DETAIL:\s+$/)) - { - pop @outarr; - $ini = "DETAIL: " . 
$ini; - $ini =~ s/\s+/ /; - # need to skip the next record - $error_detail_exttab_trifecta_skip = 1; - } - } - if (scalar(@outarr) && - ($outarr[-1] =~ m/^ERROR:\s+missing\s+data\s+for\s+column/)) - { - $outarr[-1] = "ERROR: missing data for column DUMMY_COL\n"; - } - - } - - # if MATCH then IGNORE - # see HERE document for definitions - if ( match_then_ignore($ini)) - { - next; # ignore matching lines - } - -L_push_outarr: - - push @outarr, $ini; - - } # end big while - - for my $line (@outarr) - { - print $cpref, $line; - } -} # end bigloop - -if (1) -{ - goto l_big_bigloop - unless (defined($glob_init) && scalar(@{$glob_init})); - - # ENGINF-200: allow multiple init files - for my $init_file (@{$glob_init}) - { - die "no such file: $init_file" - unless (-e $init_file); - - # redirect stdin and stdout to perform initialization from - # the init_file file - - open my $oldout, ">&STDOUT" or die "Can't dup STDOUT: $!"; - - close STDOUT; - open (STDOUT, "> /dev/null" ) or die "can't open STDOUT: $!"; - - open my $oldin, "<&STDIN" or die "Can't dup STDIN: $!"; - close STDIN; - open STDIN, "< $init_file" or die "could not open $init_file: $!"; - - # run the standard loop - - bigloop(); - - # reset stdin, stdout - - close STDIN; - open STDIN, "<&", $oldin or die "Can't dup \$oldin: $!"; - - close STDOUT; - open STDOUT, ">&", $oldout or die "Can't dup \$oldout: $!"; - } - -l_big_bigloop: - # loop over existing stdin - bigloop(); -} - - -exit(); diff --git a/src/test/tinc/ext/explain.pl b/src/test/tinc/ext/explain.pl deleted file mode 100755 index a627e04ca3b6647f57103f1d36e231f49b3008a7..0000000000000000000000000000000000000000 --- a/src/test/tinc/ext/explain.pl +++ /dev/null @@ -1,2341 +0,0 @@ -#!/usr/bin/env perl -# -# $Header: //cdb2/main/cdb-pg/src/test/regress/explain.pl#21 $ -# -# copyright (c) 2006, 2007, 2008, 2009 -# Author: Jeffrey I Cohen -# -# -use Pod::Usage; -use Getopt::Long; -use Data::Dumper; -use strict; -use warnings; - -=head1 NAME - -B - parse and reformat Postgres EXPLAIN output - -=head1 SYNOPSIS - -B [options] filename - -Options: - - -help brief help message - -man full documentation - -option formatting option: perl, yaml, dot, query, jpg, json - -querylist list of queries - -direction direction of query plan graph: LR, RL, TB or BT. - -colorscheme graph color scheme - -timeline rank nodes by start offset time (experimental) - -prune prune tree attributes - -output output filename (else output to STDOUT). - -statcolor statistics coloring (experimental) - -edge edge decorations - -=head1 OPTIONS - -=over 8 - -=item B<-help> - - Print a brief help message and exits. - -=item B<-man> - - Prints the manual page and exits. - -=item B<-option> - - Choose the output format option. Several formats are supported. - -=over 12 - -=item perl: output in perl L format. - -=item yaml: output in L machine and human-readable format - -=item dot: output in dot graphical language for L graphing tool. - -=item querytext: output the text of the query - -=item jpg: pipe the dot output thru the dot formatter (if it is installed) to get jpg output directly. (May also support bmp, ps, pdf, png) - -=item json: output in L machine and human-readable format - -=back - - -=item B<-querylist> - - A list of queries to process. The query numbering is 1-based. Some - valid forms are: - - -querylist 1 - -querylist=2 - --ql=3,4,5 - --ql=6-9 - - or some combination. By default, all queries are processed. - - -=item B<-direction> - - Direction of data flow in query plan graph. 
Valid entries are: - -=over 12 - -=item BT (default): bottom to top - -=item TB: top to bottom - -=item LR: left to right - -=item RL: right to left - -=back - -=item B<-colorscheme> - - One of the supported ColorBrewer(TM) color schemes. Use - -color ? - to get a list of the valid schemes, and - -color dump - to output a dot file displaying all the valid schemes. - - Colors from www.ColorBrewer.org by Cynthia A. Brewer, Geography, - Pennsylvania State University. - -=item B<-prune> - - Prune tree attributes. The only supported option is "stats" to - remove the to_end and to_first timing information. - -=item B<-output> - - Output file name. If multiple queries are processed, the filename - is used as a template to generate multiple files. If the filename - has an extension, it is preserved, else an extension is supplied - based upon the formatting option. The filename template inserts - the query number before the "dot" (.) if more than one query was - processed. - -=item B<-statcolor> - - For an EXPLAIN ANALYZE plan, color according to the time spent in - node. Red is greatest, and blue is least. For statcolor=ts (default), - the node edge is colored by time, and the node interior is filled - by slice color. For statcolor=st, the color scheme is reversed. - For statcolor=t (timing only), the entire node is colored according - to the time spent. - -=item B<-edge> - - Decorate graph edges with row count if available. Valid entries are: - -=over 12 - -=item long - print average rows and number of workers - -=item medium - print average rows and number of workers compactly - -=item short - print total row counts - -=back - - -=back - -=head1 DESCRIPTION - -explain.pl reads EXPLAIN output from a text file (or standard -input) and formats it in several ways. The text file must contain -output in one of the the following formats. The first is a regular -EXPLAIN format, starting the the QUERY PLAN header and ending with the -number of rows in parentheses. Indenting must be on: - - QUERY PLAN - ---------------------------------------------------------------------------------------------------------- - Gather Motion 64:1 (slice2) (cost=6007722.78..6007722.79 rows=6 width=51) - Merge Key: partial_aggregation.l_returnflag, partial_aggregation.l_linestatus - -> Sort (cost=6007722.78..6007722.79 rows=6 width=51) - Sort Key: partial_aggregation.l_returnflag, partial_aggregation.l_linestatus - -> HashAggregate (cost=6007722.52..6007722.70 rows=6 width=51) - Group By: lineitem.l_returnflag, lineitem.l_linestatus - -> Redistribute Motion 64:64 (slice1) (cost=6007721.92..6007722.31 rows=6 width=51) - Hash Key: lineitem.l_returnflag, lineitem.l_linestatus - -> HashAggregate (cost=6007721.92..6007722.19 rows=6 width=51) - Group By: lineitem.l_returnflag, lineitem.l_linestatus - -> Seq Scan on lineitem (cost=0.00..3693046.50 rows=92587017 width=51) - Filter: l_shipdate <= '1998-09-08 00:00:00'::timestamp without time zone - (12 rows) - - -The second acceptable format is EXPLAIN ANALYZE, listing -each query followed by the EXPLAIN output delineated by vertical bars -('|', e.g. 
|QUERY PLAN| ): - - EXPLAIN ANALYZE - - - select - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order - from - lineitem - where - l_shipdate <= date '1998-12-01' - interval '106 day' - group by - l_returnflag, - l_linestatus - order by - l_returnflag, - l_linestatus; - - - Query 1 complete, 19 rows returned - - - |QUERY PLAN| - |Gather Motion 64:1 (slice2) (cost=5990545.19..5990545.21 rows=6 width=51)| - | recv: Total 4 rows with 1294937 ms to end.| - | Merge Key: partial_aggregation.junk_attr_1, partial_aggregation.junk_attr_2| - | -> Sort (cost=5990545.19..5990545.21 rows=6 width=51)| - | Avg 1.00 rows x 4 workers. Max 1 rows (seg49) with 1294938 ms to end.| - | Sort Key: partial_aggregation.junk_attr_1, partial_aggregation.junk_attr_2| - | -> HashAggregate (cost=5990544.94..5990545.12 rows=6 width=51)| - | Avg 1.00 rows x 4 workers. Max 1 rows (seg49) with 1294933 ms to end.| - | Group By: lineitem.l_returnflag, lineitem.l_linestatus| - | -> Redistribute Motion 64:64 (slice1) (cost=5990544.34..5990544.73 rows=6 width=51)| - | recv: Avg 64.00 rows x 4 workers. Max 64 rows (seg49) with 1277197 ms to first row, 1294424 ms to end.| - | Hash Key: lineitem.l_returnflag, lineitem.l_linestatus| - | -> HashAggregate (cost=5990544.34..5990544.61 rows=6 width=51)| - | Avg 4.00 rows x 64 workers. Max 4 rows (seg44) with 1292222 ms to end.| - | Group By: lineitem.l_returnflag, lineitem.l_linestatus| - | -> Seq Scan on lineitem (cost=0.00..3693046.50 rows=91899913 width=51)| - | Avg 91914578.95 rows x 64 workers. Max 91914598 rows (seg13) with 14.694 ms to first row, 258614 ms to end.| - | Filter: l_shipdate <= '1998-08-17 00:00:00'::timestamp without time zone| - |1295317.560 ms elapsed| - - Time was 1295.33 seconds. Query ended at Thu Oct 12 12:09:27 2006 - -=head1 CAVEATS/LIMITATIONS - -If explain.pl uses Graphviz to graph the query plan, it may flip the -left and right children of a join to obtain a more balanced pictorial -representation. Use the -edge option to label graph edges to -correctly identify the left and right children. - -=head1 AUTHORS - -Jeffrey I Cohen - -Copyright (c) 2006, 2007, 2008, 2009 GreenPlum. All rights reserved. - -Address bug reports and comments to: jcohen@greenplum.com - - -=cut - -# IMPLEMENTATION NOTES: -# -# EXPLAIN ANALYZE final statistics in analyze_node: -# -# The final statistics look like this: -# -# Slice statistics: -# (slice0) Executor memory: 472K bytes. -# (slice1) Executor memory: 464K bytes avg x 2 workers, 464K bytes max (seg0). -# Settings: -# -# Total runtime: 52347.493 ms -# -# The "Settings" entry is optional (ie, it only exists if you change the -# settings in your session). If the "Settings" entry is missing explain.pl -# adds a dummy entry to the statistics. This technique is a bit easier -# than changing the parser to handle both cases. -# -# Parse_node: -# InitPlan entries in greenplum are in separate slices, so explain.pl -# prefixes them with an arrow (and adds a fake cost) to make them -# look like a top-level execution node. Again, this technique was -# easier than modifying the parser to special case InitPlan. 
-# -# Plan parsing in general: -# The original code only dealt with the formatted output: -# |QUERY PLAN| -# |Gather Motion 64:1 (slice2) (cost=5990545.19..5990545.21 rows=6 width=51)| -# | recv: Total 4 rows with 1294937 ms to end.| -# | Merge Key: partial_aggr.junk_attr_1, partial_aggr.junk_attr_2| -# | -> Sort (cost=5990545.19..5990545.21 rows=6 width=51)| -# | Avg 1.00 rows x 4 workers. Max 1 rows (seg49) with 1294 ms to end.| -# -# It was easier to modify the parser to wrap the input with missing bars -# than handle two cases (are you sensing a pattern here?). -# -# "Magic" Mode: -# This mode just adds an output filename option and constructs jpgs -# -# Output File Name: -# The guts of the formatting code always write to STDOUT, so this code -# resets STDOUT to the filename of choice. -# -# treemap: -# This routine applies a function over the entire parse tree -# -# OLAP fixups: -# OLAP queries have duplicate Shared Scan and Multi Slice Motion nodes. -# explain.pl only fixes them up for dot output, but not for yaml, perl, etc. -# The rationale is that dot handle digraphs nicely, but yaml and perl are -# more suitable for tree output. -# - -my $glob_id = ""; - -my $glob_optn; -my $glob_qlist; -my $glob_direction; -my $glob_timeline; -my $glob_prune; -my $glob_outi; -my $glob_statcolor; -my $glob_edge; - -my $GV_formats; # graphviz output formats - -my %glob_coltab; - -my %glob_divcol; - -my $glob_colorscheme; -BEGIN { - - $GV_formats = '^(jpg|bmp|ps|pdf|png)$'; - - # table of valid "qualitative" color schemes - - %glob_coltab = (set312 => [ - '#8DD3C7', - '#FFFFB3', - '#BEBADA', - '#FB8072', - '#80B1D3', - '#FDB462', - '#B3DE69', - '#FCCDE5', - '#D9D9D9', - '#BC80BD', - '#CCEBC5', - '#FFED6F' - ], - paired12 => [ - '#a6cee3', - '#1f78b4', - '#b2df8a', - '#33a02c', - '#fb9a99', - '#e31a1c', - '#fdbf6f', - '#ff7f00', - '#cab2d6', - '#6a3d9a', - '#ffff99', - '#b15928' - ], - pastel19 => [ - '#fbb4ae', - '#b3cde3', - '#ccebc5', - '#decbe4', - '#fed9a6', - '#ffffcc', - '#e5d8bd', - '#fddaec', - '#f2f2f2' - ], - pastel24 => [ - '#b3e2cd', - '#fdcdac', - '#cbd5e8', - '#f4cae4', - '#e6f5c9', - '#fff2ae', - '#f1e2cc', - '#cccccc' - ], - set19 => [ - '#e41a1c', - '#377eb8', - '#4daf4a', - '#984ea3', - '#ff7f00', - '#ffff33', - '#a65628', - '#f781bf', - '#999999' - ], - set28 => [ - '#66c2a5', - '#fc8d62', - '#8da0cb', - '#e78ac3', - '#a6d854', - '#ffd92f', - '#e5c494', - '#b3b3b3' - ], - original => [ - 'azure', - 'cornsilk', - 'lavender', - 'mintcream', - 'mistyrose', - 'lightgray', - 'salmon', - 'goldenrod', - 'cyan' - ], - ); - - # diverging color schemes - - %glob_divcol = ( - rdbu11 => [ - '#67001f', - '#b2182b', - '#d6604d', - '#f4a582', - '#fddac7', - '#f6f6f6', - '#d1e5f0', - '#92c5de', - '#4393c3', - '#2166ac', - '#053061' - ], - ); - -} - -sub qlist_fixup -{ - my $qlist = shift; - - my @outi; - - for my $qnum (@{$qlist}) - { - if ($qnum =~ m/^\d+$/) - { - push @outi, $qnum; - } - else - { - if ($qnum =~ m/^\d+\-\d+$/) - { - my $expr = $qnum; - $expr =~ s/\-/\.\./; - - eval "for my \$val ($expr) { push \@outi, \$val; }"; - } - else - { - die("Invalid format for querylist: \'$qnum\'\n"); - exit(1); - } - - } - - } - - return \@outi; - -} - - -# dump a nice graph listing all of the color schemes (neato is preferred) -sub dodumpcolor -{ - my $coltab = shift; - my $fh = shift; - - my @ggg = sort(keys(%{$coltab})); - - # centered, with lines (not arrows) - print $fh "digraph plan1 { graph [center=\"root\",root=\"root\"] ;\n edge [dir=none]\n"; - - # adjust the lengths to avoid overlap - for 
my $ii (0..(scalar(@ggg)-1)) - { - print $fh '"root" -> "' . $ii. '"' . " [len=2];\n"; - - my $jj = 0; - - for my $cc (@{$coltab->{$ggg[$ii]}}) - { - print $fh '"' . $ii. '" -> "' . $ii. "." . $jj. '"' - . " [len=1];\n"; - $jj++; - } - } - - print $fh '"root" [label="color schemes"]' . ";\n"; - - for my $ii (0..(scalar(@ggg)-1)) - { - print $fh '"' . $ii . '" [label="' . $ggg[$ii] . '"]' . ";\n"; - - my $jj = 0; - - for my $cc (@{$coltab->{$ggg[$ii]}}) - { - print $fh '"' . $ii . "." . $jj . - '" [label="", style=filled, ' . - 'fillcolor="' . $cc . '"]' . ";\n"; - $jj++; - } - } - print $fh "\n}\n"; - -} - - -BEGIN { - my $man = 0; - my $help = 0; - my $optn = "YAML"; - my $dir = "BT"; - my $DEFAULT_COLOR = "set28"; - my $colorscheme = $DEFAULT_COLOR; - my $timeline = ''; - my $prune; - my $outfile; - my $statcol; - my $edgescheme; - - my @qlst; - - GetOptions( - 'help|?' => \$help, man => \$man, - "querylist|ql|list:s" => \@qlst, - "option|operation=s" => \$optn, - "direction:s" => \$dir, - "colorscheme:s" => \$colorscheme, - "timeline" => \$timeline, - "prune:s" => \$prune, - "output:s" => \$outfile, - "statcolor:s" => \$statcol, - "edge:s" => \$edgescheme) - or pod2usage(2); - - - pod2usage(-msg => $glob_id, -exitstatus => 1) if $help; - pod2usage(-msg => $glob_id, -exitstatus => 0, -verbose => 2) if $man; - - $glob_optn = $optn; - $glob_optn = "jpg" if ($glob_optn =~ m/^jpeg/i); - - $glob_timeline = $timeline; - $glob_prune = $prune; - $glob_outi = $outfile; - $glob_statcolor = $statcol; - - $glob_edge = $edgescheme; - - if ($dir !~ m/^(TB|BT|LR|RL)$/i) - { - $glob_direction = "BT"; - } - else - { - $glob_direction = uc($dir); - } - - $colorscheme = lc($colorscheme); - -# print "color: $colorscheme\n"; - - if (exists($glob_coltab{$colorscheme})) - { - $glob_colorscheme = $colorscheme; - } - else - { - if ($colorscheme =~ m/list|dump/i) - { - use IO::File; - use POSIX qw(tmpnam); - - my ($tmpnam, $tmpfh); - - for (;;) { - $tmpnam = tmpnam(); - sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last; - } - - # write to a temporary file - dodumpcolor(\%glob_coltab, $tmpfh); - - close $tmpfh; - - my $catcmd = "cat $tmpnam"; - - # format with neato if jpg was specified - if ($glob_optn =~ m/$GV_formats/i) - { - my $dotapp = "/Applications/Graphviz.app/Contents/MacOS/neato"; - - if ($^O !~ m/darwin/) - { - $dotapp = `which neato`; - chomp($dotapp); - } - if (defined($dotapp) && length($dotapp) && (-e $dotapp)) - { - $catcmd .= " | $dotapp -T$glob_optn"; - } - } - - system($catcmd); - - unlink $tmpnam; - - exit(0); - } - else - { - my $colorschemelist = join("\n", sort(keys(%glob_coltab))) . "\n"; - - # identify the default color - $colorschemelist =~ - s/$DEFAULT_COLOR/$DEFAULT_COLOR \(default\)/gm; - - print "\nvalid color schemes are:\n"; - print $colorschemelist; - print "\nUse: \"explain.pl -color dump -opt jpg > graph.jpg\"\n"; - print "to construct a JPEG showing all the valid color schemes.\n"; - print "\nColors from www.ColorBrewer.org by Cynthia A. Brewer, Geography,\nPennsylvania State University.\n\n"; - - exit(0); - } - } - - @qlst = split(/,/,join(',', @qlst)); - - $glob_qlist = qlist_fixup(\@qlst); - -# print "loading...\n" ; -} - -sub analyze_node -{ - my ($node, $parse_ctx) = @_; - - if (defined($node) && exists($node->{txt})) - { - - # gather analyze statistics if it exists in this node... 
- if ($node->{txt} =~ - m/Slice\s+statistics.*(Settings.*)*Total\s+runtime/s) - { - my $t1 = $node->{txt}; - - # NOTE: the final statistics look something like this: - - # Slice statistics: - # (slice0) Executor memory: 472K bytes. - # (slice1) Executor memory: 464K bytes avg x 2 workers, 464K bytes max (seg0). - # Settings: - # Total runtime: 52347.493 ms - - # (we've actually added some vertical bars so it might look - # like this): - # || Slice statistics: - # || (slice0) Executor memory: 472K bytes. - - # NB: the "Settings" entry is optional, so - # add Settings if they are missing - unless ($t1 =~ - m/Slice\s+statistics.*Settings.*Total\s+runtime/s) - { - $t1 =~ - s/\n.*\s+Total\s+runtime/\n Settings\: \n Total runtime/; - } - - my @foo = ($t1 =~ m/Slice\s+statistics\:\s+(.*)\s+Settings\:\s+(.*)\s+Total\s+runtime:\s+(.*)\s+ms/s); - - if (scalar(@foo) == 3) - { - my $mem = shift @foo; - my $sett = shift @foo; - my $runt = shift @foo; - - $mem =~ s/\|\|//gm; # remove '||'... - $sett =~ s/\|\|//gm; - - my $statstuff = {}; - - my @baz = split(/\n/, $mem); - my $sliceh = {}; - for my $elt (@baz) - { - my @ztesch = ($elt =~ m/(slice\d+)/); - next unless (scalar(@ztesch)); - $elt =~ s/\s*\(slice\d+\)\s*//; - my $val = shift @ztesch; - $sliceh->{$val} = $elt; - } - - $statstuff->{memory} = $sliceh; - $statstuff->{settings} = $sett; - $statstuff->{runtime} = $runt; - $parse_ctx->{explain_analyze_stats} = $statstuff; - $node->{statistics} = $statstuff; - } - } - - my @short = $node->{txt} =~ m/\-\>\s*(.*)\s*\(cost\=/; - $node->{short} = shift @short; - - unless(exists($node->{id})) - { - print Data::Dumper->Dump([$node]), "\n"; - } - - if ($node->{id} == 1) - { - @short = $node->{txt} =~ m/^\s*\|\s*(.*)\s*\(cost\=/; - $node->{short} = shift @short; - - # handle case where dashed line might have wrapped... - unless (defined($node->{short}) && length($node->{short})) - { - # might not be first line... - @short = $node->{txt} =~ m/\s*\|\s*(.*)\s*\(cost\=/; - $node->{short} = shift @short; - } - - - } - - # handle case of "cost-free" txt (including a double || - # and not first line, or screwed-up parse of short as a single bar - # - # example: weird initplan like: - # || -> InitPlan (slice49) - if (defined($node->{short}) && length($node->{short}) - && ($node->{short} =~ m/\s*\|\s*/)) - { - $node->{short} = ""; - } - - unless (defined($node->{short}) && length($node->{short})) - { - @short = $node->{txt} =~ m/\s*\|(\|)?\s*(\w*)\s*/; - $node->{short} = shift @short; - - if (defined($node->{short}) && length($node->{short}) - && ($node->{short} =~ m/\s*\|\s*/)) - { - $node->{short} = ""; - } - - # last try!! - unless (defined($node->{short}) && length($node->{short})) - { - my $foo = $node->{txt}; - $foo =~ s/\-\>//gm; - $foo =~ s/\|//gm; - $foo =~ s/^\s+//gm; - $foo =~ s/\s+$//gm; - $node->{short} = $foo; - } - -# print "long: $node->{txt}\n"; -# print "short: $node->{short}\n"; - } - - $node->{short} =~ s/\s*$//; - - # remove quotes which mess up dot file - $node->{short} =~ s/\"//gm; - -# print "long: $node->{txt}\n"; -# print "short: $node->{short}\n"; - - # XXX XXX XXX XXX: FINAL "short" fixups - while (defined($node->{short}) && length($node->{short}) - && ($node->{short} =~ m/(\n)|^\s+|\s+$|(\(cost\=)/m)) - { - # remove leading and trailing spaces... - $node->{short} =~ s/^\s*//; - $node->{short} =~ s/\s*$//; - - # remove newlines - $node->{short} =~ s/(\n).*//gm; - - # remove cost=... 
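-            # (the enclosing while loop re-runs these substitutions until the
-            # short name has no newlines, padding, or cost=... text left)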
- $node->{short} =~ s/\(cost\=.*//gm; - -# print "short fixup: $node->{short}\n\n\n"; - } - - { - if ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*end)/i) - { - - my @ggg = - ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*end)/i); - -# print join('*', @ggg), "\n"; - - my $tt = $ggg[0]; - - $node->{to_end} = $tt; - - $parse_ctx->{alltimes}->{$tt} = 1; - - if (exists($parse_ctx->{h_to_end}->{$tt})) - { - push @{$parse_ctx->{h_to_end}->{$tt}}, '"'. $node->{id} .'"'; - } - else - { - $parse_ctx->{h_to_end}->{$tt} = ['"'. $node->{id} . '"']; - } - - - - } - if ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*first\s*row)/i) - { - - my @ggg = - ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*first\s*row)/i); - -# print join('*', @ggg), "\n"; - - my $tt = $ggg[0]; - - $node->{to_first} = $tt; - - $parse_ctx->{alltimes}->{$tt} = 1; - - if (exists($parse_ctx->{h_to_first}->{$tt})) - { - push @{$parse_ctx->{h_to_first}->{$tt}}, '"' . $node->{id} . '"' ; - } - else - { - $parse_ctx->{h_to_first}->{$tt} = [ '"' . $node->{id} . '"']; - } - - } - - if ($node->{txt} =~ m/start offset by (\d+(\.\d*)?)(\s*ms)/i) - { - - my @ggg = - ($node->{txt} =~ m/start offset by (\d+(\.\d*)?)(\s*ms)/i); - -# print join('*', @ggg), "\n"; - - my $tt = $ggg[0]; - - $node->{to_startoff} = $tt; - - $parse_ctx->{allstarttimes}->{$tt} = 1; - - if (exists($parse_ctx->{h_to_startoff}->{$tt})) - { - push @{$parse_ctx->{h_to_startoff}->{$tt}}, '"'. $node->{id} .'"'; - } - else - { - $parse_ctx->{h_to_startoff}->{$tt} = ['"'. $node->{id} . '"']; - } - } - - if (exists($node->{to_end})) - { - $node->{total_time} = - (exists($node->{to_first})) ? - ($node->{to_end} - $node->{to_first}) : - $node->{to_end}; - } - - - } - - if (1) - { - if (exists($node->{child})) - { - delete $node->{child} - unless (defined($node->{child}) - && scalar(@{$node->{child}})); - } - } - - - } - -} - -sub parse_node -{ - - my ($ref_id, $parse_ctx, $depth, $plan_rows, $parent) = @_; - -# print "depth: $depth\n"; -# print "row: ",$plan_rows->[0],"\n" if (scalar(@{$plan_rows})); - -# print "first: $first\n" if defined ($first); - - my $spclen = undef; - my $node = undef; - - my $no_more_text = 0; - - while (scalar(@{$plan_rows})) - { - my $row = $plan_rows->[0]; - - unless (defined($node)) - { - $node = {}; - - $node->{child} = []; - - $node->{txt} = ""; - - $node->{parent} = $parent - if (defined($parent)); - - my $id = $$ref_id; - $id++; - $$ref_id= $id; - $node->{id} = $id; - } - - # XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX - # make initplan into a fake node so the graphs look nicer. - # Prefix it with an arrow and add a fake cost. - if ($row =~ m/\|(\s)*InitPlan(.*)slice/) - { - $row =~ s/InitPlan/\-\> InitPlan/; - if ($row !~ m/\(cost=/) - { - $row =~ s/\|$/\(cost=\?\)|/; - } - } - - if ($row !~ m/\|(\s)*\-\>/) - { - # add text to existing node - - if ($no_more_text) - { - print "error: no more text for ". $node->{id}, "\n"; - } - - $node->{txt} .= "\n" . $row; - -# print "txt: $node->{txt}\n"; - - shift @{$plan_rows}; - next; - } - else - { - # new node - unless ($no_more_text) - { - unless (length($node->{txt})) - { - $node->{txt} .= $row; - shift @{$plan_rows}; - next; - } - } - - # match the leading spaces before the '->', eg: - # "| -> Sort (cost=5990545.19..599..." - - my @spc = ($row =~ m/\|(\s*)\-\>/); - -# print "space match:", Data::Dumper->Dump(\@spc), "\n"; - - $spclen = scalar(@spc) ? 
length($spc[0]) : 0; - -# print "space len: $spclen, depth: $depth\n"; - - if ($spclen > $depth) - { - # found a child - push @{$node->{child}}, parse_node($ref_id, $parse_ctx, - $spclen, $plan_rows, - $node->{id}); - } - - } - - if (defined($spclen)) - { - if ($spclen <= $depth) - { # found a sibling or parent - # need to put the row back on the head of the list - - if (defined($node) && exists($node->{txt})) - { - analyze_node($node, $parse_ctx); - - return $node; - } - } - } - else - { - die ("what the heck?"); - } - - $spclen = undef; - $no_more_text = 1; - - } # end while - - if (defined($node)) - { - analyze_node($node, $parse_ctx); - } - - return $node; - -} - - -if (1) -{ - - my @bigarr; - - my $state = "INIT"; - - my $pair = undef; - - my ($query, $plan); - - my $ea_format=0; - my $bigdash = '-' x 40; # make big dash smaller - - my $magic; - - for (<>) - { - my $ini = $_; - - if ($state =~ m/INIT/) - { - if ($ini !~ m/(^EXPLAIN ANALYZE)|(QUERY PLAN)/) - { - next; - } - - $query = ""; - $plan = ""; - $pair = {}; - - if ($ini =~ m/^EXPLAIN ANALYZE/) - { - $ea_format = 1; - $state = "GETQUERY"; - next; - } - - if ($ini =~ m/QUERY PLAN/) - { - $ea_format = 0; - $plan = ""; - $state = "GETPLAN"; - next; - } - - } - - if ($state !~ m/GETPLAN/) - { - # should be START or GETQUERY only... - if ($ea_format) - { - if ($ini =~ m/^EXPLAIN ANALYZE/) - { - if (defined($pair)) - { - $pair->{plan} = $plan; - $pair->{query} = $query; - push @bigarr, $pair; - } - $pair = {}; - $query = ""; - $plan = ""; - $state = "GETQUERY"; - next; - } - } - else - { - # not explain analyze - if ($ini =~ m/QUERY PLAN/) - { - if (defined($pair)) - { - $pair->{plan} = $plan; - $pair->{query} = $query; - push @bigarr, $pair; - } - $pair = {}; - $query = ""; - $plan = ""; - $state = "GETPLAN"; - next; - } - - } - if ($state =~ m/GETQUERY/) - { - if ($ini =~ m/QUERY PLAN/) - { - if (!($ea_format)) - { - if (defined($pair)) - { - $pair->{plan} = $plan; - $pair->{query} = $query; - push @bigarr, $pair; - } - $pair = {}; - $query = ""; - } - - $plan = ""; - $state = "GETPLAN"; - next; - } - - $query .= $ini; - } - - } # end not getplan - - if ($state =~ m/GETPLAN/) - { - - if ($ea_format) - { - if ($ini !~ m/\|(.*)\|/) - { - $state = "START"; - next; - } - } - else - { - if ($ini =~ m/(\(\d+\s+rows\))|(Time\s+was.*seconds\.\s+Query\s+ended)/) - { - $state = "START"; - next; - } - } - # a bit weird here -- just ignore the separator. But - # maybe we should invest some effort to determine that the - # separator is the next line after the header (and only - # ignore it once) ? - next - if ($ini =~ m/$bigdash/); - - # add the missing bars - if (!($ea_format)) - { - if ($ini !~ m/\|(.*)\|/) - { - $ini = '|' . $ini . 
'|'; - } - } - - $plan .= $ini; - } - - } # end big for - if (defined($pair)) - { - $pair->{plan} = $plan; - $pair->{query} = $query; - push @bigarr, $pair; - } - -#print scalar(@bigarr), "\n"; - - -#print Data::Dumper->Dump(\@bigarr); - -#print $bigarr[0]->{plan}; - - unless(scalar(@{$glob_qlist})) - { - # build a 1-based list of queries - for (my $ii =1; $ii <= scalar(@bigarr); $ii++) - { - push @{$glob_qlist}, $ii; - } - } - - my $realSTDOUT; - - for my $qqq (@{$glob_qlist}) - { - my $qnum = $qqq - 1; # 0 based vs 1 based - - if ($qnum > scalar(@bigarr)) - { - warn("specified query $qqq is out-of-range -- skipping...\n"); - next; - } - - if ($glob_optn =~ m/query|text|txt/i) - { - doquery($bigarr[$qnum]->{query}); - next; - } - - my $plantxt = $bigarr[$qnum]->{plan}; - - unless (defined($plantxt) && length($plantxt)) - { - warn("invalid plan for query $qqq -- skipping...\n"); - next; - } - -#print $plantxt, "\n"; - - my @plan_r = split(/\n/, $plantxt); - - my $pr = \@plan_r; - - my $parse_ctx = {}; - - my $id = 0; - - $parse_ctx->{alltimes} = {}; - $parse_ctx->{h_to_end} = {}; - $parse_ctx->{h_to_first} = {}; - - $parse_ctx->{allstarttimes} = {}; - $parse_ctx->{h_to_startoff} = {}; - $parse_ctx->{explain_analyze_stats} = {}; - - my $plantree = parse_node(\$id, $parse_ctx, 0, $pr); - -# my @timelist = sort {$a <=> $b} keys (%{$parse_ctx->{alltimes}}); - my @timelist = sort {$a <=> $b} keys (%{$parse_ctx->{allstarttimes}}); - - - if (defined($glob_prune)) - { - if ($glob_prune =~ m/stat|heavy|heavily/i) - { - my $map_expr = 'delete $node->{to_end};'; - treeMap($plantree, undef, $map_expr); - $map_expr = 'delete $node->{to_first};'; - treeMap($plantree, undef, $map_expr); - - # additional statistics - $map_expr = 'delete $node->{to_startoff};'; - treeMap($plantree, undef, $map_expr); - $map_expr = 'delete $node->{total_time};'; - treeMap($plantree, undef, $map_expr); - $map_expr = 'delete $node->{statistics};'; - treeMap($plantree, undef, $map_expr); - } - if ($glob_prune =~ m/heavy|heavily/i) - { - treeMap($plantree, 'prune_heavily($node);'); - } - } - - - # magic mode : display everything magically - # - # NOTE: only set to magic on the first iteration, then reset - # to jpg, so performs correctly with multiple queries - if ($glob_optn =~ m/magic/i) - { - $glob_optn = "jpg"; - - use IO::File; - use POSIX qw(tmpnam); - - my $tmpnam; - - for (;;) { - my $tmpfh; - - $tmpnam = tmpnam(); - sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last; - } - - # create a temporary directory name -- just append ".dir" - # to the new tempfile name and mkdir - my $tmpdir = $tmpnam . 
".dir"; - - mkdir($tmpdir) or die "magic failed" ; - - unlink $tmpnam; # we didn't need this tempfile anyhow - - # reset output file name to create files in the new - # temporary directory - $glob_outi = File::Spec->catfile($tmpdir, "query_"); - - $magic = $glob_outi; - } - - if ($glob_outi) - { - unless (defined($realSTDOUT)) - { - open $realSTDOUT, ">&STDOUT" or die "Can't dup STDOUT: $!"; - } - - my $outfilename = $glob_outi; - - # only need numbering if processed more than one query - my $neednum = (scalar(@bigarr) > 1); - - # check if name has an extension like ".foo" - if ($outfilename =~ m/\.(.){1,5}$/) - { - # qqq is query num (1 based) - - my $formatq = sprintf("%03d", $qqq); - - $outfilename =~ s/\.(.*)$/$formatq\.$1/ - if ($neednum); - } - else - { - # qqq is query num (1 based) - my $formatq = sprintf("%03d", $qqq); - - $outfilename .= $formatq - if ($neednum); - if ($glob_optn =~ m/yaml/i) - { - $outfilename .= ".yml"; - } - if ($glob_optn =~ m/json/i) - { - $outfilename .= ".json"; - } - if ($glob_optn =~ m/perl|dump/i) - { - $outfilename .= ".perl"; - } - if ($glob_optn =~ m/dot|graph/i) - { - $outfilename .= ".dot"; - } - if ($glob_optn =~ m/$GV_formats/i) - { - $outfilename .= ".$glob_optn"; - } - } - - close STDOUT; - - open (STDOUT, ">$outfilename" ) or die "can't open STDOUT: $!"; - -# print $outfilename, "\n"; - - } - - - if ($glob_optn =~ m/yaml/i) - { - doyaml($plantree); - } - if ($glob_optn =~ m/json/i) - { - doyaml($plantree, "json"); - } - if ($glob_optn =~ m/perl|dump/i) - { - doDataDump($plantree); - } - if ($glob_optn =~ m/dot|graph/i) - { - dodotfile($plantree, \@timelist, $qqq, $parse_ctx, - $glob_direction); - } - if ($glob_optn =~ m/operator/i) - { - doOperatorDump($plantree); - } - - if ($glob_optn =~ m/$GV_formats/i) - { - my $dotapp = "/Applications/Graphviz.app/Contents/MacOS/dot"; - - if ($^O !~ m/darwin/) - { - $dotapp = `which dot`; - chomp($dotapp); - } - die "could not find dot app: $dotapp" - unless (defined($dotapp) && length($dotapp) && (-e $dotapp)); - - # should have been able to redirect STDOUT thru a pipe - # directly to dotapp, but didn't work. Use a tmpfile - # instead. - - use IO::File; - use POSIX qw(tmpnam); - - my $tmpnam; - - for (;;) { - my $tmpfh; - - $tmpnam = tmpnam(); - sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last; - } - open my $oldout, ">&STDOUT" or die "Can't dup STDOUT: $!"; - - close STDOUT; - open (STDOUT, ">$tmpnam" ) or die "can't open STDOUT: $!"; - - select STDOUT; $| = 1; # make unbuffered - - dodotfile($plantree, \@timelist, $qqq, $parse_ctx, - $glob_direction); - - close STDOUT; - open STDOUT, ">&", $oldout or die "Can't dup \$oldout: $!"; - - system("cat $tmpnam | $dotapp -T$glob_optn"); - - unlink $tmpnam; - - - } - } #end for querynum - - if (defined($realSTDOUT)) - { - close STDOUT; - open STDOUT, ">&", $realSTDOUT or die "Can't dup \$oldout: $!"; - - } - - # magically display all files - if (defined($magic)) - { - # only need numbering if processed more than one query - my $neednum = (scalar(@{$glob_qlist}) > 1); - - if ($^O =~ m/darwin/) - { - # use ImageMagick montage - my $ggg = $magic . 
'*'; - my $montage = `which montage`; - chomp($montage); - - # only perform a montage if more than one query - if ($neednum && defined($montage) && ( -e $montage)) - { - my $dir = $magic; - # get the directory name (remove "query_" prefix) - $dir =~ s/query_$//; - system("cd $dir; montage -label \'%f\' $ggg -title \"$dir\n`date`\" -shadow INDEX.html; open INDEX.html"); - } - else - { - system("open $ggg"); - } - } - } - -} -#print "\nmax id: $id\n\n"; - -# - -sub treeMap -{ - my ($node, $pre_map, $post_map, $ctx) = @_; - - eval "$pre_map" - if (defined($pre_map)); - - if (exists($node->{child})) - { - for my $kid (@{$node->{child}}) - { - treeMap($kid, $pre_map, $post_map, $ctx); - } - } - eval "$post_map" - if (defined($post_map)); -} - -sub doDataDump -{ - my $plantree = shift; - - - local $Data::Dumper::Indent = 1; - local $Data::Dumper::Terse = 1; - local $Data::Dumper::Sortkeys = 1; - - my $map_expr = 'delete $node->{txt};'; -# my $map_expr = 'print "foo\n"'; - treeMap($plantree, undef, $map_expr); - - print Data::Dumper->Dump([$plantree]); -} - -sub doOperatorDump -{ - my $plantree = shift; - - print $plantree->{short}, "\n" if (exists($plantree->{short})); - - return - unless (exists($plantree->{child})); - - for my $kid (@{$plantree->{child}}) - { - doOperatorDump($kid); - } -} - -# add slice info to node -# and gather explain analyze stats -sub addslice -{ - my ($node, $ctx) = @_; - - # AUTO-6: find nodes with "(slice1)" info where the slice numbers aren't - # part of the "Slice statistics" - - my $txt1 = $node->{txt}; - $txt1 =~ s/Slice statistics.*//gs; - - if ($txt1 =~ /(slice(\d+))/) - { - my @ggg = ($txt1 =~ m/(slice(\d+))/) ; - $node->{slice} = shift @ggg; - - # check if we have explain analyze stats for the slice - if (exists($ctx->{explain_analyze_stats}) - && exists($ctx->{explain_analyze_stats}->{memory}) - && exists($ctx->{explain_analyze_stats}->{memory}->{$node->{slice}})) - { - $node->{memory} = - $ctx->{explain_analyze_stats}->{memory}->{$node->{slice}}; - } - - } -} - - -sub doquery -{ - my $qtxt = shift; - - print $qtxt, "\n"; - -} - -sub doyaml -{ - my ($plantree, $opti) = @_; - - $opti = "yaml" unless (defined($opti)); - - if ($opti =~ m/json/i) - { - # JSON might not be installed, so test for it. - - if (eval "require JSON") - { - my $map_expr = 'delete $node->{txt};'; - - treeMap($plantree, undef, $map_expr); - - # because JSON is REQUIREd, not USEd, the symbols are not - # imported into the environment. - print JSON::objToJson($plantree, {pretty => 1, indent => 2}); - } - else - { - die("Fatal Error: The required package JSON is not installed -- please download it from www.cpan.org\n"); - exit(1); - } - - } - else - { - # YAML might not be installed, so test for it. - - if (eval "require YAML") - { - my $map_expr = 'delete $node->{txt};'; - - treeMap($plantree, undef, $map_expr); - - # because YAML is REQUIREd, not USEd, the symbols are not - # imported into the environment. 
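-            # (node->{txt} was already deleted via treeMap above, so the
-            # dumped YAML stays reasonably compact)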
- print YAML::Dump($plantree); - } - else - { - die("Fatal Error: The required package YAML is not installed -- please download it from www.cpan.org\n"); - exit(1); - } - - } - -} - -# remove slice numbering information to construct even more generic plans -sub prune_heavily -{ - my $node = shift; - - return - unless (exists($node->{short})); - - if ($node->{short} =~ m/Delete\s*\(slice.*segment.*\)\s*\(row.*width.*\)/) - { - # QA-1309: fix strange DELETE operator formatting - $node->{short} = "Delete"; - } - elsif ($node->{short} =~ m/Update\s*\(slice.*segment.*\)\s*\(row.*width.*\)/) - { - # QA-1309: fix strange UPDATE operator formatting - $node->{short} = "Update"; - } - elsif ($node->{short} =~ m/\d+\:\d+/) - { - - # example: Gather Motion 8:1 (slice4); - - # strip the number of nodes and slice information - $node->{short} =~ s/\s+\d+\:\d+.*//; - - # Note: don't worry about removing "(slice1)" info from the - # "short" because addslice processes node->{text} - } -} - -# identify the slice for each node -# and find Shared Scan "Primary" -# and find MultiSliceMotion -sub pre_slice -{ - my ($node, $ctx) = @_; - - { - if (scalar(@{$ctx->{a1}})) - { - my $parent = $ctx->{a1}->[-1]; - - unless (exists($node->{slice})) - { - if (exists($parent->{slice})) - { - $node->{slice} = $parent->{slice}; - } - } - } - - # olap stuff - - if ($node->{short} =~ m/^Shared Scan/) - { - # if the Shared Scan has a child it is the "primary" - if (exists($node->{child})) - { - my $share_short_fixup = $node->{short}; - - # remove the slice number from the "short" - $share_short_fixup =~ s/(\d+)\:/\:/g; - - if (!exists($ctx->{share_input_h}->{$share_short_fixup})) - { - $ctx->{share_input_h}->{$share_short_fixup} = $node; - } - } - else # not the primary, mark as a duplicate node - { - $node->{SharedScanDuplicate} = 1; - } - } - if ($node->{short} =~ m/^Multi Slice Motion/) - { - # choose first Multi Slice Motion node as primary - if (!exists($ctx->{multi_slice_h}->{$node->{short}})) - { - $ctx->{multi_slice_h}->{$node->{short}} = $node; - } - else # not the primary, mark as a duplicate node - { - $node->{MultiSliceMotionDuplicate} = 1; - } - } - - if (exists($node->{total_time})) - { - my $tt = $node->{total_time}; - my $tt2 = $tt * $tt; - $ctx->{time_stats_h}->{cnt} += 1; - $ctx->{time_stats_h}->{sum} += $tt; - $ctx->{time_stats_h}->{sumsq} += $tt2; - - if (exists($ctx->{time_stats_h}->{tt_h}->{$tt})) - { - push @{$ctx->{time_stats_h}->{tt_h}->{$tt}}, $node; - } - else - { - $ctx->{time_stats_h}->{tt_h}->{$tt} = [$node]; - } - } - - } - - push @{$ctx->{a1}}, $node; - -} - -sub post_slice -{ - my ($node, $ctx) = @_; - - pop @{$ctx->{a1}}; - -} - -# make all duplicate sharedscan nodes point back to primary -sub sharedscan_fixup -{ - my ($node, $ctx) = @_; - - if (exists($node->{SharedScanDuplicate})) - { - my $share_short_fixup = $node->{short}; - - # remove the slice number from the "short" - $share_short_fixup =~ s/(\d+)\:/\:/g; - - $node->{SharedScanDuplicate} = - $ctx->{share_input_h}->{$share_short_fixup}; -# $node->{id} = -# $node->{SharedScanDuplicate}->{id}; - } - - if (exists($node->{MultiSliceMotionDuplicate})) - { - $node->{MultiSliceMotionDuplicate} = - $ctx->{multi_slice_h}->{$node->{short}}; - # XXX XXX: for this case the node is really the same - $node->{id} = - $node->{MultiSliceMotionDuplicate}->{id}; - } - -} - -sub human_num -{ - my $esti = shift; - - my @suffix = qw(K M G T P E Z Y); - my $suff = ""; - - # try to shorten estimate specification - while (length(POSIX::ceil($esti)) > 3) - { - 
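-        # each pass consumes the next suffix (K, M, G, ...) and divides by
-        # 1000 until at most three digits remain before the decimal point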
$suff = shift @suffix; - - $esti = $esti/1000; - } - - if (length($suff)) - { - $esti *= 100; - $esti = POSIX::floor($esti+0.5); - $esti = $esti/100; - - $esti .= $suff; - } - - return $esti; -} - -# label left and right for nest loops -sub nestedloop_fixup -{ - my ($node, $ctx) = @_; - - return - unless (exists($node->{short}) && - ($node->{short} =~ m/Nested Loop/)); - - my @kidlist; - - if (exists($node->{child})) - { - for my $kid (@{$node->{child}}) - { - push @kidlist, $kid; - } - } - - return - unless (2 == scalar(@kidlist)); - - if ($kidlist[0]->{id} < $kidlist[1]->{id}) - { - $kidlist[0]->{nested_loop_position} = "left"; - $kidlist[1]->{nested_loop_position} = "right"; - } - else - { - $kidlist[1]->{nested_loop_position} = "left"; - $kidlist[0]->{nested_loop_position} = "right"; - } - -} - -# find rows out information -sub get_rows_out -{ - my ($node, $ctx, $edge) = @_; - - return - unless ($node->{txt} =~ m/(Rows out\:)|(\(cost\=.*\s+rows=.*\s+width\=.*\))/); - - my $long = ($edge =~ m/long|med/i); - - if ($node->{txt} =~ m/Rows out\:\s+Avg.*\s+rows\s+x\s+.*\s+workers/) - { - if (!$long) - { - # short result - my @foo = - ($node->{txt} =~ - m/Rows out\:\s+Avg\s+(.*)\s+rows\s+x\s+(.*)\s+workers/); - - goto L_get_est unless (2 == scalar(@foo)); - - # calculate row count as avg x num workers - $node->{rows_out} = $foo[0] * $foo[1]; - } - else - { - my @foo = - ($node->{txt} =~ - m/Rows out\:\s+(Avg.*workers)/); - - goto L_get_est unless (1 == scalar(@foo)); - - # just print the string - $node->{rows_out} = $foo[0]; - - if ($edge =~ m/med/i) - { - $node->{rows_out} =~ s/Avg\s+//; - $node->{rows_out} =~ s/rows\s+//; - $node->{rows_out} =~ s/\s*workers\s*//; - } - - } - } - elsif ($node->{txt} =~ m/Rows out\:\s+.*\s+rows/) - { - my @foo = - ($node->{txt} =~ - m/Rows out\:\s+(.*)\s+rows/); - - goto L_get_est unless (1 == scalar(@foo)); - - $node->{rows_out} = $foo[0]; - } - - if ( - exists($node->{rows_out}) && - length($node->{rows_out}) - ) - { - if ( - ($node->{rows_out} !~ m/avg/i) && - ($node->{rows_out} =~ m/x/)) - { - my @foo = ($node->{rows_out} =~ m/(x.*)$/); - - my $tail = $foo[0]; - - @foo = ($node->{rows_out} =~ m/(.*)\s+x.*/); - - my $head = $foo[0]; - - if (defined($tail) && defined($head)) - { - $head = human_num($head); - - $node->{rows_out} = $head . " " . $tail; - } - - } - elsif ($node->{rows_out} =~ m/^\d+$/) - { - $node->{rows_out} = human_num($node->{rows_out}); - } - } - -L_get_est: - - # add row estimates - if ($long && - ($node->{txt} =~ m/\(cost\=.*\s+rows=.*\s+width\=.*\)/)) - { - my @foo = ($node->{txt} =~ m/cost\=.*\s+rows=(\d+)\s+width\=.*/); - - if (scalar(@foo)) - { - use POSIX; - - my $esti = $foo[0]; - - $esti = human_num($esti); - - unless (exists($node->{rows_out}) && - length($node->{rows_out})) - { - $node->{rows_out} = ""; - } - - $node->{rows_out} .= " (est $esti)"; - } - } - -} # end get_rows_out - -sub calc_color_rank -{ - my $ctx = shift; - - return - unless (defined($glob_statcolor)); - - if ($ctx->{time_stats_h}->{cnt} > 1) - { - # population variance = - # (sum of the squares)/n - (square of the sums)/n*n - my $sum = $ctx->{time_stats_h}->{sum}; - my $sumsq = $ctx->{time_stats_h}->{sumsq}; - my $enn = $ctx->{time_stats_h}->{cnt}; - - my $pop_var = ($sumsq/$enn) - (($sum*$sum)/($enn*$enn)); - my $std_dev = sqrt($pop_var); - my $mean = $sum/$enn; - my $half = $std_dev/2; - - # calculate a stanine (9 buckets, each 1/2 of stddev). 
The - # middle bucket (5, which is 4 if we start at zero) is - # centered on the mean, so it starts on mean - (1/4 stddev), - # and ends at mean + (1/4 stddev). - my @bucket; - my $buckstart = ($mean-($half/2))-(3*$half); - - push @bucket, 0; - - for my $ii (1..7) - { - push @bucket, $buckstart; - $buckstart += $half; - } - push @bucket, 2**40; # "infinity" - - my @tlist = sort {$a <=> $b} (keys %{$ctx->{time_stats_h}->{tt_h}}); - - # must have at least two - my $firstt = shift @tlist; - my $lastt = pop @tlist; -# print "f,l: $firstt, $lastt\n"; - - for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$firstt}}) - { -# print "first ", $nod->{id}, ": ", $nod->{short}, " - ", 0, "\n"; - $nod->{color_rank} = 10; - } - for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$lastt}}) - { -# print "last ", $nod->{id}, ": ", $nod->{short}, " - ", 10, "\n"; - $nod->{color_rank} = 1; - } - -# print "bucket: ", Data::Dumper->Dump(\@bucket); -# print "tlist: ", Data::Dumper->Dump(\@tlist); -# print Data::Dumper->Dump([$ctx->{time_stats_h}]); - - my $bucknum = 1; - for my $tt (@tlist) - { -# print "tt: $tt\n"; -# print "bk: $bucket[$bucknum]\n"; - - while ($tt > $bucket[$bucknum]) - { -# print "$tt > $bucket[$bucknum]\n"; -# last if ($bucknum >= 11); - $bucknum++; - } - for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$tt}}) - { -# print "node ", $nod->{id}, ": ", $nod->{short}, " - ", $bucknum, "\n"; -# $nod->{color_rank} = ($bucknum-1); - $nod->{color_rank} = (10 - $bucknum); - } - } - - } - -} - -sub dodotfile -{ - my ($plantree, $time_list, $plan_num, $parse_ctx, $direction) = @_; - - { - my $map_expr = 'addslice($node, $ctx); '; - - treeMap($plantree, $map_expr, undef, $parse_ctx); - } - - -# $map_expr = 'propslice($node, $ctx);'; - my $ctx = {level => 0, a1 => [], - share_input_h => {}, multi_slice_h => {}, - time_stats_h => { cnt=>0, sum=>0, sumsq=>0, tt_h => {} } }; - -# my $map_expr = 'print "foo\n"'; - treeMap($plantree, - 'pre_slice($node, $ctx); ', - 'post_slice($node, $ctx); ', - $ctx); - - calc_color_rank($ctx); - - treeMap($plantree, - 'sharedscan_fixup($node, $ctx); ', - undef, - $ctx); - - # always label the left/right sides of nested loop - treeMap($plantree, - 'nestedloop_fixup($node, $ctx); ', - undef, - $ctx); - - if (defined($glob_edge) && length($glob_edge)) - { - treeMap($plantree, - 'get_rows_out($node, $ctx, $glob_edge); ', - undef, - $ctx); - } - - my $dotimeline = $glob_timeline; - - makedotfile($plantree, $time_list, $dotimeline, $plan_num, $parse_ctx, - $direction); -} - - -sub dotkid -{ - my $node = shift; - - # XXX XXX: olap fixup - don't label duplicate multi slice motion nodes - return - if (exists($node->{MultiSliceMotionDuplicate})); - - # XXX XXX: olap fixup - have children of primary sharedscan - # point to this node - if (exists($node->{SharedScanDuplicate})) - { - for my $kid (@{$node->{SharedScanDuplicate}->{child}}) - { - print '"' . $kid->{id} . '" -> "' . $node->{id} . '"' . ";\n"; - } - } - - my $docrunch = 2; - - if (exists($node->{child})) - { - if (($docrunch != 0 ) && (scalar(@{$node->{child}} > 10))) - { - my $maxi = scalar(@{$node->{child}}); - - $maxi -= 2; - - for my $ii (2..$maxi) - { - $node->{child}->[$ii]->{crunchme} = 1; - } - - if ($docrunch == 2) - { - splice(@{$node->{child}}, 3, ($maxi-2)); - - $node->{child}->[2]->{short} = "... removed " . ($maxi - 3) . " nodes ..."; - } - - - } - - for my $kid (@{$node->{child}}) - { - my $edge_label = ""; - - print '"' . $kid->{id} . '" -> "' . $node->{id} . 
'"'; - - if (exists($kid->{nested_loop_position})) - { - $edge_label .= $kid->{nested_loop_position}; - } - - if (exists($kid->{rows_out})) - { - $edge_label .= " "; - $edge_label .= " " - if (length($edge_label)); - $edge_label .= $kid->{rows_out}; - } - - if (length($edge_label)) - { - print ' [label="' . $edge_label . '" ] '; - } - - print ";\n"; - } - - for my $kid (@{$node->{child}}) - { - dotkid($kid); - } - - } - -} - -sub dotlabel_detail -{ - my $node = shift; - -# return $node->{short} ; - - my $outi = $node->{short}; - - my ($frst, $last) = (" ", " "); - - if (exists($node->{to_end})) - { - $last = "end: " . $node->{to_end}; - } - if (exists($node->{to_first})) - { - $frst = "first row: " . $node->{to_first}; - } - - my $slice = $node->{slice}; - $slice = " " - unless (defined($slice)); - - - if ((length($frst) > 1) || (length($last) > 1)) - { - my $memstuff = ""; - - # add memory statistics if have them... - if (exists($node->{memory})) - { - $memstuff = " | { {" . $node->{memory} . "} } "; - # make multiline - split on comma and "Work_mem" - # (using the vertical bar formatting character) - $memstuff =~ s/\,/\,\| /gm; - $memstuff =~ s/Work\_mem/\| Work\_mem/gm; - } - -# $outi .= " | { " . join(" | " , $frst, $last) . " } "; - $outi .= " | { " . join(" | " , $slice, $frst, $last) . " } " . $memstuff; - - # wrapping with braces changes record organization to vertical - $outi = "{ " . $outi . " } "; - } - - - return $outi; -} - - -sub dotlabel -{ - my $node = shift; - - # XXX XXX: olap fixup - don't label duplicate multi slice motion nodes - return - if (exists($node->{MultiSliceMotionDuplicate})); - - my $colortable = $glob_coltab{$glob_colorscheme}; - - my $color = scalar(@{$colortable}); - $color = $node->{slice} if (exists($node->{slice})); - $color =~ s/slice//; - - $color = ($color) % (scalar(@{$colortable})); - - # build list of node attributes - my @attrlist; - push @attrlist, "shape=record"; -# push @attrlist, "shape=polygon"; -# push @attrlist, "peripheries=2"; - -# push @attrlist, 'fontcolor=white'; - - push @attrlist, 'label="' . dotlabel_detail($node) .'"'; - push @attrlist, 'style=filled'; -# push @attrlist, 'style="filled,bold"'; -# push @attrlist, "color=" . $colortable->[$color]; -# push @attrlist, "fillcolor=" . $colortable->[$color]; - - if (exists($node->{color_rank})) # color by statistical ranking - { - my $edgecol = $glob_divcol{rdbu11}->[$node->{color_rank}]; - my $fillcol = $colortable->[$color]; - - if (defined($glob_statcolor)) - { - if ($glob_statcolor =~ m/^t$/i) - { - # show timing color only - $fillcol = $edgecol; - } - if ($glob_statcolor =~ m/^st/i) - { - # edge is slice color, fill is time stats - # invert the selection - ($edgecol, $fillcol) = ($fillcol, $edgecol); - } - } - - push @attrlist, 'style="filled,setlinewidth(6)"'; - push @attrlist, "color=\"" . $edgecol . '"'; - - push @attrlist, "fillcolor=\"" . $fillcol . '"'; - } - else - { - push @attrlist, "color=\"" . $colortable->[$color] . '"'; - push @attrlist, "fillcolor=\"" . $colortable->[$color] . '"'; - } - - if (exists($node->{crunchme})) - { - @attrlist = (); - -# push @attrlist, 'style=filled'; - push @attrlist, 'style=filled'; - push @attrlist, "color=\"" . $colortable->[$color] . '"'; - push @attrlist, "fillcolor=\"" . $colortable->[$color] . '"'; -# push @attrlist, "shape=circle"; - push @attrlist, "label=\"" . $node->{short} . 
'"'; -# push @attrlist, "fontsize=1"; -# push @attrlist, "height=0.01"; -# push @attrlist, "width=0.01"; -# push @attrlist, "height=0.12"; -# push @attrlist, "width=0.12"; - - print '"' . $node->{id} . '" [' . join(", ", @attrlist) . '];' . "\n" ; - } - else - { - print '"' . $node->{id} . '" [' . join(", ", @attrlist) . '];' . "\n" ; - } - - if (exists($node->{child})) - { - for my $kid (@{$node->{child}}) - { - dotlabel($kid); - } - - } - -} - - -sub makedotfile -{ - my ($plantree, $time_list, $do_timeline, $plan_num, $parse_ctx, - $direction) = @_; - -# print "\n\ndigraph plan1 { ranksep=.75; size = \"7.5,7.5\";\n\n \n"; - print "\n\ndigraph plan$plan_num { \n"; - -# print "graph [bgcolor=black];\n edge [style=bold, color=white];\n"; -# print "graph [bgcolor=black];\n edge [style=dashed, color=white];\n"; -# print "graph [bgcolor=black];\n edge [style=dotted, color=white];\n"; - - if ($do_timeline && scalar(@{$time_list})) - { - print " ranksep=.75; size = \"7.5,7.5\";\n\n \n"; - print " {\n node [shape=plaintext, fontsize=16];\n"; - print "/* the time-line graph */\n"; - - print join(' -> ', @{$time_list} ), ";\n"; - print "}\n"; - - print "node [shape=box];\n"; - - while ( my ($kk, $vv) = each(%{$parse_ctx->{h_to_startoff}})) - { - print '{ rank = same; ' . $kk . '; ' . join("; ", @{$vv}) . "; }\n"; - } - - } - - print "rankdir=$direction;\n"; - - dotkid($plantree); - - dotlabel($plantree); - - print "\n}\n"; - -} - - - -exit(); diff --git a/src/test/tinc/ext/gpdiff.pl b/src/test/tinc/ext/gpdiff.pl deleted file mode 100755 index 91e992614d9e01b3ddf447691cb591111fba6028..0000000000000000000000000000000000000000 --- a/src/test/tinc/ext/gpdiff.pl +++ /dev/null @@ -1,544 +0,0 @@ -#!/usr/bin/env perl -# -# $Header: //cdb2/feature/PARISTX/ORCA/cdb-pg/src/test/regress/gpdiff.pl#1 $ -# -# Copyright (c) 2007, 2008, 2009 GreenPlum. All rights reserved. -# Author: Jeffrey I Cohen -# -# -use Pod::Usage; -use strict; -use warnings; -use POSIX; -use File::Spec; -use Config; - -=head1 NAME - -B - GreenPlum diff - -=head1 SYNOPSIS - -B [options] logfile [logfile...] - -Options: - -Normally, gpdiff takes the standard "diff" options and passes them -directly to the diff program. Try `diff --help' for more information -on the standard options. The following options are specific to gpdiff: - - -help brief help message - -man full documentation - -version print gpdiff version and underlying diff version - -gpd_ignore_headers ignore header lines in query output - -gpd_ignore_plans ignore explain plan content in input files - -gpd_init load initialization file - -=head1 OPTIONS - -=over 8 - -=item B<-help> - - Print a brief help message and exits. - -=item B<-man> - - Prints the manual page and exits. - -=item B<-version> - - Prints the gpdiff version and underlying diff version - -=item B<-gpd_ignore_headers> - -gpdiff/atmsort expect Postgresql "psql-style" output for SELECT -statements, with a two line header composed of the column names, -separated by vertical bars (|), and a "separator" line of dashes and -pluses beneath, followed by the row output. The psql utility performs -some formatting to adjust the column widths to match the size of the -row output. Setting this parameter causes gpdiff to ignore any -differences in the column naming and format widths globally. - -=item B<-gpd_ignore_plans> - -Specify this option to ignore any explain plan diffs between the -input files. This will completely ignore any plan content in -the input files thus masking differences in plans between the input files. 
- -=item B<-gpd_init> - -Specify an initialization file containing a series of directives -(mainly for match_subs) that get applied to the input files. To -specify multiple initialization files, use multiple gpd_init arguments, eg: - - -gpd_init file1 -gpd_init file2 - -=back - -=head1 DESCRIPTION - -gpdiff compares files using diff after processing them with atmsort.pl. -This comparison is designed to ignore certain Greenplum-specific -informational messages, as well as handle the cases where query output -order may differ for a multi-segment Greenplum database versus a -single Postgresql instance. Type "atmsort.pl --man" for more details. -gpdiff is invoked by pg_regress as part of "make install-check". -In this case the diff options are something like: - - "-w -I NOTICE: -I HINT: -I CONTEXT: -I GP_IGNORE:". - -Like diff, gpdiff can compare two files, a file and directory, a -directory and file, and two directories. However, when gpdiff compares -two directories, it only returns the exit status of the diff -comparison of the final two files. - -=head1 BUGS - -While the exit status is set correctly for most cases, -STDERR messages from diff are not displayed. - -Also, atmsort cannot handle "unsorted" SELECT queries where the output -has strings with embedded newlines or pipe ("|") characters due to -limitations with the parser in the "tablelizer" function. Queries -with these characteristics must have an ORDER BY clause to avoid -potential erroneous comparison. - - -=head1 AUTHORS - -Jeffrey I Cohen - -Copyright (c) 2007, 2008, 2009 GreenPlum. All rights reserved. - -Address bug reports and comments to: jcohen@greenplum.com - - -=cut - -our $ATMSORT; -our $ATMDIFF = "diff"; - -my $glob_ignore_headers; -my $glob_ignore_plans; -my $glob_init_file = []; - -# assume atmsort.pl in same directory -BEGIN -{ - my $plname = $0; - my @foo = File::Spec->splitpath(File::Spec->rel2abs($plname)); - return 0 unless (scalar(@foo)); - pop @foo; - $ATMSORT = File::Spec->catfile( @foo, "atmsort.pl"); - - $glob_init_file = []; -} - -sub gpdiff_files -{ - my ($f1, $f2, $d2d) = @_; - - my $need_equiv = 0; - - my @tmpfils; - - # need gnu diff on solaris - if ($Config{'osname'} =~ m/solaris|sunos/i) - { - $ATMDIFF = "gdiff"; - } - - for my $ii (1..2) - { - my $tmpnam; - - for (;;) - { - my $tmpfh; - - $tmpnam = tmpnam(); - sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last; - } - - push @tmpfils, $tmpnam; - - } - - my $newf1 = shift @tmpfils; - my $newf2 = shift @tmpfils; - -# print $ATMSORT, "\n"; - - if (defined($d2d) && exists($d2d->{equiv})) - { - # assume f1 and f2 are the same... - system "$ATMSORT --do_equiv=compare < $f1 > $newf1"; - system "$ATMSORT --do_equiv=make < $f2 > $newf2"; - } - else - { - system "$ATMSORT < $f1 > $newf1"; - system "$ATMSORT < $f2 > $newf2"; - } - - my $args = join(" ", @ARGV, $newf1, $newf2); - -# print "args: $args\n"; - - my $outi =`$ATMDIFF $args`; - - my $stat = $? >> 8; # diff status - - unless (defined($d2d) && exists($d2d->{equiv})) - { - # check for start_equiv unless already doing equiv check - - # get the count of matching lines - my $grepout = `grep -c start_equiv $f1`; - chomp $grepout; - - $need_equiv = $grepout; -# $need_equiv = 0; - - if ($need_equiv) - { - $d2d = {} unless (defined($d2d)); - $d2d->{dir} = 1; - } - } - - # prefix the diff output with the files names for a "directory to - # directory" diff - if (defined($d2d) && length($outi)) - { - if (exists($d2d->{equiv})) - { - $outi = "$ATMDIFF $f1 $f2" . ".equiv\n" . 
$outi; - } - else - { - $outi = "$ATMDIFF $f1 $f2\n" . $outi; - } - } - - # replace temp file name references with actual file names - $outi =~ s/$newf1/$f1/gm; - $outi =~ s/$newf2/$f2/gm; - - print $outi; - -#my $stat = WEXITVALUE($?); # diff status - - unlink $newf1; - unlink $newf2; - - if ($need_equiv) - { - my $new_d2d = {}; - $new_d2d->{equiv} = 1; - - # call recursively if need to perform equiv comparison. - - my $stat1 = gpdiff_files($f1, $f1, $new_d2d); - my $stat2 = gpdiff_files($f2, $f2, $new_d2d); - - $stat = $stat1 if ($stat1); - $stat = $stat2 if ($stat2); - } - - return ($stat); - -} - -sub filefunc -{ - my ($f1, $f2, $d2d) = @_; - - if ((-f $f1) && (-f $f2)) - { - return (gpdiff_files($f1, $f2, $d2d)); - } - - # if f1 is a directory, do the filefunc of every file in that directory - if ((-d $f1) && (-d $f2)) - { - my $dir = $f1; - my ($dir_h, $stat); - - if ( opendir($dir_h, $dir) ) - { - my $fnam; - while ($fnam = readdir($dir_h)) - { - # ignore ., .. - next - unless ($fnam !~ m/^(\.)(\.)*$/); - - my $absname = - File::Spec->rel2abs( - File::Spec->catfile( - $dir, - $fnam - )); - - # specify that is a directory comparison - $d2d = {} unless (defined($d2d)); - $d2d->{dir} = 1; - $stat = filefunc($absname, $f2, $d2d); - } # end while - closedir $dir_h; - } # end if open - return $stat; - } - - # if f2 is a directory, find the corresponding file in that directory - if ((-f $f1) && (-d $f2)) - { - my $stat; - my @foo = File::Spec->splitpath($f1); - - return 0 - unless (scalar(@foo)); - my $basenam = $foo[-1]; - - my $fnam = - File::Spec->rel2abs( - File::Spec->catfile( - $f2, - $basenam - )); - - $stat = filefunc($f1, $fnam, $d2d); - - return $stat; - } - - # find f2 in dir f1 - if ((-f $f2) && (-d $f1)) - { - my $stat; - my @foo = File::Spec->splitpath($f2); - - return 0 - unless (scalar(@foo)); - my $basenam = $foo[-1]; - - my $fnam = - File::Spec->rel2abs( - File::Spec->catfile( - $f1, - $basenam - )); - - $stat = filefunc($fnam, $f2, $d2d); - - return $stat; - } - - return 0; -} - -if (1) -{ - my $man = 0; - my $help = 0; - my $verzion = 0; - my $pmsg = ""; - my @arg2; # arg list for diff - my %init_dup; - -# getatm(); - - # check for man or help args - if (scalar(@ARGV)) - { - my $argc = -1; - my $maxarg = scalar(@ARGV); - - while (($argc+1) < $maxarg) - { - $argc++; - my $arg = $ARGV[$argc]; - - unless ($arg =~ m/^\-/) - { - # even if no dash, might be a value for a dash arg... 
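-                # (everything that is not a gpdiff-specific flag ends up in
-                # @arg2 and is handed straight to the underlying diff call)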
- push @arg2, $arg; - next; - } - - # ENGINF-180: ignore header formatting - if ($arg =~ - m/^\-(\-)*(gpd\_ignore\_headers)$/i) - { - $glob_ignore_headers = 1; - next; - } - if ($arg =~ - m/^\-(\-)*(gpd\_ignore\_plans)$/i) - { - $glob_ignore_plans = 1; - next; - } - if ($arg =~ - m/^\-(\-)*(gpd\_init|gp\_init)(\=)*(.*)$/i) - { - if ($arg =~ m/\=/) # check if "=filename" - { - my @foo = split (/\=/, $arg, 2); - - die "no init file" - unless (2 == scalar(@foo)); - - my $init_file = pop @foo; - - # ENGINF-200: allow multiple init files - if (exists($init_dup{$init_file})) - { - warn "duplicate init file \'$init_file\', skipping..."; - } - else - { - push @{$glob_init_file}, $init_file; - - $init_dup{$init_file} = 1; - } - - } - else # next arg must be init file - { - $argc++; - - die "no init file" - unless (defined($ARGV[$argc])); - - my $init_file = $ARGV[$argc]; - - # ENGINF-200: allow multiple init files - if (exists($init_dup{$init_file})) - { - warn "duplicate init file \'$init_file\', skipping..."; - } - else - { - push @{$glob_init_file}, $init_file; - - $init_dup{$init_file} = 1; - } - - } - next; - } - if ($arg =~ m/^\-(\-)*(v|version)$/) - { - $verzion = 1; - next; - } - if ($arg =~ m/^\-(\-)*(man|help|\?)$/i) - { - if ($arg =~ m/man/i) - { - $man = 1; - } - else - { - $help = 1; - } - next; - } - - # put all "dash" args on separate list for diff - push @arg2, $arg; - - } # end for - } - else - { - $pmsg = "missing an operand after \`gpdiff\'"; - $help = 1; - } - - if ((1 == scalar(@ARGV)) && (!($help || $man || $verzion))) - { - $pmsg = "unknown operand: $ARGV[0]"; - $help = 1; - } - - if ($verzion) - { - my $VERSION = do { my @r = (q$Revision: #1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # must be all one line, for MakeMaker - - # need gnu diff on solaris - if ($Config{'osname'} =~ m/solaris|sunos/i) - { - $ATMDIFF = "gdiff"; - } - my $whichdiff = `which $ATMDIFF`; - chomp $whichdiff; - print "$0 version $VERSION\n"; - print "Type \'gpdiff.pl --help\' for more information on the standard options\n"; - print "$0 calls the \"", $whichdiff, "\" utility:\n\n"; - - my $outi = `$ATMDIFF -v`; - - $outi =~ s/^/ /gm; - - print $outi, "\n"; - - exit(1); - } - - pod2usage(-msg => $pmsg, -exitstatus => 1) if $help; - pod2usage(-msg => $pmsg, -exitstatus => 0, -verbose => 2) if $man; - - my $f2 = pop @ARGV; - my $f1 = pop @ARGV; - - for my $fname ($f1, $f2) - { - unless (-e $fname) - { - print STDERR "gpdiff: $fname: No such file or directory\n"; - } - } - exit(2) - unless ((-e $f1) && (-e $f2)); - - # use the "stripped" arg list for diff - @ARGV = (); - - # remove the filenames - pop @arg2; - pop @arg2; - - push(@ARGV, @arg2); - - # ENGINF-180: tell atmsort to ignore header formatting (globally) - if ($glob_ignore_headers) - { - $ATMSORT .= " --ignore_headers "; - } - - # Tell atmsort to ignore plan content if -gpd_ignore_plans is set - if ($glob_ignore_plans) - { - $ATMSORT .= " --ignore_plans "; - } - - # ENGINF-200: allow multiple init files - if (defined($glob_init_file) && scalar(@{$glob_init_file})) - { - # MPP-12262: test here, because we don't get status for atmsort call - for my $init_file (@{$glob_init_file}) - { - die "no such file: $init_file" - unless (-e $init_file); - } - - $ATMSORT .= " --init=". join(" --init=", @{$glob_init_file}) . " "; - } - - - exit(filefunc($f1, $f2)); -}