diff --git a/src/backend/gpopt/utils/COptTasks.cpp b/src/backend/gpopt/utils/COptTasks.cpp
index 119fefa892fa5ea787a9b6d6b87dfde9e0273ac8..d1ab7833f0d5d5afe48adcd6a73458cc1a53d4bf 100644
--- a/src/backend/gpopt/utils/COptTasks.cpp
+++ b/src/backend/gpopt/utils/COptTasks.cpp
@@ -477,7 +477,7 @@ ICostModel *
 COptTasks::GetCostModel(CMemoryPool *mp, ULONG num_segments)
 {
 	ICostModel *cost_model = NULL;
-	if (OPTIMIZER_GPDB_CALIBRATED >= optimizer_cost_model)
+	if (optimizer_cost_model >= OPTIMIZER_GPDB_CALIBRATED)
 	{
 		cost_model = GPOS_NEW(mp) CCostModelGPDB(mp, num_segments);
 	}
diff --git a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp
new file mode 100644
index 0000000000000000000000000000000000000000..cdc7eecb463041c518b03bf0a30e7ac1b1ba61be
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp
@@ -0,0 +1,668 @@
+[668 lines of DXL minidump XML not shown -- the markup was stripped during extraction]
diff --git a/src/backend/gporca/data/dxl/minidump/BitmapIndexScanChooseIndex.mdp b/src/backend/gporca/data/dxl/minidump/BitmapIndexScanChooseIndex.mdp
index e50d9bb1677d159e9dfcde060015925cc57c357e..ee67101f215cf81169fdf206b576c9f8bd5851fb 100644
--- a/src/backend/gporca/data/dxl/minidump/BitmapIndexScanChooseIndex.mdp
+++ b/src/backend/gporca/data/dxl/minidump/BitmapIndexScanChooseIndex.mdp
@@ -608,7 +608,7 @@
-[XML line not shown -- markup stripped during extraction]
+[XML line not shown -- markup stripped during extraction]
@@ -619,7 +619,7 @@
-[XML line not shown -- markup stripped during extraction]
+[XML line not shown -- markup stripped during extraction]
diff --git a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
index 3aa8f79af0cb69a99a8d4b9cbbc24ed29c90f8a2..c42b21ed64f37b7839a27b8bd9c6500cea53b4a0 100644
--- a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
+++ b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
@@ -26,6 +26,7 @@
 #include "gpopt/operators/CPhysicalMotion.h"
 #include "gpopt/operators/CPhysicalPartitionSelector.h"
 #include "gpopt/operators/CPredicateUtils.h"
+#include "gpopt/operators/CScalarBitmapIndexProbe.h"
 #include "naucrates/statistics/CStatisticsUtils.h"
 #include "gpopt/operators/CExpression.h"
 #include "gpdbcost/CCostModelGPDB.h"
@@ -1618,6 +1619,18 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
 	CColRefSet *pcrsUsed =
pexprIndexCond->DeriveUsedColumns();
 	CColRefSet *outerRefs = exprhdl.DeriveOuterReferences();
 	CColRefSet *pcrsLocalUsed = GPOS_NEW(mp) CColRefSet(mp, *pcrsUsed);
+	IMDIndex::EmdindexType indexType = IMDIndex::EmdindSentinel;
+
+	if (COperator::EopScalarBitmapIndexProbe == pexprIndexCond->Pop()->Eopid())
+	{
+		indexType = CScalarBitmapIndexProbe::PopConvert(pexprIndexCond->Pop())
+						->Pindexdesc()
+						->IndexType();
+	}
+
+	BOOL isInPredOnBtreeIndex =
+		(IMDIndex::EmdindBtree == indexType &&
+		 COperator::EopScalarArrayCmp == (*pexprIndexCond)[0]->Pop()->Eopid());

 	// subtract outer references from the used colrefs, so we can see
 	// how many colrefs are used for this table
@@ -1632,9 +1645,17 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,

 	if (COperator::EopScalarBitmapIndexProbe != pexprIndexCond->Pop()->Eopid() ||
-		1 < pcrsLocalUsed->Size())
+		1 < pcrsLocalUsed->Size() ||
+		(isInPredOnBtreeIndex && rows > 2.0 &&
+		 !GPOS_FTRACE(EopttraceCalibratedBitmapIndexCostModel)))
 	{
-		// child is Bitmap AND/OR, or we use Multi column index
+		// Child is Bitmap AND/OR, we use a multi-column index, or this is an IN
+		// predicate costed with the "calibrated" cost model.
+		// We handle the IN predicate in this code path to avoid plan regressions
+		// against earlier versions of the code, which treated IN predicates like
+		// ORs and therefore also costed them here. This is especially noticeable
+		// for btree indexes, which often have a high NDV, because the small/large
+		// NDV cost model produces a very high cost at higher NDVs.
 		const CDouble dIndexFilterCostUnit =
 			pcmgpdb->GetCostModelParams()
 				->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit)
 				->Get();
@@ -1671,6 +1692,11 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
 	// if the expression is const table get, the pcrsUsed is empty
 	// so we use minimum value MinDistinct for dNDV in that case.
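 	// (dNDV computed below drives the per-page term, dNDV * BitmapPageCost, the
 	// heap bitmap union cost, and the small- vs. large-NDV choice in the
 	// calibrated model further down)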
CDouble dNDV = CHistogram::MinDistinct; + CDouble dNDVThreshold = + pcmgpdb->GetCostModelParams() + ->PcpLookup(CCostModelParamsGPDB::EcpBitmapNDVThreshold) + ->Get(); + if (rows < 1.0) { // if we aren't accessing a row every rebind, then don't charge a cost for those cases where we don't have a row @@ -1698,10 +1724,7 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl, if (!GPOS_FTRACE(EopttraceCalibratedBitmapIndexCostModel)) { - CDouble dNDVThreshold = - pcmgpdb->GetCostModelParams() - ->PcpLookup(CCostModelParamsGPDB::EcpBitmapNDVThreshold) - ->Get(); + // optimizer_cost_model = 'calibrated' if (dNDVThreshold <= dNDV) { result = CostBitmapLargeNDV(pcmgpdb, pci, dNDV); @@ -1713,44 +1736,66 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl, } else { + // optimizer_cost_model = 'experimental' CDouble dBitmapIO = pcmgpdb->GetCostModelParams() ->PcpLookup(CCostModelParamsGPDB::EcpBitmapIOCostSmallNDV) ->Get(); - CDouble dInitScan = + CDouble c5_dInitScan = pcmgpdb->GetCostModelParams() ->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor) ->Get(); + CDouble c3_dBitmapPageCost = + pcmgpdb->GetCostModelParams() + ->PcpLookup(CCostModelParamsGPDB::EcpBitmapPageCost) + ->Get(); + BOOL isAOTable = CPhysicalScan::PopConvert(exprhdl.Pop()) + ->Ptabdesc() + ->IsAORowOrColTable(); + + // some cost constants determined with the cal_bitmap_test.py script + CDouble c1_cost_per_row(0.03); + CDouble c2_cost_per_byte(0.0001); + CDouble bitmap_union_cost_per_distinct_value(0.000027); + CDouble init_cost_advantage_for_bitmap_scan(0.9); - if (1 < pcrsUsed->Size()) // it is a join + if (IMDIndex::EmdindBtree == indexType) { - // The numbers below were experimentally determined using regression analysis in the cal_bitmap_test.py script - // The following dSizeCost is in the form C1 * rows + C2 * rows * width. This is because the width should have - // significantly less weight than rows as the execution time does not grow as fast in regards to width - CDouble dSizeCost = - rows * (1 + std::max(width * 0.005, 1.0)) * 0.05; - result = CCost( // cost for each byte returned by the index scan plus cost for incremental rebinds - pci->NumRebinds() * (dBitmapIO * dSizeCost + dInitRebind) + - // the BitmapPageCost * dNDV takes into account the idea of multiple tuples being on the same page. - // If you have a small NDV, the likelihood of multiple tuples matching on one page is high and so the - // page cost is reduced. Even though the page cost will decrease, the cost of accessing each tuple will - // dominate. Likewise, if the NDV is large, the num of tuples matching per page is lower so the page - // cost should be higher - dInitScan * dNDV); + // btree indexes are not sensitive to the NDV, since they don't have any bitmaps + c3_dBitmapPageCost = 0.0; } - else + + // Give the index scan a small initial advantage over the table scan, so we use indexes + // for small tables - this should avoid having table scan and index scan costs being + // very close together for many small queries. + c5_dInitScan = c5_dInitScan * init_cost_advantage_for_bitmap_scan; + + // The numbers below were experimentally determined using regression analysis in the cal_bitmap_test.py script + // The following dSizeCost is in the form C1 * rows + C2 * rows * width. 
This is because the width should have + // significantly less weight than rows as the execution time does not grow as fast in regards to width + CDouble dSizeCost = dBitmapIO * (rows * c1_cost_per_row + + rows * width * c2_cost_per_byte); + + CDouble bitmapUnionCost = 0; + + if (!isAOTable && indexType == IMDIndex::EmdindBitmap && dNDV > 1.0) { - // The numbers below were experimentally determined using regression analysis in the cal_bitmap_test.py script - CDouble dSizeCost = - rows * (1 + std::max(width * 0.005, 1.0)) * 0.001; - - result = - CCost( // cost for each byte returned by the index scan plus cost for incremental rebinds - pci->NumRebinds() * - (dBitmapIO * dSizeCost + 10 * dInitRebind) * dNDV + - // similar to above, the dInitScan * dNDV takes into account the likelihood of multiple tuples per page - dInitScan * dNDV); + CDouble baseTableRows = CPhysicalScan::PopConvert(exprhdl.Pop()) + ->PstatsBaseTable() + ->Rows(); + + // for bitmap index scans on heap tables, we found that there is an additional cost + // associated with unioning them that is proportional to the number of bitmaps involved + // (dNDV-1) times the width of the bitmap (proportional to the number of rows in the table) + bitmapUnionCost = std::max(0.0, dNDV.Get() - 1.0) * + baseTableRows * + bitmap_union_cost_per_distinct_value; } + + result = CCost(pci->NumRebinds() * + (dSizeCost + dNDV * c3_dBitmapPageCost + + dInitRebind + bitmapUnionCost) + + c5_dInitScan); } } diff --git a/src/backend/gporca/libgpdbcost/src/CCostModelParamsGPDB.cpp b/src/backend/gporca/libgpdbcost/src/CCostModelParamsGPDB.cpp index a176a29bc7ae2c48830fa088ae90dfbe6de9e36e..c7b9c65f47c0ddb01b67b7508822a6417f94ebaf 100644 --- a/src/backend/gporca/libgpdbcost/src/CCostModelParamsGPDB.cpp +++ b/src/backend/gporca/libgpdbcost/src/CCostModelParamsGPDB.cpp @@ -169,7 +169,7 @@ const CDouble CCostModelParamsGPDB::DBitmapPageCostLargeNDV(83.1651); const CDouble CCostModelParamsGPDB::DBitmapPageCostSmallNDV(204.3810); // default bitmap page cost with no assumption about NDV -const CDouble CCostModelParamsGPDB::DBitmapPageCost(50.4381); +const CDouble CCostModelParamsGPDB::DBitmapPageCost(10); // default threshold of NDV for bitmap costing const CDouble CCostModelParamsGPDB::DBitmapNDVThreshold(200); diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp index 781f180d9b6682b321ca92d7a9e185076df89f29..3fd7ede9060b79d9dd0fc40a1685a23d0690158c 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp @@ -76,7 +76,7 @@ using namespace gpopt; // predicates less selective than this threshold // (selectivity is greater than this number) lead to // disqualification of a btree index on an AO table -#define AO_TABLE_BTREE_INDEX_SELECTIVITY_THRESHOLD 0.05 +#define AO_TABLE_BTREE_INDEX_SELECTIVITY_THRESHOLD 0.10 //--------------------------------------------------------------------------- // @function: diff --git a/src/backend/gporca/scripts/cal_bitmap_test.py b/src/backend/gporca/scripts/cal_bitmap_test.py index 0ba9cc754b74df43b3b066d801252107ee3cc23d..9c52a7538644b3d4644518ed6e941f3e5dd01563 100755 --- a/src/backend/gporca/scripts/cal_bitmap_test.py +++ b/src/backend/gporca/scripts/cal_bitmap_test.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Optimizer calibration test for bitmap indexes # @@ -25,10 +25,12 @@ import argparse import time import re import math +import os +import sys try: from 
gppylib.db import dbconn -except ImportError, e: +except ImportError as e: sys.exit('ERROR: Cannot import modules. Please check that you have sourced greenplum_path.sh. Detail: ' + str(e)) # constants @@ -64,20 +66,27 @@ NL_JOIN = "nl_join" NL_JOIN_PATTERN = r"Nested Loop" NL_JOIN_PATTERN_V5 = r"Nested Loop" -OPTIMIZER_DEFAULT_PLAN = "optimizer" - +FALLBACK_PLAN = "fallback" +FALLBACK_PATTERN = "Postgres query optimizer" +FALLBACK_PATTERN_V5 = "legacy query optimizer" +OPTIMIZER_DEFAULT_PLAN = "optimizer" # global variables # ----------------------------------------------------------------------------- -glob_verbose=False +# constants # only consider optimizer errors beyond x * sigma (standard deviation) as significant -glob_sigma_diff=3 -glob_log_file=None -glob_exe_timeout=40000 -glob_gpdb_major_version=7 +glob_sigma_diff = 3 +glob_log_file = None +glob_exe_timeout = 40000 +glob_gpdb_major_version = 7 +glob_dim_table_rows = 10000 +# global variables that may be modified +glob_verbose = False +glob_rowcount = -1 +glob_appendonly = False # SQL statements, DDL and DML # ----------------------------------------------------------------------------- @@ -119,42 +128,41 @@ DISTRIBUTED BY (id); """ _with_appendonly = """ -WITH (appendonly=true, compresslevel=5, compresstype=zlib) +WITH (appendonly=true) """ -_create_other_tables = [ """ +_create_other_tables = [""" CREATE TABLE cal_temp_ids(f_id int, f_rand double precision) DISTRIBUTED BY (f_id); """, - """ + """ CREATE TABLE cal_dim(dim_id int, dim_id2 int, txt text) DISTRIBUTED BY (dim_id); """, -""" + """ CREATE TABLE cal_bfv_dim (id integer, col2 integer) DISTRIBUTED BY (id); -""" ] +"""] # insert into temp table. Parameters: -# - integer start value (usually 0 or 1) -# - integer stop value (suggested value is 10000000) +# - integer stop value (suggested value is 10,000,000) _insert_into_temp = """ -INSERT INTO cal_temp_ids SELECT x, random() FROM (SELECT * FROM generate_series(%d,%d)) T(x); +INSERT INTO cal_temp_ids SELECT x, random() FROM (SELECT * FROM generate_series(1,%d)) T(x); """ _insert_into_table = """ INSERT INTO cal_txtest SELECT f_id, f_id, - f_id%10, - f_id%100, - f_id%1000, - f_id%10000, - f_id%10, - f_id%100, - f_id%1000, - f_id%10000, - repeat('a', 900) + f_id%10 + 1, + f_id%100 + 1, + f_id%1000 + 1, + f_id%10000 + 1, + f_id%10 + 1, + f_id%100 + 1, + f_id%1000 + 1, + f_id%10000 + 1, + repeat('a', 960) FROM cal_temp_ids order by f_rand; """ @@ -166,49 +174,49 @@ INSERT INTO cal_dim SELECT x, x, repeat('d', 100) FROM (SELECT * FROM generate_s _create_index_arr = [""" CREATE INDEX cal_txtest_i_bitmap_10 ON cal_txtest USING bitmap(bitmap10); """, - """ + """ CREATE INDEX cal_txtest_i_bitmap_100 ON cal_txtest USING bitmap(bitmap100); """, - """ + """ CREATE INDEX cal_txtest_i_bitmap_1000 ON cal_txtest USING bitmap(bitmap1000); """, - """ + """ CREATE INDEX cal_txtest_i_bitmap_10000 ON cal_txtest USING bitmap(bitmap10000); """, - ] + ] _create_bfv_index_arr = [""" CREATE INDEX idx_cal_bfvtest_bitmap ON cal_bfvtest USING bitmap(id); """, - ] + ] _create_ndv_index_arr = [""" CREATE INDEX cal_ndvtest_bitmap ON cal_ndvtest USING bitmap(val); """, - ] + ] -_create_btree_indexes_ao_arr = [""" +_create_btree_indexes_arr = [""" CREATE INDEX cal_txtest_i_btree_unique ON cal_txtest USING btree(btreeunique); """, - """ + """ CREATE INDEX cal_txtest_i_btree_10 ON cal_txtest USING btree(btree10); """, - """ + """ CREATE INDEX cal_txtest_i_btree_100 ON cal_txtest USING btree(btree100); """, - """ + """ CREATE INDEX cal_txtest_i_btree_1000 ON 
cal_txtest USING btree(btree1000); """, - """ + """ CREATE INDEX cal_txtest_i_btree_10000 ON cal_txtest USING btree(btree10000); """, - """ + """ CREATE INDEX idx_cal_bfvtest_btree ON cal_bfvtest USING btree(id); """, - """ + """ CREATE INDEX cal_ndvtest_btree ON cal_ndvtest USING btree(val); """, -] + ] _analyze_table = """ ANALYZE cal_txtest; @@ -222,120 +230,60 @@ _allow_system_mods_v5 = """ SET allow_system_table_mods to 'dml'; """ -# Make sure pg_statistics has smooth and precise statistics, so that the cardinality estimates we get are very precise -# -# For NDVs of 100 or less, list all of them -# For NDVs of more than 100, make some dummy NDVs and 5 intervals of the same length -# So far, id and btreeunique are not yet used (staattnums 1 and 2), no stats are changed +# Make sure pg_statistics and pg_class have accurate statistics, so that the cardinality estimates we get are very precise -_fix_statistics = [""" -UPDATE pg_statistic - SET stadistinct = 10, - stakind1 = 1, - stanumbers1 = '{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1 }', - stavalues1 = '{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}'::int[] -WHERE starelid = 'cal_txtest'::regclass AND staattnum = 3; -""", - """ -UPDATE pg_statistic - SET stadistinct = 100, - stakind1 = 1, - stanumbers1 = '{ 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01 }', - stavalues1 = '{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 }'::int[] -WHERE starelid = 'cal_txtest'::regclass AND staattnum = 4; -""", - """ -UPDATE pg_statistic - SET stadistinct = 1000, - stakind1 = 1, - stanumbers1 = '{ 0.001, 0.001, 0.001 }', - stavalues1 = '{100, 200, 300}'::int[], - stakind2 = 2, - stanumbers2 = '{}', - stavalues2 = '{0, 199, 399, 599, 799, 999}'::int[] -WHERE starelid = 'cal_txtest'::regclass AND staattnum = 5; -""", - """ -UPDATE pg_statistic - SET stadistinct = 10000, - stakind1 = 1, - stanumbers1 = '{ 0.0001, 0.0001, 0.0001 }', - stavalues1 = '{1000, 2000, 3000}'::int[], - stakind2 = 2, - stanumbers2 = '{}', - stavalues2 = '{0, 1999, 3999, 5999, 7999, 9999}'::int[] -WHERE starelid = 'cal_txtest'::regclass AND staattnum = 6; -""", - """ -UPDATE pg_statistic - SET stadistinct = 10, - stakind1 = 1, - stanumbers1 = '{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1 }', - stavalues1 = '{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}'::int[] -WHERE starelid = 'cal_txtest'::regclass AND staattnum = 7; -""", - """ -UPDATE pg_statistic - SET stadistinct = 100, - stakind1 = 1, - stanumbers1 = '{ 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 
0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01 }', - stavalues1 = '{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 }'::int[] -WHERE starelid = 'cal_txtest'::regclass AND staattnum = 8; -""", - """ -UPDATE pg_statistic - SET stadistinct = 1000, - stakind1 = 1, - stanumbers1 = '{ 0.001, 0.001, 0.001 }', - stavalues1 = '{100, 200, 300}'::int[], - stakind2 = 2, - stanumbers2 = '{}', - stavalues2 = '{0, 199, 399, 599, 799, 999}'::int[] -WHERE starelid = 'cal_txtest'::regclass AND staattnum = 9; -""", - """ -UPDATE pg_statistic - SET stadistinct = 10000, - stakind1 = 1, - stanumbers1 = '{ 0.0001, 0.0001, 0.0001 }', - stavalues1 = '{1000, 2000, 3000}'::int[], - stakind2 = 2, - stanumbers2 = '{}', - stavalues2 = '{0, 1999, 3999, 5999, 7999, 9999}'::int[] -WHERE starelid = 'cal_txtest'::regclass AND staattnum = 10; -""", - """ -UPDATE pg_statistic - SET stadistinct = 10000, - stakind1 = 1, - stanumbers1 = '{ 0.0001, 0.0001, 0.0001 }', - stavalues1 = '{1000, 2000, 3000}'::int[], - stakind2 = 2, - stanumbers2 = '{}', - stavalues2 = '{0, 1999, 3999, 5999, 7999, 9999}'::int[] -WHERE starelid = 'cal_dim'::regclass AND staattnum = 1; -""", - """ +_update_pg_class = """ +UPDATE pg_class + SET reltuples = %i +WHERE relname = '%s'; +""" + +# add an MCV or histogram (stakind1 = 1 or 2) and a correlation (stakind2 = 3) value +_update_pg_stats = """ UPDATE pg_statistic - SET stadistinct = 10000, - stakind1 = 1, - stanumbers1 = '{ 0.0001, 0.0001, 0.0001 }', - stavalues1 = '{1000, 2000, 3000}'::int[], - stakind2 = 2, - stanumbers2 = '{}', - stavalues2 = '{0, 1999, 3999, 5999, 7999, 9999}'::int[] -WHERE starelid = 'cal_dim'::regclass AND staattnum = 2; -""" ] + SET stadistinct = %f, + stakind1 = %d, + stanumbers1 = %s, + stavalues1 = %s, + stakind2 = 3, + stanumbers2 = '{ %f }', + stavalues2 = NULL, + stakind3 = 0, + stanumbers3 = NULL, + stavalues3 = NULL, + stakind4 = 0, + stanumbers4 = NULL, + stavalues4 = NULL +WHERE starelid = '%s'::regclass AND staattnum = %i; +""" +# columns to fix, in the format (table name, column name, attnum, ndv, num rows) +# use -1 as the NDV for unique columns and use -1 for the variable number of rows in the fact table +_stats_cols_to_fix = [ + ('cal_txtest', 'id', 1, -1, -1), + ('cal_txtest', 'btreeunique', 2, -1, -1), + ('cal_txtest', 'btree10', 3, 10, -1), + ('cal_txtest', 'btree100', 4, 100, -1), + ('cal_txtest', 'btree1000', 5, 1000, -1), + ('cal_txtest', 'btree10000', 6, 10000, -1), + ('cal_txtest', 'bitmap10', 7, 10, -1), + ('cal_txtest', 'bitmap100', 8, 100, -1), + ('cal_txtest', 'bitmap1000', 9, 1000, -1), + ('cal_txtest', 'bitmap10000', 10, 10000, -1), + ('cal_dim', 'dim_id', 1, -1, glob_dim_table_rows), + ('cal_dim', 'dim_id2', 2, -1, glob_dim_table_rows) +] # deal with command line arguments # ----------------------------------------------------------------------------- def parseargs(): - parser = 
argparse.ArgumentParser(description=_help, version='1.0') + parser = argparse.ArgumentParser(description=_help) - parser.add_argument("tests", metavar="TEST", choices=[ [], "all", "none", "bitmap_scan_tests", "bitmap_join_tests" ], nargs="*", - help="Run these tests (all, bitmap_scan_tests, bitmap_join_tests), default is none") + parser.add_argument("tests", metavar="TEST", choices=[[], "all", "none", "bitmap_scan_tests", "btree_ao_scan_tests", + "bitmap_ndv_scan_tests", "index_join_tests", "bfv_join_tests"], + nargs="*", + help="Run these tests (all, none, bitmap_scan_tests, btree_ao_scan_tests, bitmap_ndv_scan_tests, index_join_tests, bfv_join_tests), default is none") parser.add_argument("--create", action="store_true", help="Create the tables to use in the test") parser.add_argument("--execute", type=int, default="0", @@ -346,14 +294,14 @@ def parseargs(): help="Print more verbose output") parser.add_argument("--logFile", default="", help="Log diagnostic output to a file") - parser.add_argument("--host", default="localhost", - help="Host to connect to (default is localhost).") + parser.add_argument("--host", default="", + help="Host to connect to (default is localhost or $PGHOST, if set).") parser.add_argument("--port", type=int, default="0", - help="Port on the host to connect to") + help="Port on the host to connect to (default is 0 or $PGPORT, if set)") parser.add_argument("--dbName", default="", help="Database name to connect to") parser.add_argument("--appendOnly", action="store_true", - help="Create an append-only table (uses only bitmap indexes). Default is a heap table") + help="Create an append-only table. Default is a heap table") parser.add_argument("--numRows", type=int, default="10000000", help="Number of rows to INSERT INTO the table (default is 10 million)") @@ -363,6 +311,7 @@ def parseargs(): args = parser.parse_args() return args, parser + def log_output(str): if glob_verbose: print(str) @@ -376,12 +325,15 @@ def log_output(str): def connect(host, port_num, db_name): try: dburl = dbconn.DbURL(hostname=host, port=port_num, dbname=db_name) - conn = dbconn.connect(dburl, encoding="UTF8") + conn = dbconn.connect(dburl, encoding="UTF8", unsetSearchPath=False) + except Exception as e: - print("Exception during connect: %s" % e) + print(("Exception during connect: %s" % e)) quit() + return conn + def select_version(conn): global glob_gpdb_major_version sqlStr = "SELECT version()" @@ -401,6 +353,7 @@ def select_version(conn): for row in rows: log_output(str(row[0])) + def execute_sql(conn, sqlStr): try: log_output("") @@ -408,28 +361,47 @@ def execute_sql(conn, sqlStr): dbconn.execSQL(conn, sqlStr) except Exception as e: print("") - print("Error executing query: %s; Reason: %s" % (sqlStr, e)) + print(("Error executing query: %s; Reason: %s" % (sqlStr, e))) + dbconn.execSQL(conn, "abort") + + +def select_first_int(conn, sqlStr): + try: + log_output("") + log_output("Executing query: %s" % sqlStr) + curs = dbconn.query(conn, sqlStr) + rows = curs.fetchall() + for row in rows: + return int(row[0]) + + except Exception as e: + print("") + print(("Error executing query: %s; Reason: %s" % (sqlStr, e))) dbconn.execSQL(conn, "abort") + def execute_sql_arr(conn, sqlStrArr): for sqlStr in sqlStrArr: execute_sql(conn, sqlStr) + def execute_and_commit_sql(conn, sqlStr): execute_sql(conn, sqlStr) commit_db(conn) + def commit_db(conn): execute_sql(conn, "commit") + # run an SQL statement and return the elapsed wallclock time, in seconds def timed_execute_sql(conn, sqlStr): start = 
time.time() - execute_sql(conn, sqlStr) + num_rows = select_first_int(conn, sqlStr) end = time.time() - elapsed_time_in_msec = round((end-start)*1000) - log_output("Elapsed time (msec): %.0f" % elapsed_time_in_msec) - return elapsed_time_in_msec + elapsed_time_in_msec = round((end - start) * 1000) + log_output("Elapsed time (msec): %d, rows: %d" % (elapsed_time_in_msec, num_rows)) + return elapsed_time_in_msec, num_rows # run an SQL statement n times, unless it takes longer than a timeout @@ -437,17 +409,21 @@ def timed_execute_sql(conn, sqlStr): def timed_execute_n_times(conn, sqlStr, exec_n_times): sum_exec_times = 0.0 sum_square_exec_times = 0.0 - e = 1 + e = 0 act_num_exes = exec_n_times - while e <= act_num_exes: - exec_time = timed_execute_sql(conn, sqlStr) + num_rows = -1 + while e < act_num_exes: + exec_time, local_num_rows = timed_execute_sql(conn, sqlStr) + e = e + 1 sum_exec_times += exec_time - sum_square_exec_times += exec_time*exec_time + sum_square_exec_times += exec_time * exec_time + if num_rows >= 0 and local_num_rows != num_rows: + log_output("Inconsistent number of rows returned: %d and %d" % (num_rows, local_num_rows)) + num_rows = local_num_rows if exec_time > glob_exe_timeout: # we exceeded the timeout, don't keep executing this long query act_num_exes = e log_output("Query %s exceeded the timeout of %d seconds" % (sqlStr, glob_exe_timeout)) - e = e+1 # compute mean and standard deviation of the execution times mean = sum_exec_times / act_num_exes @@ -456,7 +432,7 @@ def timed_execute_n_times(conn, sqlStr, exec_n_times): variance = 0.0 else: variance = sum_square_exec_times / act_num_exes - mean * mean - return (round(mean, 3), round(math.sqrt(variance), 3), act_num_exes) + return (round(mean, 3), round(math.sqrt(variance), 3), act_num_exes, num_rows) # Explain a query and find a table scan or index scan in an explain output @@ -474,11 +450,13 @@ def explain_index_scan(conn, sqlStr): table_scan_pattern = TABLE_SCAN_PATTERN index_scan_pattern = INDEX_SCAN_PATTERN bitmap_scan_pattern = BITMAP_SCAN_PATTERN + fallback_pattern = FALLBACK_PATTERN if (glob_gpdb_major_version) <= 5: table_scan_pattern = TABLE_SCAN_PATTERN_V5 index_scan_pattern = INDEX_SCAN_PATTERN_V5 bitmap_scan_pattern = BITMAP_SCAN_PATTERN_V5 - + fallback_pattern = FALLBACK_PATTERN_V5 + for row in rows: log_output(row[0]) if re.search(TABLE_NAME_PATTERN, row[0]) or re.search(NDV_TABLE_NAME_PATTERN, row[0]): @@ -491,6 +469,9 @@ def explain_index_scan(conn, sqlStr): elif re.search(table_scan_pattern, row[0]): scan_type = TABLE_SCAN cost = cost_from_explain_line(row[0]) + elif re.search(fallback_pattern, row[0]): + log_output("*** ERROR: Fallback") + scan_type = FALLBACK_PLAN except Exception as e: log_output("\n*** ERROR explaining query:\n%s;\nReason: %s" % ("explain " + sqlStr, e)) @@ -515,12 +496,14 @@ def explain_join_scan(conn, sqlStr): table_scan_pattern = TABLE_SCAN_PATTERN index_scan_pattern = INDEX_SCAN_PATTERN bitmap_scan_pattern = BITMAP_SCAN_PATTERN + fallback_pattern = FALLBACK_PATTERN if (glob_gpdb_major_version) <= 5: hash_join_pattern = HASH_JOIN_PATTERN_V5 nl_join_pattern = NL_JOIN_PATTERN_V5 table_scan_pattern = TABLE_SCAN_PATTERN_V5 index_scan_pattern = INDEX_SCAN_PATTERN_V5 bitmap_scan_pattern = BITMAP_SCAN_PATTERN_V5 + fallback_pattern = FALLBACK_PATTERN_V5 # save the cost of the join above the scan type for row in rows: @@ -537,6 +520,9 @@ def explain_join_scan(conn, sqlStr): scan_type = INDEX_SCAN elif re.search(table_scan_pattern, row[0]): scan_type = TABLE_SCAN + elif 
re.search(fallback_pattern, row[0]): + log_output("*** ERROR: Fallback") + scan_type = FALLBACK_PLAN except Exception as e: log_output("\n*** ERROR explaining query:\n%s;\nReason: %s" % ("explain " + sqlStr, e)) @@ -557,12 +543,13 @@ def cost_from_explain_line(line): # iterate over one parameterized query, using a range of parameter values, explaining and (optionally) executing the query -def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMethod, explain_method, reset_method, plan_ids, force_methods, execute_n_times): +def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMethod, explain_method, reset_method, + plan_ids, force_methods, execute_n_times): # expects the following: # - conn: A connection # - lowParamValue: The lowest (integer) value to try for the parameter # - highParamLimit: The highest (integer) value to try for the parameter + 1 - # - setup: A method that runs any sql needed for setup before a particular select run, given a parameterized query and a paramter value + # - setup: A method that runs any sql needed for setup before a particular select run, given a parameterized query and a parameter value # - parameterizeMethod: A method to generate the actual query text, given a parameterized query and a parameter value # - explain_method: A method that takes a connection and an SQL string and returns a tuple (plan, cost) # - reset_method: A method to reset all gucs and similar switches, to get the default plan by the optimizer @@ -571,7 +558,7 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho # - force_methods: A list with
methods to force each plan id in the plan_ids array (these methods usually set gucs) # each methods takes one parameter, the connection # - execute_n_times: The number of times to execute the query (0 means don't execute, n>0 means execute n times) - + # returns the following: # - An explain dictionary, containing a mapping between a subset of the parameter values and result tuples, each result tuple consisting of #
+ 2 values: @@ -597,16 +584,17 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho reset_method(conn) # determine the increment - incParamValue = (highParamLimit - lowParamValue) / 10 + incParamValue = (highParamLimit - lowParamValue) // 10 if incParamValue == 0: incParamValue = 1 elif highParamLimit <= lowParamValue: - errMessages.append("Low parameter value %d must be less than high parameter limit %d" % (lowParamValue, highParamLimit)) + errMessages.append( + "Low parameter value %d must be less than high parameter limit %d" % (lowParamValue, highParamLimit)) return (explainDict, execDict, errMessages) # first part, run through the parameter values and determine the plan and cost chosen by the optimizer for paramValue in range(lowParamValue, highParamLimit, incParamValue): - + # do any setup required setupString = setup(paramValue) execute_sql(conn, setupString) @@ -615,7 +603,7 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho (plan, cost) = explain_method(conn, sqlString) explainDict[paramValue] = (plan, cost) log_output("For param value %d the optimizer chose %s with a cost of %f" % (paramValue, plan, cost)) - + # look for the crossover from one plan to another if not expCrossoverOccurred and paramValue > lowParamValue and plan != expPrevPlan: expCrossoverOccurred = True @@ -628,7 +616,8 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho # execute the query, if requested if execute_n_times > 0: - timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, OPTIMIZER_DEFAULT_PLAN, execDict, timedOutDict, errMessages) + timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, OPTIMIZER_DEFAULT_PLAN, + execDict, timedOutDict, errMessages) # second part, force different plans and record the costs for plan_num in range(0, len(plan_ids)): @@ -640,18 +629,22 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho # do any setup required setupString = setup(paramValue) execute_sql(conn, setupString) - # explain the query with the forced plan + # explain the query with the forced plan sqlString = parameterizeMethod(paramValue) (plan, cost) = explain_method(conn, sqlString) if plan_id != plan: - errMessages.append("For parameter value %d we tried to force a %s plan but got a %s plan." % (paramValue, plan_id, plan)) - log_output("For parameter value %d we tried to force a %s plan but got a %s plan." % (paramValue, plan_id, plan)) + errMessages.append("For parameter value %d we tried to force a %s plan but got a %s plan." % ( + paramValue, plan_id, plan)) + log_output("For parameter value %d we tried to force a %s plan but got a %s plan." % ( + paramValue, plan_id, plan)) # update the result dictionary resultList = list(explainDict[paramValue]) defaultPlanCost = resultList[1] # sanity check, the forced plan shouldn't have a cost that is lower than the default plan cost if defaultPlanCost > cost * 1.1: - errMessages.append("For parameter value %d and forced %s plan we got a cost of %f that is lower than the default cost of %f for the default %s plan." % (paramValue, plan_id, cost, defaultPlanCost, resultList[0])) + errMessages.append( + "For parameter value %d and forced %s plan we got a cost of %f that is lower than the default cost of %f for the default %s plan." 
% ( + paramValue, plan_id, cost, defaultPlanCost, resultList[0])) resultList.append(cost) explainDict[paramValue] = tuple(resultList) log_output("For param value %d we forced %s with a cost of %f" % (paramValue, plan, cost)) @@ -659,7 +652,8 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho # execute the forced plan if execute_n_times > 0: # execute the query times and record the mean and stddev of the time in execDict - timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, plan_id, execDict, timedOutDict, errMessages) + timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, plan_id, execDict, + timedOutDict, errMessages) # cleanup at exit reset_method(conn) @@ -678,23 +672,26 @@ def checkForOptimizerErrors(paramValue, chosenPlan, plan_ids, execDict): defaultExeTime = 1E6 defaultStdDev = 0.0 if (paramValue, OPTIMIZER_DEFAULT_PLAN) in execDict: - defaultExeTime, defaultStdDev = execDict[(paramValue, OPTIMIZER_DEFAULT_PLAN)] + defaultExeTime, defaultStdDev, numRows = execDict[(paramValue, OPTIMIZER_DEFAULT_PLAN)] if (paramValue, chosenPlan) in execDict: - forcedExeTime, forcedStdDev = execDict[(paramValue, chosenPlan)] - defaultExeTime = min(defaultExeTime, forcedExeTime) - defaultStdDev = max(defaultStdDev, forcedStdDev) + forcedExeTime, forcedStdDev, numRows = execDict[(paramValue, chosenPlan)] + if forcedExeTime < defaultExeTime: + defaultExeTime = forcedExeTime + defaultStdDev = forcedStdDev for pl in plan_ids: if (paramValue, pl) in execDict: - altExeTime, altStdDev = execDict[(paramValue, pl)] + altExeTime, altStdDev, numRows = execDict[(paramValue, pl)] # The execution times tend to be fairly unreliable. Try to avoid false positives by # requiring a significantly better alternative, measured in standard deviations. 
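            # i.e., plan pl only counts as significantly better when its mean time
            # beats the default by more than glob_sigma_diff times the larger of
            # the two standard deviations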
if altExeTime + glob_sigma_diff * max(defaultStdDev, altStdDev) < defaultExeTime: optimizerError = 100.0 * (defaultExeTime - altExeTime) / defaultExeTime # yes, plan pl is significantly better than the optimizer default choice - return (pl, round(optimizerError,1)) + return (pl, round(optimizerError, 1)) + elif chosenPlan == FALLBACK_PLAN: + return (FALLBACK_PLAN, -1.0) # the optimizer chose the right plan (at least we have not enough evidence to the contrary) return ("", 0.0) @@ -702,7 +699,7 @@ def checkForOptimizerErrors(paramValue, chosenPlan, plan_ids, execDict): # print the results of one test run -def print_results(testTitle, explainDict, execDict, errMessages, plan_ids): +def print_results(testTitle, explainDict, execDict, errMessages, plan_ids, execute_n_times): # print out the title of the test print("") print(testTitle) @@ -710,11 +707,11 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids): exeTimes = len(execDict) > 0 # make a list of plan ids with the default plan ids as first entry - plan_ids_with_default = [ OPTIMIZER_DEFAULT_PLAN ] + plan_ids_with_default = [OPTIMIZER_DEFAULT_PLAN] plan_ids_with_default.extend(plan_ids) # print a header row - headerList = [ "Parameter value", "Plan chosen by optimizer", "Cost" ] + headerList = ["Parameter value", "Plan chosen by optimizer", "Cost"] for p_id in plan_ids: headerList.append("Cost of forced %s plan" % p_id) if exeTimes: @@ -723,10 +720,12 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids): headerList.append("Execution time for default plan (ms)") for p_id in plan_ids: headerList.append("Execution time for forced %s plan (ms)" % p_id) - headerList.append("std dev default") - for p_id in plan_ids: - headerList.append("std dev %s" % p_id) - print(", ".join(headerList)) + if execute_n_times > 1: + headerList.append("Std dev default") + for p_id in plan_ids: + headerList.append("Std dev %s" % p_id) + headerList.append("Selectivity pct") + print((", ".join(headerList))) # sort the keys of the dictionary by parameter value sorted_params = sorted(explainDict.keys()) @@ -735,7 +734,7 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids): for p_val in sorted_params: # add the explain-related values vals = explainDict[p_val] - resultList = [ str(p_val) ] + resultList = [str(p_val)] for v in vals: resultList.append(str(v)) # add the execution-related values, if applicable @@ -746,32 +745,39 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids): resultList.append(str(optimizerError)) stddevList = [] - # our execution times will be a list of 2* (p+1) items, - # (default exe time, forced exe time plan 1 ... p, stddev for default time, stddevs for plans 1...p) + num_rows = -1 + # our execution times will be a list of 2* (p+1) + 1 items, + # (default exe time, forced exe time plan 1 ... 
p, stddev for default time, stddevs for plans 1...p, selectivity) # now loop over the list of p+1 plan ids for plan_id in plan_ids_with_default: if (p_val, plan_id) in execDict: # we did execute the query for this, append the avg time # right away and save the standard deviation for later - mean, stddev = execDict[(p_val, plan_id)] + mean, stddev, local_num_rows = execDict[(p_val, plan_id)] resultList.append(str(mean)) stddevList.append(str(stddev)) + if num_rows >= 0 and local_num_rows != num_rows: + errMessages.append("Inconsistent number of rows for parameter value %d: %d and %d" % (p_val, num_rows, local_num_rows)) + num_rows = local_num_rows else: # we didn't execute this query, add blank values resultList.append("") stddevList.append("") - # now add the standard deviations to the end of resultList - resultList.extend(stddevList) + if execute_n_times > 1: + # now add the standard deviations to the end of resultList + resultList.extend(stddevList) + # finally, the selectivity in percent + resultList.append(str((100.0 * num_rows) / glob_rowcount)) # print a comma-separated list of result values (CSV) - print(", ".join(resultList)) + print((", ".join(resultList))) # if there are any errors, print them at the end, leaving an empty line between the result and the errors if (len(errMessages) > 0): print("") - print("%d diagnostic message(s):" % len(errMessages)) + print(("%d diagnostic message(s):" % len(errMessages))) for e in errMessages: print(e) @@ -779,7 +785,8 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids): # execute a query n times, with a guard against long-running queries, # and record the result in execDict and any errors in errMessages -def timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, plan_id, execDict, timedOutDict, errMessages): +def timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, plan_id, execDict, timedOutDict, + errMessages): # timedOutDict contains a record of queries that have previously timed out: # plan_id -> (lowest param value for timeout, highest value for timeout, direction) # right now we ignore low/high values and direction (whether the execution increases or decreases with @@ -792,17 +799,18 @@ def timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue return # execute the query - mean, stddev, num_execs = timed_execute_n_times(conn, sqlString, execute_n_times) + mean, stddev, num_execs, num_rows = timed_execute_n_times(conn, sqlString, execute_n_times) # record the execution stats - execDict[(paramValue, plan_id)] = (mean, stddev) + execDict[(paramValue, plan_id)] = (mean, stddev, num_rows) # check for timeouts if num_execs < execute_n_times or mean > glob_exe_timeout: # record the timeout, without worrying about low/high values or directions for now timedOutDict[plan_id] = (paramValue, paramValue, "unknown_direction") - errMessages.append("The %s plan for parameter value %d took more than the allowed timeout, it was executed only %d time(s)" % - (plan_id, paramValue, num_execs)) + errMessages.append( + "The %s plan for parameter value %d took more than the allowed timeout, it was executed only %d time(s)" % + (plan_id, paramValue, num_execs)) # Definition of various test suites @@ -832,44 +840,44 @@ def timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue # GUC set statements -_reset_index_scan_forces = [ """ +_reset_index_scan_forces = [""" SELECT enable_xform('CXformImplementBitmapTableGet'); """, - """ + """ SELECT 
enable_xform('CXformGet2TableScan'); """ ] -_force_sequential_scan = [ """ +_force_sequential_scan = [""" SELECT disable_xform('CXformImplementBitmapTableGet'); -""" ] +"""] -_force_index_scan = [ """ +_force_index_scan = [""" SELECT disable_xform('CXformGet2TableScan'); -""" ] +"""] -_reset_index_join_forces = [ """ +_reset_index_join_forces = [""" SELECT enable_xform('CXformPushGbBelowJoin'); """, - """ + """ RESET optimizer_enable_indexjoin; """, - """ + """ RESET optimizer_enable_hashjoin; -""" ] +"""] -_force_hash_join = [ """ +_force_hash_join = [""" SELECT disable_xform('CXformPushGbBelowJoin'); """, - """ + """ SET optimizer_enable_indexjoin to off; -""" ] +"""] -_force_index_nlj = [ """ +_force_index_nlj = [""" SELECT disable_xform('CXformPushGbBelowJoin'); """, - """ + """ SET optimizer_enable_hashjoin to off; -""" ] +"""] # setup statements @@ -882,7 +890,7 @@ ANALYZE cal_bfvtest; ANALYZE cal_bfv_dim; """ -_insert_into_ndv_tables= """ +_insert_into_ndv_tables = """ TRUNCATE cal_ndvtest; INSERT INTO cal_ndvtest SELECT i, i %% %d FROM (SELECT generate_series(1,1000000) i)a; ANALYZE cal_ndvtest; @@ -917,7 +925,49 @@ WHERE bitmap10000 BETWEEN 0 AND %d; _bitmap_select_pt01_pct_multi = """ SELECT count(*) %s FROM cal_txtest -WHERE bitmap10000 = 0 OR bitmap10000 BETWEEN 2 AND %d+2; +WHERE bitmap10000 = 0 OR bitmap10000 BETWEEN 2 AND %d+1; +""" + +_btree_select_unique = """ +SELECT count(*) %s +FROM cal_txtest +WHERE btreeunique BETWEEN 0 AND %d; +""" + +_btree_select_10_pct = """ +SELECT count(*) %s +FROM cal_txtest +WHERE btree10 BETWEEN 0 AND %d; +""" + +_btree_select_1_pct = """ +SELECT count(*) %s +FROM cal_txtest +WHERE btree100 BETWEEN 0 AND %d; +""" + +_btree_select_pt1_pct = """ +SELECT count(*) %s +FROM cal_txtest +WHERE btree1000 BETWEEN 0 AND %d; +""" + +_btree_select_pt01_pct = """ +SELECT count(*) %s +FROM cal_txtest +WHERE btree10000 BETWEEN 0 AND %d; +""" + +_btree_select_pt01_pct_multi = """ +SELECT count(*) %s +FROM cal_txtest +WHERE btree10000 = 0 OR btree10000 BETWEEN 2 AND %d+1; +""" + +_btree_select_unique_in = """ +SELECT count(*) %s +FROM cal_txtest +WHERE btreeunique IN ( %s ); """ _bitmap_index_join = """ @@ -926,6 +976,12 @@ FROM cal_txtest f JOIN cal_dim d ON f.bitmap10000 = d.dim_id WHERE d.dim_id2 BETWEEN 0 AND %d; """ +_btree_index_join = """ +SELECT count(*) %s +FROM cal_txtest f JOIN cal_dim d ON f.btree10000 = d.dim_id +WHERE d.dim_id2 BETWEEN 0 AND %d; +""" + _bfv_join = """ SELECT count(*) FROM cal_bfvtest ft, cal_bfv_dim dt1 @@ -938,6 +994,7 @@ FROM cal_ndvtest WHERE val <= 1000000; """ + # Parameterize methods for the test queries above # ----------------------------------------------------------------------------- @@ -945,63 +1002,139 @@ WHERE val <= 1000000; def parameterize_bitmap_index_10_narrow(paramValue): return _bitmap_select_10_pct % ("", paramValue) + def parameterize_bitmap_index_10_wide(paramValue): return _bitmap_select_10_pct % (", max(txt)", paramValue) + # bitmap index scan with 0...100 % of values, for parameter values 0...10,000, in .01 % increments def parameterize_bitmap_index_10000_narrow(paramValue): return _bitmap_select_pt01_pct % ("", paramValue) + def parameterize_bitmap_index_10000_wide(paramValue): return _bitmap_select_pt01_pct % (", max(txt)", paramValue) + # bitmap index scan with 0...100 % of values, for parameter values 0...10,000, in .01 % increments, multiple ranges def parameterize_bitmap_index_10000_multi_narrow(paramValue): return _bitmap_select_pt01_pct_multi % ("", paramValue) + def 
parameterize_bitmap_index_10000_multi_wide(paramValue): return _bitmap_select_pt01_pct_multi % (", max(txt)", paramValue) + +# bitmap index scan on AO btree index with 0...100 % of values, for parameter values 0...10, in 10 % increments +def parameterize_btree_index_unique_narrow(paramValue): + return _btree_select_unique % ("", paramValue) + + +def parameterize_btree_index_unique_wide(paramValue): + return _btree_select_unique % (", max(txt)", paramValue) + + +def parameterize_btree_index_100_narrow(paramValue): + return _btree_select_1_pct % ("", paramValue) + + +def parameterize_btree_index_100_wide(paramValue): + return _btree_select_1_pct % (", max(txt)", paramValue) + + +# bitmap index scan on AO btree index with 0...100 % of values, for parameter values 0...10,000, in .01 % increments +def parameterize_btree_index_10000_narrow(paramValue): + return _btree_select_pt01_pct % ("", paramValue) + + +def parameterize_btree_index_10000_wide(paramValue): + return _btree_select_pt01_pct % (", max(txt)", paramValue) + + +# bitmap index scan on AO btree index with 0...100 % of values, for parameter values 0...10,000, in .01 % increments, multiple ranges +def parameterize_btree_index_10000_multi_narrow(paramValue): + return _btree_select_pt01_pct_multi % ("", paramValue) + + +def parameterize_btree_index_10000_multi_wide(paramValue): + return _btree_select_pt01_pct_multi % (", max(txt)", paramValue) + + +def parameterize_btree_unique_in_narrow(paramValue): + inlist = "0" + for p in range(1, paramValue+1): + inlist += ", " + str(5*p) + return _btree_select_unique_in % ("", inlist) + + +def parameterize_btree_unique_in_wide(paramValue): + inlist = "0" + for p in range(1, paramValue+1): + inlist += ", " + str(5*p) + return _btree_select_unique_in % (", max(txt)", inlist) + + # index join with 0...100 % of fact values, for parameter values 0...10,000, in .01 % increments def parameterize_bitmap_join_narrow(paramValue): return _bitmap_index_join % ("", paramValue) + def parameterize_bitmap_join_wide(paramValue): return _bitmap_index_join % (", max(f.txt)", paramValue) + +def parameterize_btree_join_narrow(paramValue): + return _btree_index_join % ("", paramValue) + + +def parameterize_btree_join_wide(paramValue): + return _btree_index_join % (", max(f.txt)", paramValue) + + def parameterize_insert_join_bfv(paramValue): return _insert_into_bfv_tables % (paramValue, paramValue) + def parameterize_insert_ndv(paramValue): return _insert_into_ndv_tables % (paramValue) + def parameterize_bitmap_join_bfv(paramValue): return _bfv_join + def parameterize_bitmap_index_ndv(paramValue): return _bitmap_index_ndv + def noSetupRequired(paramValue): return "SELECT 1;" + def explain_bitmap_index(conn, sqlStr): return explain_index_scan(conn, sqlStr) + def reset_index_test(conn): execute_sql_arr(conn, _reset_index_scan_forces) + def force_table_scan(conn): execute_sql_arr(conn, _force_sequential_scan) + def force_bitmap_scan(conn): execute_sql_arr(conn, _force_index_scan) + def reset_index_join(conn): execute_sql_arr(conn, _reset_index_join_forces) + def force_hash_join(conn): execute_sql_arr(conn, _force_hash_join) + def force_index_join(conn): execute_sql_arr(conn, _force_index_nlj) @@ -1009,26 +1142,34 @@ def force_index_join(conn): # Helper methods for running tests # ----------------------------------------------------------------------------- -def run_one_bitmap_scan_test(conn, testTitle, paramValueLow, paramValueHigh, setup, parameterizeMethod, execute_n_times): - plan_ids = [ BITMAP_SCAN, TABLE_SCAN ] - 
force_methods = [ force_bitmap_scan, force_table_scan ] - explainDict, execDict, errors = find_crossover(conn, paramValueLow, paramValueHigh, setup, parameterizeMethod, explain_bitmap_index, reset_index_test, plan_ids, force_methods, execute_n_times) - print_results(testTitle, explainDict, execDict, errors, plan_ids) +def run_one_bitmap_scan_test(conn, testTitle, paramValueLow, paramValueHigh, setup, parameterizeMethod, + execute_n_times): + log_output("Running bitmap scan test " + testTitle) + plan_ids = [BITMAP_SCAN, TABLE_SCAN] + force_methods = [force_bitmap_scan, force_table_scan] + explainDict, execDict, errors = find_crossover(conn, paramValueLow, paramValueHigh, setup, parameterizeMethod, + explain_bitmap_index, reset_index_test, plan_ids, force_methods, + execute_n_times) + print_results(testTitle, explainDict, execDict, errors, plan_ids, execute_n_times) -def run_one_bitmap_join_test(conn, testTitle, paramValueLow, paramValueHigh, setup, parameterizeMethod, execute_n_times): - plan_ids = [ BITMAP_SCAN, TABLE_SCAN ] - force_methods = [ force_index_join, force_hash_join ] - explainDict, execDict, errors = find_crossover(conn, paramValueLow, paramValueHigh, setup, parameterizeMethod, explain_join_scan, reset_index_join, plan_ids, force_methods, execute_n_times) - print_results(testTitle, explainDict, execDict, errors, plan_ids) + +def run_one_bitmap_join_test(conn, testTitle, paramValueLow, paramValueHigh, setup, parameterizeMethod, + execute_n_times): + log_output("Running bitmap join test " + testTitle) + plan_ids = [BITMAP_SCAN, TABLE_SCAN] + force_methods = [force_index_join, force_hash_join] + explainDict, execDict, errors = find_crossover(conn, paramValueLow, paramValueHigh, setup, parameterizeMethod, + explain_join_scan, reset_index_join, plan_ids, force_methods, + execute_n_times) + print_results(testTitle, explainDict, execDict, errors, plan_ids, execute_n_times) # Main driver for the tests # ----------------------------------------------------------------------------- def run_bitmap_index_scan_tests(conn, execute_n_times): - run_one_bitmap_scan_test(conn, - "Bitmap Scan Test, NDV=10, selectivity_pct=10*parameter_value, count(*)", + "Bitmap Scan Test; NDV=10; selectivity_pct=10*parameter_value; count(*)", 0, 10, noSetupRequired, @@ -1037,102 +1178,202 @@ def run_bitmap_index_scan_tests(conn, execute_n_times): # all full table scan, no crossover run_one_bitmap_scan_test(conn, - "Bitmap Scan Test, NDV=10, selectivity_pct=10*parameter_value, max(txt)", + "Bitmap Scan Test; NDV=10; selectivity_pct=10*parameter_value; max(txt)", 0, - 3, + 6, noSetupRequired, parameterize_bitmap_index_10_wide, execute_n_times) run_one_bitmap_scan_test(conn, - "Bitmap Scan Test, NDV=10000, selectivity_pct=0.01*parameter_value, count(*)", + "Bitmap Scan Test; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)", 0, - 20, + 600 if glob_appendonly else 20, noSetupRequired, parameterize_bitmap_index_10000_narrow, execute_n_times) run_one_bitmap_scan_test(conn, - "Bitmap Scan Test, NDV=10000, selectivity_pct=0.01*parameter_value, count(*), largeNDV test", + "Bitmap Scan Test; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)", 0, - 300, - noSetupRequired, - parameterize_bitmap_index_10000_narrow, - execute_n_times) - - run_one_bitmap_scan_test(conn, - "Bitmap Scan Test, NDV=10000, selectivity_pct=0.01*parameter_value, max(txt)", - 5, - 25, + 300 if glob_appendonly else 20, noSetupRequired, parameterize_bitmap_index_10000_wide, execute_n_times) run_one_bitmap_scan_test(conn, - "Bitmap Scan 
Test, multi-range, NDV=10000, selectivity_pct=0.01*parameter_value, count(*)", + "Bitmap Scan Test; multi-range; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)", 0, - 100, + 600 if glob_appendonly else 20, noSetupRequired, parameterize_bitmap_index_10000_multi_narrow, execute_n_times) run_one_bitmap_scan_test(conn, - "Bitmap Scan Test, multi-range, NDV=10000, selectivity_pct=0.01*parameter_value, max(txt)", + "Bitmap Scan Test; multi-range; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)", 0, - 60, + 300 if glob_appendonly else 20, noSetupRequired, parameterize_bitmap_index_10000_multi_wide, execute_n_times) + +def run_bitmap_ndv_scan_tests(conn, execute_n_times): run_one_bitmap_scan_test(conn, - "Bitmap Scan Test, ndv test, rows=1000000, parameter = insert statement modulo, count(*)", - 1, # modulo ex. would replace x in the following: SELECT i % x FROM generate_series(1,10000)i; - 10000, #max here is 10000 (num of rows) + "Bitmap Scan Test; ndv test; rows=1000000; parameter = insert statement modulo; count(*)", + 1, + # modulo ex. would replace x in the following: SELECT i % x FROM generate_series(1,10000)i; + 10000, # max here is 10000 (num of rows) parameterize_insert_ndv, parameterize_bitmap_index_ndv, execute_n_times) -def run_bitmap_index_join_tests(conn, execute_n_times): +def run_btree_ao_index_scan_tests(conn, execute_n_times): + # use the unique btree index (no bitmap equivalent), 0 to 10,000 rows + run_one_bitmap_scan_test(conn, + "Btree Scan Test; unique; selectivity_pct=100*parameter_value/%d; count(*)" % glob_rowcount, + 0, + glob_rowcount // 10, # 10% is the max allowed selectivity for a btree scan on an AO table + noSetupRequired, + parameterize_btree_index_unique_narrow, + execute_n_times) + + run_one_bitmap_scan_test(conn, + "Btree Scan Test; unique; selectivity_pct=100*parameter_value/%d; max(txt)" % glob_rowcount, + 0, + glob_rowcount // 20, + noSetupRequired, + parameterize_btree_index_unique_wide, + execute_n_times) + + run_one_bitmap_scan_test(conn, + "Btree Scan Test; NDV=100; selectivity_pct=parameter_value; count(*)", + 0, + 5, + noSetupRequired, + parameterize_btree_index_100_narrow, + execute_n_times) + + # all full table scan, no crossover + run_one_bitmap_scan_test(conn, + "Btree Scan Test; NDV=100; selectivity_pct=parameter_value; max(txt)", + 0, + 5, + noSetupRequired, + parameterize_btree_index_100_wide, + execute_n_times) + + run_one_bitmap_scan_test(conn, + "Btree Scan Test; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)", + 0, + 500, + noSetupRequired, + parameterize_btree_index_10000_narrow, + execute_n_times) + + run_one_bitmap_scan_test(conn, + "Btree Scan Test; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)", + 0, + 1000, + noSetupRequired, + parameterize_btree_index_10000_wide, + execute_n_times) + + run_one_bitmap_scan_test(conn, + "Btree Scan Test; multi-range; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)", + 0, + 1000, + noSetupRequired, + parameterize_btree_index_10000_multi_narrow, + execute_n_times) + + run_one_bitmap_scan_test(conn, + "Btree Scan Test; multi-range; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)", + 0, + 1000, + noSetupRequired, + parameterize_btree_index_10000_multi_wide, + execute_n_times) + + run_one_bitmap_scan_test(conn, + "Btree Scan Test; in-list; selectivity_pct=100*parameter_value/%d; count(*)" % glob_rowcount, + 0, + 5000, # length of IN list + noSetupRequired, + parameterize_btree_unique_in_narrow, + execute_n_times) + + 
+
+def run_index_join_tests(conn, execute_n_times):
     run_one_bitmap_join_test(conn,
-                             "Bitmap Join Test, NDV=10000, selectivity_pct=0.01*parameter_value, count(*)",
+                             "Bitmap Join Test; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)",
                              0,
-                             900,
+                             400,
                              noSetupRequired,
                              parameterize_bitmap_join_narrow,
                              execute_n_times)
-
+
     run_one_bitmap_join_test(conn,
-                             "Bitmap Join Test, NDV=10000, selectivity_pct=0.01*parameter_value, max(txt)",
+                             "Bitmap Join Test; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)",
                              0,
-                             900,
+                             300,
                              noSetupRequired,
                              parameterize_bitmap_join_wide,
                              execute_n_times)
 
     run_one_bitmap_join_test(conn,
-                             "Bitmap Join BFV Test, Large Data, parameter = num rows inserted",
-                             10000, # num of rows inserted
+                             "Btree Join Test; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)",
+                             0,
+                             500,
+                             noSetupRequired,
+                             parameterize_btree_join_narrow,
+                             execute_n_times)
+
+    run_one_bitmap_join_test(conn,
+                             "Btree Join Test; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)",
+                             0,
+                             400,
+                             noSetupRequired,
+                             parameterize_btree_join_wide,
+                             execute_n_times)
+
+
+def run_bfv_join_tests(conn, execute_n_times):
+    run_one_bitmap_join_test(conn,
+                             "Bitmap Join BFV Test; Large Data; parameter = num rows inserted",
+                             10000,  # num of rows inserted
                              900000,
                              parameterize_insert_join_bfv,
                              parameterize_bitmap_join_bfv,
                              execute_n_times)
-
+
 # common parts of all test suites, create tables, run tests, drop objects
 # -----------------------------------------------------------------------------
 
 # create the table(s), as regular or AO table, and insert num_rows into the main table
 def createDB(conn, use_ao, num_rows):
+    global glob_appendonly
+
     create_options = ""
     if use_ao:
         create_options = _with_appendonly
+        glob_appendonly = True
     create_cal_table_stmt = _create_cal_table % create_options
     create_bfv_table = _create_bfv_table % create_options
     create_ndv_table = _create_ndv_table % create_options
-    insert_into_temp_stmt = _insert_into_temp % (1,num_rows)
-    insert_into_other_stmt = _insert_into_other_tables % (1,10000)
-
+    insert_into_temp_stmt = _insert_into_temp % num_rows
+    insert_into_other_stmt = _insert_into_other_tables % (1, glob_dim_table_rows)
+
     execute_sql(conn, _drop_tables)
     execute_sql(conn, create_cal_table_stmt)
     execute_sql(conn, create_bfv_table)
@@ -1145,27 +1386,103 @@ def createDB(conn, use_ao, num_rows):
     execute_sql_arr(conn, _create_index_arr)
     execute_sql_arr(conn, _create_bfv_index_arr)
     execute_sql_arr(conn, _create_ndv_index_arr)
-    if use_ao:
-        execute_sql_arr(conn, _create_btree_indexes_ao_arr)
+    execute_sql_arr(conn, _create_btree_indexes_arr)
     execute_sql(conn, _analyze_table)
     commit_db(conn)
+
 
 def dropDB(conn):
-    execute_sql(conn, _drop_tables)
+    execute_sql(conn, _drop_tables)
+
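# ----------------------------------------------------------------------------
# [Editor's note] Typical lifecycle these helpers support, mirroring main()
# further below; the connection values and row counts are made up for
# illustration:
#
#   conn = connect("localhost", 15432, "caldb")
#   createDB(conn, use_ao=True, num_rows=10000000)  # AO fact table, 10M rows
#   smoothStatistics(conn, 10000000)                # exact optimizer stats
#   run_btree_ao_index_scan_tests(conn, 3)          # execute each query 3 times
#   dropDB(conn)
#
# (main() also sets glob_rowcount from the row count before running tests)
# ----------------------------------------------------------------------------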
+# smooth statistics for a single integer column with a given row count and NDV;
+# values are assumed uniformly distributed between 1 and the NDV
+# (between 1 and row_count for unique columns, specified as ndv = -1)
+#
+# For NDVs of 100 or less, list all of them as most common values (MCVs)
+# For NDVs of more than 100, generate a histogram with 100 buckets
+# Set the correlation to 0 for all columns, since the data was shuffled randomly
+def smoothStatisticsForOneCol(conn, table_name, attnum, row_count, ndv):
+    # derive the stadistinct value; an ndv of -1 means a unique column,
+    # so use the row count as the actual NDV
+    if ndv == -1:
+        stadistinct = -1
+        ndv = row_count
+    else:
+        stadistinct = ndv
+
+    # correlation to physical row ordering is 0 for all columns
+    corr = 0.0
+
+    # stakind: 1 is a list of most common values and frequencies, 2 is a histogram with range buckets
+    stakind = 1
+    # arrays for stanumbers and stavalues
+    stanumbers = []
+    stavalues = []
+    stanumbers_txt = "NULL"
+    num_values = min(ndv, 100)
+
+    if ndv <= 100:
+        # produce "ndv" MCVs, each with the same frequency
+        for i in range(1, num_values + 1):
+            stanumbers.append(str(float(1) / ndv))
+            stavalues.append(str(i))
+        stanumbers_txt = "'{ " + ", ".join(stanumbers) + " }'::float[]"
+    else:
+        # produce a uniformly distributed histogram with 100 buckets (101 boundaries)
+        stakind = 2
+        stavalues.append(str(1))
+        for j in range(1, num_values + 1):
+            stavalues.append(str((j * ndv) // num_values))
+
+    stavalues_txt = "'{ " + ", ".join(stavalues) + " }'::int[]"
+    execute_sql(conn, _update_pg_stats % (stadistinct, stakind, stanumbers_txt, stavalues_txt, corr, table_name, attnum))
 
 # ensure that we have perfect histogram statistics on the relevant columns
-def smoothStatistics(conn):
+def smoothStatistics(conn, num_fact_table_rows):
+    prev_table_name = ""
     if glob_gpdb_major_version > 5:
         execute_sql(conn, _allow_system_mods)
     else:
         execute_sql(conn, _allow_system_mods_v5)
-    execute_sql_arr(conn, _fix_statistics)
+    for tup in _stats_cols_to_fix:
+        # note that col_name is just for human readability
+        (table_name, col_name, attnum, ndv, table_rows) = tup
+        if table_rows == -1:
+            table_rows = num_fact_table_rows
+        smoothStatisticsForOneCol(conn, table_name, attnum, table_rows, ndv)
+        if prev_table_name != table_name:
+            prev_table_name = table_name
+            execute_sql(conn, _update_pg_class % (table_rows, table_name))
     commit_db(conn)
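# ----------------------------------------------------------------------------
# [Editor's note] A worked example of what smoothStatisticsForOneCol emits
# (illustration only; the attnum of 2 is made up):
#
#   smoothStatisticsForOneCol(conn, "cal_txtest", 2, 1000000, 4)
#     takes the MCV branch (ndv <= 100):
#       stakind        = 1
#       stanumbers_txt = "'{ 0.25, 0.25, 0.25, 0.25 }'::float[]"  # 1/ndv each
#       stavalues_txt  = "'{ 1, 2, 3, 4 }'::int[]"
#
#   smoothStatisticsForOneCol(conn, "cal_txtest", 2, 1000000, 10000)
#     takes the histogram branch (ndv > 100):
#       stakind        = 2
#       stavalues_txt  = 101 bucket boundaries 1, 100, 200, ..., 10000,
#                        i.e. 100 equal-width buckets over the uniform data
#
# Both branches end in the same _update_pg_stats UPDATE against pg_statistic,
# plus one _update_pg_class row-count fix per table (see smoothStatistics).
# ----------------------------------------------------------------------------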
elif test_unit == "btree_ao_scan_tests": + run_btree_ao_index_scan_tests(conn, args.execute) + elif test_unit == "index_join_tests": + run_index_join_tests(conn, args.execute) + elif test_unit == "bfv_join_tests": + run_bfv_join_tests(conn, args.execute) elif test_unit == "none": print("Skipping tests") - + if args.drop: dropDB(conn) + + conn.close() if glob_log_file != None: glob_log_file.close() - + + if __name__ == "__main__": main() diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt index b27b686c65709badf1160899fc09ae1b76b40011..28b91cc56bb4e2e449b1ea58055fbb141af8f06c 100644 --- a/src/backend/gporca/server/CMakeLists.txt +++ b/src/backend/gporca/server/CMakeLists.txt @@ -87,7 +87,7 @@ CTypeModifierTest: TypeModifierColumn TypeModifierCast TypeModifierConst TypeModifierDoubleMappableConst TypeModifierArrayRef; CIndexScanTest: -BTreeIndex-Against-InList BTreeIndex-Against-ScalarSubquery +BTreeIndex-Against-InList BTreeIndex-Against-InListLarge BTreeIndex-Against-ScalarSubquery IndexScan-AOTable IndexScan-DroppedColumns IndexScan-BoolTrue IndexScan-BoolFalse IndexScan-Relabel IndexGet-OuterRefs LogicalIndexGetDroppedCols NewBtreeIndexScanCost IndexScan-ORPredsNonPart IndexScan-ORPredsAOPart IndexScan-AndedIn;