diff --git a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
index 3aa8f79af0cb69a99a8d4b9cbbc24ed29c90f8a2..c42b21ed64f37b7839a27b8bd9c6500cea53b4a0 100644
--- a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
+++ b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
@@ -26,6 +26,7 @@
#include "gpopt/operators/CPhysicalMotion.h"
#include "gpopt/operators/CPhysicalPartitionSelector.h"
#include "gpopt/operators/CPredicateUtils.h"
+#include "gpopt/operators/CScalarBitmapIndexProbe.h"
#include "naucrates/statistics/CStatisticsUtils.h"
#include "gpopt/operators/CExpression.h"
#include "gpdbcost/CCostModelGPDB.h"
@@ -1618,6 +1619,18 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
CColRefSet *pcrsUsed = pexprIndexCond->DeriveUsedColumns();
CColRefSet *outerRefs = exprhdl.DeriveOuterReferences();
CColRefSet *pcrsLocalUsed = GPOS_NEW(mp) CColRefSet(mp, *pcrsUsed);
+ IMDIndex::EmdindexType indexType = IMDIndex::EmdindSentinel;
+
+ if (COperator::EopScalarBitmapIndexProbe == pexprIndexCond->Pop()->Eopid())
+ {
+ indexType = CScalarBitmapIndexProbe::PopConvert(pexprIndexCond->Pop())
+ ->Pindexdesc()
+ ->IndexType();
+ }
+
+ BOOL isInPredOnBtreeIndex =
+ (IMDIndex::EmdindBtree == indexType &&
+ COperator::EopScalarArrayCmp == (*pexprIndexCond)[0]->Pop()->Eopid());
// subtract outer references from the used colrefs, so we can see
// how many colrefs are used for this table
@@ -1632,9 +1645,17 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
if (COperator::EopScalarBitmapIndexProbe !=
pexprIndexCond->Pop()->Eopid() ||
- 1 < pcrsLocalUsed->Size())
+ 1 < pcrsLocalUsed->Size() ||
+ (isInPredOnBtreeIndex && rows > 2.0 &&
+ !GPOS_FTRACE(EopttraceCalibratedBitmapIndexCostModel)))
{
- // child is Bitmap AND/OR, or we use Multi column index
+ // Child is Bitmap AND/OR, or we use a multi-column index, or this is an IN predicate
+ // that's used with the "calibrated" cost model.
+ // We handle the IN predicate in this code path to avoid plan regressions from
+ // earlier versions of the code that treated IN predicates like ORs and therefore
+ // also handled them in this code path. This is especially noticeable for btree
+ // indexes that often have a high NDV, because the small/large NDV cost model
+ // produces very high cost for cases with a higher NDV.
const CDouble dIndexFilterCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit)
@@ -1671,6 +1692,11 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
// if the expression is const table get, the pcrsUsed is empty
// so we use minimum value MinDistinct for dNDV in that case.
CDouble dNDV = CHistogram::MinDistinct;
+ CDouble dNDVThreshold =
+ pcmgpdb->GetCostModelParams()
+ ->PcpLookup(CCostModelParamsGPDB::EcpBitmapNDVThreshold)
+ ->Get();
+
if (rows < 1.0)
{
// if we aren't accessing a row every rebind, then don't charge a cost for those cases where we don't have a row
@@ -1698,10 +1724,7 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
if (!GPOS_FTRACE(EopttraceCalibratedBitmapIndexCostModel))
{
- CDouble dNDVThreshold =
- pcmgpdb->GetCostModelParams()
- ->PcpLookup(CCostModelParamsGPDB::EcpBitmapNDVThreshold)
- ->Get();
+ // optimizer_cost_model = 'calibrated'
if (dNDVThreshold <= dNDV)
{
result = CostBitmapLargeNDV(pcmgpdb, pci, dNDV);
@@ -1713,44 +1736,66 @@ CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
}
else
{
+ // optimizer_cost_model = 'experimental'
CDouble dBitmapIO =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapIOCostSmallNDV)
->Get();
- CDouble dInitScan =
+ CDouble c5_dInitScan =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
->Get();
+ CDouble c3_dBitmapPageCost =
+ pcmgpdb->GetCostModelParams()
+ ->PcpLookup(CCostModelParamsGPDB::EcpBitmapPageCost)
+ ->Get();
+ BOOL isAOTable = CPhysicalScan::PopConvert(exprhdl.Pop())
+ ->Ptabdesc()
+ ->IsAORowOrColTable();
+
+ // some cost constants determined with the cal_bitmap_test.py script
+ CDouble c1_cost_per_row(0.03);
+ CDouble c2_cost_per_byte(0.0001);
+ CDouble bitmap_union_cost_per_distinct_value(0.000027);
+ CDouble init_cost_advantage_for_bitmap_scan(0.9);
- if (1 < pcrsUsed->Size()) // it is a join
+ if (IMDIndex::EmdindBtree == indexType)
{
- // The numbers below were experimentally determined using regression analysis in the cal_bitmap_test.py script
- // The following dSizeCost is in the form C1 * rows + C2 * rows * width. This is because the width should have
- // significantly less weight than rows as the execution time does not grow as fast in regards to width
- CDouble dSizeCost =
- rows * (1 + std::max(width * 0.005, 1.0)) * 0.05;
- result = CCost( // cost for each byte returned by the index scan plus cost for incremental rebinds
- pci->NumRebinds() * (dBitmapIO * dSizeCost + dInitRebind) +
- // the BitmapPageCost * dNDV takes into account the idea of multiple tuples being on the same page.
- // If you have a small NDV, the likelihood of multiple tuples matching on one page is high and so the
- // page cost is reduced. Even though the page cost will decrease, the cost of accessing each tuple will
- // dominate. Likewise, if the NDV is large, the num of tuples matching per page is lower so the page
- // cost should be higher
- dInitScan * dNDV);
+ // btree indexes are not sensitive to the NDV, since they don't have any bitmaps
+ c3_dBitmapPageCost = 0.0;
}
- else
+
+ // Give the index scan a small initial advantage over the table scan, so we use indexes
+ // for small tables - this should avoid having table scan and index scan costs being
+ // very close together for many small queries.
+ c5_dInitScan = c5_dInitScan * init_cost_advantage_for_bitmap_scan;
+
+ // The numbers below were experimentally determined using regression analysis in the cal_bitmap_test.py script
+ // The following dSizeCost is in the form C1 * rows + C2 * rows * width. This is because the width should have
+ // significantly less weight than rows as the execution time does not grow as fast in regards to width
+ CDouble dSizeCost = dBitmapIO * (rows * c1_cost_per_row +
+ rows * width * c2_cost_per_byte);
+
+ CDouble bitmapUnionCost = 0;
+
+ if (!isAOTable && indexType == IMDIndex::EmdindBitmap && dNDV > 1.0)
{
- // The numbers below were experimentally determined using regression analysis in the cal_bitmap_test.py script
- CDouble dSizeCost =
- rows * (1 + std::max(width * 0.005, 1.0)) * 0.001;
-
- result =
- CCost( // cost for each byte returned by the index scan plus cost for incremental rebinds
- pci->NumRebinds() *
- (dBitmapIO * dSizeCost + 10 * dInitRebind) * dNDV +
- // similar to above, the dInitScan * dNDV takes into account the likelihood of multiple tuples per page
- dInitScan * dNDV);
+ CDouble baseTableRows = CPhysicalScan::PopConvert(exprhdl.Pop())
+ ->PstatsBaseTable()
+ ->Rows();
+
+ // for bitmap index scans on heap tables, we found that there is an additional cost
+ // associated with unioning them that is proportional to the number of bitmaps involved
+ // (dNDV-1) times the width of the bitmap (proportional to the number of rows in the table)
+ bitmapUnionCost = std::max(0.0, dNDV.Get() - 1.0) *
+ baseTableRows *
+ bitmap_union_cost_per_distinct_value;
}
+
+ result = CCost(pci->NumRebinds() *
+ (dSizeCost + dNDV * c3_dBitmapPageCost +
+ dInitRebind + bitmapUnionCost) +
+ c5_dInitScan);
}
}
diff --git a/src/backend/gporca/libgpdbcost/src/CCostModelParamsGPDB.cpp b/src/backend/gporca/libgpdbcost/src/CCostModelParamsGPDB.cpp
index a176a29bc7ae2c48830fa088ae90dfbe6de9e36e..c7b9c65f47c0ddb01b67b7508822a6417f94ebaf 100644
--- a/src/backend/gporca/libgpdbcost/src/CCostModelParamsGPDB.cpp
+++ b/src/backend/gporca/libgpdbcost/src/CCostModelParamsGPDB.cpp
@@ -169,7 +169,7 @@ const CDouble CCostModelParamsGPDB::DBitmapPageCostLargeNDV(83.1651);
const CDouble CCostModelParamsGPDB::DBitmapPageCostSmallNDV(204.3810);
// default bitmap page cost with no assumption about NDV
-const CDouble CCostModelParamsGPDB::DBitmapPageCost(50.4381);
+const CDouble CCostModelParamsGPDB::DBitmapPageCost(10);
// default threshold of NDV for bitmap costing
const CDouble CCostModelParamsGPDB::DBitmapNDVThreshold(200);
diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
index 781f180d9b6682b321ca92d7a9e185076df89f29..3fd7ede9060b79d9dd0fc40a1685a23d0690158c 100644
--- a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
+++ b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
@@ -76,7 +76,7 @@ using namespace gpopt;
// predicates less selective than this threshold
// (selectivity is greater than this number) lead to
// disqualification of a btree index on an AO table
-#define AO_TABLE_BTREE_INDEX_SELECTIVITY_THRESHOLD 0.05
+#define AO_TABLE_BTREE_INDEX_SELECTIVITY_THRESHOLD 0.10
//---------------------------------------------------------------------------
// @function:
diff --git a/src/backend/gporca/scripts/cal_bitmap_test.py b/src/backend/gporca/scripts/cal_bitmap_test.py
index 0ba9cc754b74df43b3b066d801252107ee3cc23d..9c52a7538644b3d4644518ed6e941f3e5dd01563 100755
--- a/src/backend/gporca/scripts/cal_bitmap_test.py
+++ b/src/backend/gporca/scripts/cal_bitmap_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# Optimizer calibration test for bitmap indexes
#
@@ -25,10 +25,12 @@ import argparse
import time
import re
import math
+import os
+import sys
try:
from gppylib.db import dbconn
-except ImportError, e:
+except ImportError as e:
sys.exit('ERROR: Cannot import modules. Please check that you have sourced greenplum_path.sh. Detail: ' + str(e))
# constants
@@ -64,20 +66,27 @@ NL_JOIN = "nl_join"
NL_JOIN_PATTERN = r"Nested Loop"
NL_JOIN_PATTERN_V5 = r"Nested Loop"
-OPTIMIZER_DEFAULT_PLAN = "optimizer"
-
+FALLBACK_PLAN = "fallback"
+FALLBACK_PATTERN = "Postgres query optimizer"
+FALLBACK_PATTERN_V5 = "legacy query optimizer"
+OPTIMIZER_DEFAULT_PLAN = "optimizer"
# global variables
# -----------------------------------------------------------------------------
-glob_verbose=False
+# constants
# only consider optimizer errors beyond x * sigma (standard deviation) as significant
-glob_sigma_diff=3
-glob_log_file=None
-glob_exe_timeout=40000
-glob_gpdb_major_version=7
+glob_sigma_diff = 3
+glob_log_file = None
+glob_exe_timeout = 40000
+glob_gpdb_major_version = 7
+glob_dim_table_rows = 10000
+# global variables that may be modified
+glob_verbose = False
+glob_rowcount = -1
+glob_appendonly = False
# SQL statements, DDL and DML
# -----------------------------------------------------------------------------
@@ -119,42 +128,41 @@ DISTRIBUTED BY (id);
"""
_with_appendonly = """
-WITH (appendonly=true, compresslevel=5, compresstype=zlib)
+WITH (appendonly=true)
"""
-_create_other_tables = [ """
+_create_other_tables = ["""
CREATE TABLE cal_temp_ids(f_id int, f_rand double precision) DISTRIBUTED BY (f_id);
""",
- """
+ """
CREATE TABLE cal_dim(dim_id int,
dim_id2 int,
txt text)
DISTRIBUTED BY (dim_id);
""",
-"""
+ """
CREATE TABLE cal_bfv_dim (id integer, col2 integer) DISTRIBUTED BY (id);
-""" ]
+"""]
# insert into temp table. Parameters:
-# - integer start value (usually 0 or 1)
-# - integer stop value (suggested value is 10000000)
+# - integer stop value (suggested value is 10,000,000)
_insert_into_temp = """
-INSERT INTO cal_temp_ids SELECT x, random() FROM (SELECT * FROM generate_series(%d,%d)) T(x);
+INSERT INTO cal_temp_ids SELECT x, random() FROM (SELECT * FROM generate_series(1,%d)) T(x);
"""
_insert_into_table = """
INSERT INTO cal_txtest
SELECT f_id,
f_id,
- f_id%10,
- f_id%100,
- f_id%1000,
- f_id%10000,
- f_id%10,
- f_id%100,
- f_id%1000,
- f_id%10000,
- repeat('a', 900)
+ f_id%10 + 1,
+ f_id%100 + 1,
+ f_id%1000 + 1,
+ f_id%10000 + 1,
+ f_id%10 + 1,
+ f_id%100 + 1,
+ f_id%1000 + 1,
+ f_id%10000 + 1,
+ repeat('a', 960)
FROM cal_temp_ids
order by f_rand;
"""
@@ -166,49 +174,49 @@ INSERT INTO cal_dim SELECT x, x, repeat('d', 100) FROM (SELECT * FROM generate_s
_create_index_arr = ["""
CREATE INDEX cal_txtest_i_bitmap_10 ON cal_txtest USING bitmap(bitmap10);
""",
- """
+ """
CREATE INDEX cal_txtest_i_bitmap_100 ON cal_txtest USING bitmap(bitmap100);
""",
- """
+ """
CREATE INDEX cal_txtest_i_bitmap_1000 ON cal_txtest USING bitmap(bitmap1000);
""",
- """
+ """
CREATE INDEX cal_txtest_i_bitmap_10000 ON cal_txtest USING bitmap(bitmap10000);
""",
- ]
+ ]
_create_bfv_index_arr = ["""
CREATE INDEX idx_cal_bfvtest_bitmap ON cal_bfvtest USING bitmap(id);
""",
- ]
+ ]
_create_ndv_index_arr = ["""
CREATE INDEX cal_ndvtest_bitmap ON cal_ndvtest USING bitmap(val);
""",
- ]
+ ]
-_create_btree_indexes_ao_arr = ["""
+_create_btree_indexes_arr = ["""
CREATE INDEX cal_txtest_i_btree_unique ON cal_txtest USING btree(btreeunique);
""",
- """
+ """
CREATE INDEX cal_txtest_i_btree_10 ON cal_txtest USING btree(btree10);
""",
- """
+ """
CREATE INDEX cal_txtest_i_btree_100 ON cal_txtest USING btree(btree100);
""",
- """
+ """
CREATE INDEX cal_txtest_i_btree_1000 ON cal_txtest USING btree(btree1000);
""",
- """
+ """
CREATE INDEX cal_txtest_i_btree_10000 ON cal_txtest USING btree(btree10000);
""",
- """
+ """
CREATE INDEX idx_cal_bfvtest_btree ON cal_bfvtest USING btree(id);
""",
- """
+ """
CREATE INDEX cal_ndvtest_btree ON cal_ndvtest USING btree(val);
""",
-]
+ ]
_analyze_table = """
ANALYZE cal_txtest;
@@ -222,120 +230,60 @@ _allow_system_mods_v5 = """
SET allow_system_table_mods to 'dml';
"""
-# Make sure pg_statistics has smooth and precise statistics, so that the cardinality estimates we get are very precise
-#
-# For NDVs of 100 or less, list all of them
-# For NDVs of more than 100, make some dummy NDVs and 5 intervals of the same length
-# So far, id and btreeunique are not yet used (staattnums 1 and 2), no stats are changed
+# Make sure pg_statistic and pg_class have accurate statistics, so that the cardinality estimates we get are very precise
-_fix_statistics = ["""
-UPDATE pg_statistic
- SET stadistinct = 10,
- stakind1 = 1,
- stanumbers1 = '{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1 }',
- stavalues1 = '{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}'::int[]
-WHERE starelid = 'cal_txtest'::regclass AND staattnum = 3;
-""",
- """
-UPDATE pg_statistic
- SET stadistinct = 100,
- stakind1 = 1,
- stanumbers1 = '{ 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01 }',
- stavalues1 = '{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 }'::int[]
-WHERE starelid = 'cal_txtest'::regclass AND staattnum = 4;
-""",
- """
-UPDATE pg_statistic
- SET stadistinct = 1000,
- stakind1 = 1,
- stanumbers1 = '{ 0.001, 0.001, 0.001 }',
- stavalues1 = '{100, 200, 300}'::int[],
- stakind2 = 2,
- stanumbers2 = '{}',
- stavalues2 = '{0, 199, 399, 599, 799, 999}'::int[]
-WHERE starelid = 'cal_txtest'::regclass AND staattnum = 5;
-""",
- """
-UPDATE pg_statistic
- SET stadistinct = 10000,
- stakind1 = 1,
- stanumbers1 = '{ 0.0001, 0.0001, 0.0001 }',
- stavalues1 = '{1000, 2000, 3000}'::int[],
- stakind2 = 2,
- stanumbers2 = '{}',
- stavalues2 = '{0, 1999, 3999, 5999, 7999, 9999}'::int[]
-WHERE starelid = 'cal_txtest'::regclass AND staattnum = 6;
-""",
- """
-UPDATE pg_statistic
- SET stadistinct = 10,
- stakind1 = 1,
- stanumbers1 = '{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1 }',
- stavalues1 = '{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}'::int[]
-WHERE starelid = 'cal_txtest'::regclass AND staattnum = 7;
-""",
- """
-UPDATE pg_statistic
- SET stadistinct = 100,
- stakind1 = 1,
- stanumbers1 = '{ 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01 }',
- stavalues1 = '{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 }'::int[]
-WHERE starelid = 'cal_txtest'::regclass AND staattnum = 8;
-""",
- """
-UPDATE pg_statistic
- SET stadistinct = 1000,
- stakind1 = 1,
- stanumbers1 = '{ 0.001, 0.001, 0.001 }',
- stavalues1 = '{100, 200, 300}'::int[],
- stakind2 = 2,
- stanumbers2 = '{}',
- stavalues2 = '{0, 199, 399, 599, 799, 999}'::int[]
-WHERE starelid = 'cal_txtest'::regclass AND staattnum = 9;
-""",
- """
-UPDATE pg_statistic
- SET stadistinct = 10000,
- stakind1 = 1,
- stanumbers1 = '{ 0.0001, 0.0001, 0.0001 }',
- stavalues1 = '{1000, 2000, 3000}'::int[],
- stakind2 = 2,
- stanumbers2 = '{}',
- stavalues2 = '{0, 1999, 3999, 5999, 7999, 9999}'::int[]
-WHERE starelid = 'cal_txtest'::regclass AND staattnum = 10;
-""",
- """
-UPDATE pg_statistic
- SET stadistinct = 10000,
- stakind1 = 1,
- stanumbers1 = '{ 0.0001, 0.0001, 0.0001 }',
- stavalues1 = '{1000, 2000, 3000}'::int[],
- stakind2 = 2,
- stanumbers2 = '{}',
- stavalues2 = '{0, 1999, 3999, 5999, 7999, 9999}'::int[]
-WHERE starelid = 'cal_dim'::regclass AND staattnum = 1;
-""",
- """
+_update_pg_class = """
+UPDATE pg_class
+ SET reltuples = %i
+WHERE relname = '%s';
+"""
+
+# add an MCV or histogram (stakind1 = 1 or 2) and a correlation (stakind2 = 3) value
+_update_pg_stats = """
UPDATE pg_statistic
- SET stadistinct = 10000,
- stakind1 = 1,
- stanumbers1 = '{ 0.0001, 0.0001, 0.0001 }',
- stavalues1 = '{1000, 2000, 3000}'::int[],
- stakind2 = 2,
- stanumbers2 = '{}',
- stavalues2 = '{0, 1999, 3999, 5999, 7999, 9999}'::int[]
-WHERE starelid = 'cal_dim'::regclass AND staattnum = 2;
-""" ]
+ SET stadistinct = %f,
+ stakind1 = %d,
+ stanumbers1 = %s,
+ stavalues1 = %s,
+ stakind2 = 3,
+ stanumbers2 = '{ %f }',
+ stavalues2 = NULL,
+ stakind3 = 0,
+ stanumbers3 = NULL,
+ stavalues3 = NULL,
+ stakind4 = 0,
+ stanumbers4 = NULL,
+ stavalues4 = NULL
+WHERE starelid = '%s'::regclass AND staattnum = %i;
+"""
+# columns to fix, in the format (table name, column name, attnum, ndv, num rows)
+# use -1 as the NDV for unique columns and use -1 for the variable number of rows in the fact table
+_stats_cols_to_fix = [
+ ('cal_txtest', 'id', 1, -1, -1),
+ ('cal_txtest', 'btreeunique', 2, -1, -1),
+ ('cal_txtest', 'btree10', 3, 10, -1),
+ ('cal_txtest', 'btree100', 4, 100, -1),
+ ('cal_txtest', 'btree1000', 5, 1000, -1),
+ ('cal_txtest', 'btree10000', 6, 10000, -1),
+ ('cal_txtest', 'bitmap10', 7, 10, -1),
+ ('cal_txtest', 'bitmap100', 8, 100, -1),
+ ('cal_txtest', 'bitmap1000', 9, 1000, -1),
+ ('cal_txtest', 'bitmap10000', 10, 10000, -1),
+ ('cal_dim', 'dim_id', 1, -1, glob_dim_table_rows),
+ ('cal_dim', 'dim_id2', 2, -1, glob_dim_table_rows)
+]
# deal with command line arguments
# -----------------------------------------------------------------------------
def parseargs():
- parser = argparse.ArgumentParser(description=_help, version='1.0')
+ parser = argparse.ArgumentParser(description=_help)
- parser.add_argument("tests", metavar="TEST", choices=[ [], "all", "none", "bitmap_scan_tests", "bitmap_join_tests" ], nargs="*",
- help="Run these tests (all, bitmap_scan_tests, bitmap_join_tests), default is none")
+ parser.add_argument("tests", metavar="TEST", choices=[[], "all", "none", "bitmap_scan_tests", "btree_ao_scan_tests",
+ "bitmap_ndv_scan_tests", "index_join_tests", "bfv_join_tests"],
+ nargs="*",
+ help="Run these tests (all, none, bitmap_scan_tests, btree_ao_scan_tests, bitmap_ndv_scan_tests, index_join_tests, bfv_join_tests), default is none")
parser.add_argument("--create", action="store_true",
help="Create the tables to use in the test")
parser.add_argument("--execute", type=int, default="0",
@@ -346,14 +294,14 @@ def parseargs():
help="Print more verbose output")
parser.add_argument("--logFile", default="",
help="Log diagnostic output to a file")
- parser.add_argument("--host", default="localhost",
- help="Host to connect to (default is localhost).")
+ parser.add_argument("--host", default="",
+ help="Host to connect to (default is localhost or $PGHOST, if set).")
parser.add_argument("--port", type=int, default="0",
- help="Port on the host to connect to")
+ help="Port on the host to connect to (default is 0 or $PGPORT, if set)")
parser.add_argument("--dbName", default="",
help="Database name to connect to")
parser.add_argument("--appendOnly", action="store_true",
- help="Create an append-only table (uses only bitmap indexes). Default is a heap table")
+ help="Create an append-only table. Default is a heap table")
parser.add_argument("--numRows", type=int, default="10000000",
help="Number of rows to INSERT INTO the table (default is 10 million)")
@@ -363,6 +311,7 @@ def parseargs():
args = parser.parse_args()
return args, parser
+
def log_output(str):
if glob_verbose:
print(str)
@@ -376,12 +325,15 @@ def log_output(str):
def connect(host, port_num, db_name):
try:
dburl = dbconn.DbURL(hostname=host, port=port_num, dbname=db_name)
- conn = dbconn.connect(dburl, encoding="UTF8")
+ conn = dbconn.connect(dburl, encoding="UTF8", unsetSearchPath=False)
+
except Exception as e:
- print("Exception during connect: %s" % e)
+ print(("Exception during connect: %s" % e))
quit()
+
return conn
+
def select_version(conn):
global glob_gpdb_major_version
sqlStr = "SELECT version()"
@@ -401,6 +353,7 @@ def select_version(conn):
for row in rows:
log_output(str(row[0]))
+
def execute_sql(conn, sqlStr):
try:
log_output("")
@@ -408,28 +361,47 @@ def execute_sql(conn, sqlStr):
dbconn.execSQL(conn, sqlStr)
except Exception as e:
print("")
- print("Error executing query: %s; Reason: %s" % (sqlStr, e))
+ print(("Error executing query: %s; Reason: %s" % (sqlStr, e)))
+ dbconn.execSQL(conn, "abort")
+
+
+def select_first_int(conn, sqlStr):
+ try:
+ log_output("")
+ log_output("Executing query: %s" % sqlStr)
+ curs = dbconn.query(conn, sqlStr)
+ rows = curs.fetchall()
+ for row in rows:
+ return int(row[0])
+
+ except Exception as e:
+ print("")
+ print(("Error executing query: %s; Reason: %s" % (sqlStr, e)))
dbconn.execSQL(conn, "abort")
+
def execute_sql_arr(conn, sqlStrArr):
for sqlStr in sqlStrArr:
execute_sql(conn, sqlStr)
+
def execute_and_commit_sql(conn, sqlStr):
execute_sql(conn, sqlStr)
commit_db(conn)
+
def commit_db(conn):
execute_sql(conn, "commit")
+
# run an SQL statement and return the elapsed wallclock time, in seconds
def timed_execute_sql(conn, sqlStr):
start = time.time()
- execute_sql(conn, sqlStr)
+ num_rows = select_first_int(conn, sqlStr)
end = time.time()
- elapsed_time_in_msec = round((end-start)*1000)
- log_output("Elapsed time (msec): %.0f" % elapsed_time_in_msec)
- return elapsed_time_in_msec
+ elapsed_time_in_msec = round((end - start) * 1000)
+ log_output("Elapsed time (msec): %d, rows: %d" % (elapsed_time_in_msec, num_rows))
+ return elapsed_time_in_msec, num_rows
# run an SQL statement n times, unless it takes longer than a timeout
@@ -437,17 +409,21 @@ def timed_execute_sql(conn, sqlStr):
def timed_execute_n_times(conn, sqlStr, exec_n_times):
sum_exec_times = 0.0
sum_square_exec_times = 0.0
- e = 1
+ e = 0
act_num_exes = exec_n_times
- while e <= act_num_exes:
- exec_time = timed_execute_sql(conn, sqlStr)
+ num_rows = -1
+ while e < act_num_exes:
+ exec_time, local_num_rows = timed_execute_sql(conn, sqlStr)
+ e = e + 1
sum_exec_times += exec_time
- sum_square_exec_times += exec_time*exec_time
+ sum_square_exec_times += exec_time * exec_time
+ if num_rows >= 0 and local_num_rows != num_rows:
+ log_output("Inconsistent number of rows returned: %d and %d" % (num_rows, local_num_rows))
+ num_rows = local_num_rows
if exec_time > glob_exe_timeout:
# we exceeded the timeout, don't keep executing this long query
act_num_exes = e
log_output("Query %s exceeded the timeout of %d seconds" % (sqlStr, glob_exe_timeout))
- e = e+1
# compute mean and standard deviation of the execution times
mean = sum_exec_times / act_num_exes
@@ -456,7 +432,7 @@ def timed_execute_n_times(conn, sqlStr, exec_n_times):
variance = 0.0
else:
variance = sum_square_exec_times / act_num_exes - mean * mean
- return (round(mean, 3), round(math.sqrt(variance), 3), act_num_exes)
+ return (round(mean, 3), round(math.sqrt(variance), 3), act_num_exes, num_rows)
# Explain a query and find a table scan or index scan in an explain output
@@ -474,11 +450,13 @@ def explain_index_scan(conn, sqlStr):
table_scan_pattern = TABLE_SCAN_PATTERN
index_scan_pattern = INDEX_SCAN_PATTERN
bitmap_scan_pattern = BITMAP_SCAN_PATTERN
+ fallback_pattern = FALLBACK_PATTERN
if (glob_gpdb_major_version) <= 5:
table_scan_pattern = TABLE_SCAN_PATTERN_V5
index_scan_pattern = INDEX_SCAN_PATTERN_V5
bitmap_scan_pattern = BITMAP_SCAN_PATTERN_V5
-
+ fallback_pattern = FALLBACK_PATTERN_V5
+
for row in rows:
log_output(row[0])
if re.search(TABLE_NAME_PATTERN, row[0]) or re.search(NDV_TABLE_NAME_PATTERN, row[0]):
@@ -491,6 +469,9 @@ def explain_index_scan(conn, sqlStr):
elif re.search(table_scan_pattern, row[0]):
scan_type = TABLE_SCAN
cost = cost_from_explain_line(row[0])
+ elif re.search(fallback_pattern, row[0]):
+ log_output("*** ERROR: Fallback")
+ scan_type = FALLBACK_PLAN
except Exception as e:
log_output("\n*** ERROR explaining query:\n%s;\nReason: %s" % ("explain " + sqlStr, e))
@@ -515,12 +496,14 @@ def explain_join_scan(conn, sqlStr):
table_scan_pattern = TABLE_SCAN_PATTERN
index_scan_pattern = INDEX_SCAN_PATTERN
bitmap_scan_pattern = BITMAP_SCAN_PATTERN
+ fallback_pattern = FALLBACK_PATTERN
if (glob_gpdb_major_version) <= 5:
hash_join_pattern = HASH_JOIN_PATTERN_V5
nl_join_pattern = NL_JOIN_PATTERN_V5
table_scan_pattern = TABLE_SCAN_PATTERN_V5
index_scan_pattern = INDEX_SCAN_PATTERN_V5
bitmap_scan_pattern = BITMAP_SCAN_PATTERN_V5
+ fallback_pattern = FALLBACK_PATTERN_V5
# save the cost of the join above the scan type
for row in rows:
@@ -537,6 +520,9 @@ def explain_join_scan(conn, sqlStr):
scan_type = INDEX_SCAN
elif re.search(table_scan_pattern, row[0]):
scan_type = TABLE_SCAN
+ elif re.search(fallback_pattern, row[0]):
+ log_output("*** ERROR: Fallback")
+ scan_type = FALLBACK_PLAN
except Exception as e:
log_output("\n*** ERROR explaining query:\n%s;\nReason: %s" % ("explain " + sqlStr, e))
@@ -557,12 +543,13 @@ def cost_from_explain_line(line):
# iterate over one parameterized query, using a range of parameter values, explaining and (optionally) executing the query
-def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMethod, explain_method, reset_method, plan_ids, force_methods, execute_n_times):
+def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMethod, explain_method, reset_method,
+ plan_ids, force_methods, execute_n_times):
# expects the following:
# - conn: A connection
# - lowParamValue: The lowest (integer) value to try for the parameter
# - highParamLimit: The highest (integer) value to try for the parameter + 1
- # - setup: A method that runs any sql needed for setup before a particular select run, given a parameterized query and a paramter value
+ # - setup: A method that runs any sql needed for setup before a particular select run, given a parameterized query and a parameter value
# - parameterizeMethod: A method to generate the actual query text, given a parameterized query and a parameter value
# - explain_method: A method that takes a connection and an SQL string and returns a tuple (plan, cost)
# - reset_method: A method to reset all gucs and similar switches, to get the default plan by the optimizer
@@ -571,7 +558,7 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho
# - force_methods: A list with methods to force each plan id in the plan_ids array (these methods usually set gucs)
# each methods takes one parameter, the connection
# - execute_n_times: The number of times to execute the query (0 means don't execute, n>0 means execute n times)
-
+
# returns the following:
# - An explain dictionary, containing a mapping between a subset of the parameter values and result tuples, each result tuple consisting of
#
+    #   2 values:
@@ -597,16 +584,17 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho
reset_method(conn)
# determine the increment
- incParamValue = (highParamLimit - lowParamValue) / 10
+ incParamValue = (highParamLimit - lowParamValue) // 10
if incParamValue == 0:
incParamValue = 1
elif highParamLimit <= lowParamValue:
- errMessages.append("Low parameter value %d must be less than high parameter limit %d" % (lowParamValue, highParamLimit))
+ errMessages.append(
+ "Low parameter value %d must be less than high parameter limit %d" % (lowParamValue, highParamLimit))
return (explainDict, execDict, errMessages)
# first part, run through the parameter values and determine the plan and cost chosen by the optimizer
for paramValue in range(lowParamValue, highParamLimit, incParamValue):
-
+
# do any setup required
setupString = setup(paramValue)
execute_sql(conn, setupString)
@@ -615,7 +603,7 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho
(plan, cost) = explain_method(conn, sqlString)
explainDict[paramValue] = (plan, cost)
log_output("For param value %d the optimizer chose %s with a cost of %f" % (paramValue, plan, cost))
-
+
# look for the crossover from one plan to another
if not expCrossoverOccurred and paramValue > lowParamValue and plan != expPrevPlan:
expCrossoverOccurred = True
@@ -628,7 +616,8 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho
# execute the query, if requested
if execute_n_times > 0:
- timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, OPTIMIZER_DEFAULT_PLAN, execDict, timedOutDict, errMessages)
+ timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, OPTIMIZER_DEFAULT_PLAN,
+ execDict, timedOutDict, errMessages)
# second part, force different plans and record the costs
for plan_num in range(0, len(plan_ids)):
@@ -640,18 +629,22 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho
# do any setup required
setupString = setup(paramValue)
execute_sql(conn, setupString)
- # explain the query with the forced plan
+ # explain the query with the forced plan
sqlString = parameterizeMethod(paramValue)
(plan, cost) = explain_method(conn, sqlString)
if plan_id != plan:
- errMessages.append("For parameter value %d we tried to force a %s plan but got a %s plan." % (paramValue, plan_id, plan))
- log_output("For parameter value %d we tried to force a %s plan but got a %s plan." % (paramValue, plan_id, plan))
+ errMessages.append("For parameter value %d we tried to force a %s plan but got a %s plan." % (
+ paramValue, plan_id, plan))
+ log_output("For parameter value %d we tried to force a %s plan but got a %s plan." % (
+ paramValue, plan_id, plan))
# update the result dictionary
resultList = list(explainDict[paramValue])
defaultPlanCost = resultList[1]
# sanity check, the forced plan shouldn't have a cost that is lower than the default plan cost
if defaultPlanCost > cost * 1.1:
- errMessages.append("For parameter value %d and forced %s plan we got a cost of %f that is lower than the default cost of %f for the default %s plan." % (paramValue, plan_id, cost, defaultPlanCost, resultList[0]))
+ errMessages.append(
+ "For parameter value %d and forced %s plan we got a cost of %f that is lower than the default cost of %f for the default %s plan." % (
+ paramValue, plan_id, cost, defaultPlanCost, resultList[0]))
resultList.append(cost)
explainDict[paramValue] = tuple(resultList)
log_output("For param value %d we forced %s with a cost of %f" % (paramValue, plan, cost))
@@ -659,7 +652,8 @@ def find_crossover(conn, lowParamValue, highParamLimit, setup, parameterizeMetho
# execute the forced plan
if execute_n_times > 0:
# execute the query times and record the mean and stddev of the time in execDict
- timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, plan_id, execDict, timedOutDict, errMessages)
+ timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, plan_id, execDict,
+ timedOutDict, errMessages)
# cleanup at exit
reset_method(conn)
@@ -678,23 +672,26 @@ def checkForOptimizerErrors(paramValue, chosenPlan, plan_ids, execDict):
defaultExeTime = 1E6
defaultStdDev = 0.0
if (paramValue, OPTIMIZER_DEFAULT_PLAN) in execDict:
- defaultExeTime, defaultStdDev = execDict[(paramValue, OPTIMIZER_DEFAULT_PLAN)]
+ defaultExeTime, defaultStdDev, numRows = execDict[(paramValue, OPTIMIZER_DEFAULT_PLAN)]
if (paramValue, chosenPlan) in execDict:
- forcedExeTime, forcedStdDev = execDict[(paramValue, chosenPlan)]
- defaultExeTime = min(defaultExeTime, forcedExeTime)
- defaultStdDev = max(defaultStdDev, forcedStdDev)
+ forcedExeTime, forcedStdDev, numRows = execDict[(paramValue, chosenPlan)]
+ if forcedExeTime < defaultExeTime:
+ defaultExeTime = forcedExeTime
+ defaultStdDev = forcedStdDev
for pl in plan_ids:
if (paramValue, pl) in execDict:
- altExeTime, altStdDev = execDict[(paramValue, pl)]
+ altExeTime, altStdDev, numRows = execDict[(paramValue, pl)]
# The execution times tend to be fairly unreliable. Try to avoid false positives by
# requiring a significantly better alternative, measured in standard deviations.
if altExeTime + glob_sigma_diff * max(defaultStdDev, altStdDev) < defaultExeTime:
optimizerError = 100.0 * (defaultExeTime - altExeTime) / defaultExeTime
# yes, plan pl is significantly better than the optimizer default choice
- return (pl, round(optimizerError,1))
+ return (pl, round(optimizerError, 1))
+ elif chosenPlan == FALLBACK_PLAN:
+ return (FALLBACK_PLAN, -1.0)
# the optimizer chose the right plan (at least we have not enough evidence to the contrary)
return ("", 0.0)
@@ -702,7 +699,7 @@ def checkForOptimizerErrors(paramValue, chosenPlan, plan_ids, execDict):
# print the results of one test run
-def print_results(testTitle, explainDict, execDict, errMessages, plan_ids):
+def print_results(testTitle, explainDict, execDict, errMessages, plan_ids, execute_n_times):
# print out the title of the test
print("")
print(testTitle)
@@ -710,11 +707,11 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids):
exeTimes = len(execDict) > 0
# make a list of plan ids with the default plan ids as first entry
- plan_ids_with_default = [ OPTIMIZER_DEFAULT_PLAN ]
+ plan_ids_with_default = [OPTIMIZER_DEFAULT_PLAN]
plan_ids_with_default.extend(plan_ids)
# print a header row
- headerList = [ "Parameter value", "Plan chosen by optimizer", "Cost" ]
+ headerList = ["Parameter value", "Plan chosen by optimizer", "Cost"]
for p_id in plan_ids:
headerList.append("Cost of forced %s plan" % p_id)
if exeTimes:
@@ -723,10 +720,12 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids):
headerList.append("Execution time for default plan (ms)")
for p_id in plan_ids:
headerList.append("Execution time for forced %s plan (ms)" % p_id)
- headerList.append("std dev default")
- for p_id in plan_ids:
- headerList.append("std dev %s" % p_id)
- print(", ".join(headerList))
+ if execute_n_times > 1:
+ headerList.append("Std dev default")
+ for p_id in plan_ids:
+ headerList.append("Std dev %s" % p_id)
+ headerList.append("Selectivity pct")
+ print((", ".join(headerList)))
# sort the keys of the dictionary by parameter value
sorted_params = sorted(explainDict.keys())
@@ -735,7 +734,7 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids):
for p_val in sorted_params:
# add the explain-related values
vals = explainDict[p_val]
- resultList = [ str(p_val) ]
+ resultList = [str(p_val)]
for v in vals:
resultList.append(str(v))
# add the execution-related values, if applicable
@@ -746,32 +745,39 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids):
resultList.append(str(optimizerError))
stddevList = []
- # our execution times will be a list of 2* (p+1) items,
- # (default exe time, forced exe time plan 1 ... p, stddev for default time, stddevs for plans 1...p)
+ num_rows = -1
+ # our execution times will be a list of 2* (p+1) + 1 items,
+ # (default exe time, forced exe time plan 1 ... p, stddev for default time, stddevs for plans 1...p, selectivity)
# now loop over the list of p+1 plan ids
for plan_id in plan_ids_with_default:
if (p_val, plan_id) in execDict:
# we did execute the query for this, append the avg time
# right away and save the standard deviation for later
- mean, stddev = execDict[(p_val, plan_id)]
+ mean, stddev, local_num_rows = execDict[(p_val, plan_id)]
resultList.append(str(mean))
stddevList.append(str(stddev))
+ if num_rows >= 0 and local_num_rows != num_rows:
+ errMessages.append("Inconsistent number of rows for parameter value %d: %d and %d" % (p_val, num_rows, local_num_rows))
+ num_rows = local_num_rows
else:
# we didn't execute this query, add blank values
resultList.append("")
stddevList.append("")
- # now add the standard deviations to the end of resultList
- resultList.extend(stddevList)
+ if execute_n_times > 1:
+ # now add the standard deviations to the end of resultList
+ resultList.extend(stddevList)
+ # finally, the selectivity in percent
+ resultList.append(str((100.0 * num_rows) / glob_rowcount))
# print a comma-separated list of result values (CSV)
- print(", ".join(resultList))
+ print((", ".join(resultList)))
# if there are any errors, print them at the end, leaving an empty line between the result and the errors
if (len(errMessages) > 0):
print("")
- print("%d diagnostic message(s):" % len(errMessages))
+ print(("%d diagnostic message(s):" % len(errMessages)))
for e in errMessages:
print(e)
@@ -779,7 +785,8 @@ def print_results(testTitle, explainDict, execDict, errMessages, plan_ids):
# execute a query n times, with a guard against long-running queries,
# and record the result in execDict and any errors in errMessages
-def timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, plan_id, execDict, timedOutDict, errMessages):
+def timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue, plan_id, execDict, timedOutDict,
+ errMessages):
# timedOutDict contains a record of queries that have previously timed out:
# plan_id -> (lowest param value for timeout, highest value for timeout, direction)
# right now we ignore low/high values and direction (whether the execution increases or decreases with
@@ -792,17 +799,18 @@ def timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue
return
# execute the query
- mean, stddev, num_execs = timed_execute_n_times(conn, sqlString, execute_n_times)
+ mean, stddev, num_execs, num_rows = timed_execute_n_times(conn, sqlString, execute_n_times)
# record the execution stats
- execDict[(paramValue, plan_id)] = (mean, stddev)
+ execDict[(paramValue, plan_id)] = (mean, stddev, num_rows)
# check for timeouts
if num_execs < execute_n_times or mean > glob_exe_timeout:
# record the timeout, without worrying about low/high values or directions for now
timedOutDict[plan_id] = (paramValue, paramValue, "unknown_direction")
- errMessages.append("The %s plan for parameter value %d took more than the allowed timeout, it was executed only %d time(s)" %
- (plan_id, paramValue, num_execs))
+ errMessages.append(
+ "The %s plan for parameter value %d took more than the allowed timeout, it was executed only %d time(s)" %
+ (plan_id, paramValue, num_execs))
# Definition of various test suites
@@ -832,44 +840,44 @@ def timed_execute_and_check_timeout(conn, sqlString, execute_n_times, paramValue
# GUC set statements
-_reset_index_scan_forces = [ """
+_reset_index_scan_forces = ["""
SELECT enable_xform('CXformImplementBitmapTableGet');
""",
- """
+ """
SELECT enable_xform('CXformGet2TableScan');
""" ]
-_force_sequential_scan = [ """
+_force_sequential_scan = ["""
SELECT disable_xform('CXformImplementBitmapTableGet');
-""" ]
+"""]
-_force_index_scan = [ """
+_force_index_scan = ["""
SELECT disable_xform('CXformGet2TableScan');
-""" ]
+"""]
-_reset_index_join_forces = [ """
+_reset_index_join_forces = ["""
SELECT enable_xform('CXformPushGbBelowJoin');
""",
- """
+ """
RESET optimizer_enable_indexjoin;
""",
- """
+ """
RESET optimizer_enable_hashjoin;
-""" ]
+"""]
-_force_hash_join = [ """
+_force_hash_join = ["""
SELECT disable_xform('CXformPushGbBelowJoin');
""",
- """
+ """
SET optimizer_enable_indexjoin to off;
-""" ]
+"""]
-_force_index_nlj = [ """
+_force_index_nlj = ["""
SELECT disable_xform('CXformPushGbBelowJoin');
""",
- """
+ """
SET optimizer_enable_hashjoin to off;
-""" ]
+"""]
# setup statements
@@ -882,7 +890,7 @@ ANALYZE cal_bfvtest;
ANALYZE cal_bfv_dim;
"""
-_insert_into_ndv_tables= """
+_insert_into_ndv_tables = """
TRUNCATE cal_ndvtest;
INSERT INTO cal_ndvtest SELECT i, i %% %d FROM (SELECT generate_series(1,1000000) i)a;
ANALYZE cal_ndvtest;
@@ -917,7 +925,49 @@ WHERE bitmap10000 BETWEEN 0 AND %d;
_bitmap_select_pt01_pct_multi = """
SELECT count(*) %s
FROM cal_txtest
-WHERE bitmap10000 = 0 OR bitmap10000 BETWEEN 2 AND %d+2;
+WHERE bitmap10000 = 0 OR bitmap10000 BETWEEN 2 AND %d+1;
+"""
+
+_btree_select_unique = """
+SELECT count(*) %s
+FROM cal_txtest
+WHERE btreeunique BETWEEN 0 AND %d;
+"""
+
+_btree_select_10_pct = """
+SELECT count(*) %s
+FROM cal_txtest
+WHERE btree10 BETWEEN 0 AND %d;
+"""
+
+_btree_select_1_pct = """
+SELECT count(*) %s
+FROM cal_txtest
+WHERE btree100 BETWEEN 0 AND %d;
+"""
+
+_btree_select_pt1_pct = """
+SELECT count(*) %s
+FROM cal_txtest
+WHERE btree1000 BETWEEN 0 AND %d;
+"""
+
+_btree_select_pt01_pct = """
+SELECT count(*) %s
+FROM cal_txtest
+WHERE btree10000 BETWEEN 0 AND %d;
+"""
+
+_btree_select_pt01_pct_multi = """
+SELECT count(*) %s
+FROM cal_txtest
+WHERE btree10000 = 0 OR btree10000 BETWEEN 2 AND %d+1;
+"""
+
+_btree_select_unique_in = """
+SELECT count(*) %s
+FROM cal_txtest
+WHERE btreeunique IN ( %s );
"""
_bitmap_index_join = """
@@ -926,6 +976,12 @@ FROM cal_txtest f JOIN cal_dim d ON f.bitmap10000 = d.dim_id
WHERE d.dim_id2 BETWEEN 0 AND %d;
"""
+_btree_index_join = """
+SELECT count(*) %s
+FROM cal_txtest f JOIN cal_dim d ON f.btree10000 = d.dim_id
+WHERE d.dim_id2 BETWEEN 0 AND %d;
+"""
+
_bfv_join = """
SELECT count(*)
FROM cal_bfvtest ft, cal_bfv_dim dt1
@@ -938,6 +994,7 @@ FROM cal_ndvtest
WHERE val <= 1000000;
"""
+
# Parameterize methods for the test queries above
# -----------------------------------------------------------------------------
@@ -945,63 +1002,139 @@ WHERE val <= 1000000;
def parameterize_bitmap_index_10_narrow(paramValue):
return _bitmap_select_10_pct % ("", paramValue)
+
def parameterize_bitmap_index_10_wide(paramValue):
return _bitmap_select_10_pct % (", max(txt)", paramValue)
+
# bitmap index scan with 0...100 % of values, for parameter values 0...10,000, in .01 % increments
def parameterize_bitmap_index_10000_narrow(paramValue):
return _bitmap_select_pt01_pct % ("", paramValue)
+
def parameterize_bitmap_index_10000_wide(paramValue):
return _bitmap_select_pt01_pct % (", max(txt)", paramValue)
+
# bitmap index scan with 0...100 % of values, for parameter values 0...10,000, in .01 % increments, multiple ranges
def parameterize_bitmap_index_10000_multi_narrow(paramValue):
return _bitmap_select_pt01_pct_multi % ("", paramValue)
+
def parameterize_bitmap_index_10000_multi_wide(paramValue):
return _bitmap_select_pt01_pct_multi % (", max(txt)", paramValue)
+
+# bitmap index scan on the unique AO btree index; parameter value = number of selected rows (0 ... 10% of the table)
+def parameterize_btree_index_unique_narrow(paramValue):
+ return _btree_select_unique % ("", paramValue)
+
+
+def parameterize_btree_index_unique_wide(paramValue):
+ return _btree_select_unique % (", max(txt)", paramValue)
+
+
+def parameterize_btree_index_100_narrow(paramValue):
+ return _btree_select_1_pct % ("", paramValue)
+
+
+def parameterize_btree_index_100_wide(paramValue):
+ return _btree_select_1_pct % (", max(txt)", paramValue)
+
+
+# bitmap index scan on AO btree index with 0...100 % of values, for parameter values 0...10,000, in .01 % increments
+def parameterize_btree_index_10000_narrow(paramValue):
+ return _btree_select_pt01_pct % ("", paramValue)
+
+
+def parameterize_btree_index_10000_wide(paramValue):
+ return _btree_select_pt01_pct % (", max(txt)", paramValue)
+
+
+# bitmap index scan on AO btree index with 0...100 % of values, for parameter values 0...10,000, in .01 % increments, multiple ranges
+def parameterize_btree_index_10000_multi_narrow(paramValue):
+ return _btree_select_pt01_pct_multi % ("", paramValue)
+
+
+def parameterize_btree_index_10000_multi_wide(paramValue):
+ return _btree_select_pt01_pct_multi % (", max(txt)", paramValue)
+
+
+def parameterize_btree_unique_in_narrow(paramValue):
+ inlist = "0"
+ for p in range(1, paramValue+1):
+ inlist += ", " + str(5*p)
+ return _btree_select_unique_in % ("", inlist)
+
+
+def parameterize_btree_unique_in_wide(paramValue):
+ inlist = "0"
+ for p in range(1, paramValue+1):
+ inlist += ", " + str(5*p)
+ return _btree_select_unique_in % (", max(txt)", inlist)
+
+
# index join with 0...100 % of fact values, for parameter values 0...10,000, in .01 % increments
def parameterize_bitmap_join_narrow(paramValue):
return _bitmap_index_join % ("", paramValue)
+
def parameterize_bitmap_join_wide(paramValue):
return _bitmap_index_join % (", max(f.txt)", paramValue)
+
+def parameterize_btree_join_narrow(paramValue):
+ return _btree_index_join % ("", paramValue)
+
+
+def parameterize_btree_join_wide(paramValue):
+ return _btree_index_join % (", max(f.txt)", paramValue)
+
+
def parameterize_insert_join_bfv(paramValue):
return _insert_into_bfv_tables % (paramValue, paramValue)
+
def parameterize_insert_ndv(paramValue):
return _insert_into_ndv_tables % (paramValue)
+
def parameterize_bitmap_join_bfv(paramValue):
return _bfv_join
+
def parameterize_bitmap_index_ndv(paramValue):
return _bitmap_index_ndv
+
def noSetupRequired(paramValue):
return "SELECT 1;"
+
def explain_bitmap_index(conn, sqlStr):
return explain_index_scan(conn, sqlStr)
+
def reset_index_test(conn):
execute_sql_arr(conn, _reset_index_scan_forces)
+
def force_table_scan(conn):
execute_sql_arr(conn, _force_sequential_scan)
+
def force_bitmap_scan(conn):
execute_sql_arr(conn, _force_index_scan)
+
def reset_index_join(conn):
execute_sql_arr(conn, _reset_index_join_forces)
+
def force_hash_join(conn):
execute_sql_arr(conn, _force_hash_join)
+
def force_index_join(conn):
execute_sql_arr(conn, _force_index_nlj)
@@ -1009,26 +1142,34 @@ def force_index_join(conn):
# Helper methods for running tests
# -----------------------------------------------------------------------------
-def run_one_bitmap_scan_test(conn, testTitle, paramValueLow, paramValueHigh, setup, parameterizeMethod, execute_n_times):
- plan_ids = [ BITMAP_SCAN, TABLE_SCAN ]
- force_methods = [ force_bitmap_scan, force_table_scan ]
- explainDict, execDict, errors = find_crossover(conn, paramValueLow, paramValueHigh, setup, parameterizeMethod, explain_bitmap_index, reset_index_test, plan_ids, force_methods, execute_n_times)
- print_results(testTitle, explainDict, execDict, errors, plan_ids)
+def run_one_bitmap_scan_test(conn, testTitle, paramValueLow, paramValueHigh, setup, parameterizeMethod,
+ execute_n_times):
+ log_output("Running bitmap scan test " + testTitle)
+ plan_ids = [BITMAP_SCAN, TABLE_SCAN]
+ force_methods = [force_bitmap_scan, force_table_scan]
+ explainDict, execDict, errors = find_crossover(conn, paramValueLow, paramValueHigh, setup, parameterizeMethod,
+ explain_bitmap_index, reset_index_test, plan_ids, force_methods,
+ execute_n_times)
+ print_results(testTitle, explainDict, execDict, errors, plan_ids, execute_n_times)
-def run_one_bitmap_join_test(conn, testTitle, paramValueLow, paramValueHigh, setup, parameterizeMethod, execute_n_times):
- plan_ids = [ BITMAP_SCAN, TABLE_SCAN ]
- force_methods = [ force_index_join, force_hash_join ]
- explainDict, execDict, errors = find_crossover(conn, paramValueLow, paramValueHigh, setup, parameterizeMethod, explain_join_scan, reset_index_join, plan_ids, force_methods, execute_n_times)
- print_results(testTitle, explainDict, execDict, errors, plan_ids)
+
+def run_one_bitmap_join_test(conn, testTitle, paramValueLow, paramValueHigh, setup, parameterizeMethod,
+ execute_n_times):
+ log_output("Running bitmap join test " + testTitle)
+ plan_ids = [BITMAP_SCAN, TABLE_SCAN]
+ force_methods = [force_index_join, force_hash_join]
+ explainDict, execDict, errors = find_crossover(conn, paramValueLow, paramValueHigh, setup, parameterizeMethod,
+ explain_join_scan, reset_index_join, plan_ids, force_methods,
+ execute_n_times)
+ print_results(testTitle, explainDict, execDict, errors, plan_ids, execute_n_times)
# Main driver for the tests
# -----------------------------------------------------------------------------
def run_bitmap_index_scan_tests(conn, execute_n_times):
-
run_one_bitmap_scan_test(conn,
- "Bitmap Scan Test, NDV=10, selectivity_pct=10*parameter_value, count(*)",
+ "Bitmap Scan Test; NDV=10; selectivity_pct=10*parameter_value; count(*)",
0,
10,
noSetupRequired,
@@ -1037,102 +1178,202 @@ def run_bitmap_index_scan_tests(conn, execute_n_times):
# all full table scan, no crossover
run_one_bitmap_scan_test(conn,
- "Bitmap Scan Test, NDV=10, selectivity_pct=10*parameter_value, max(txt)",
+ "Bitmap Scan Test; NDV=10; selectivity_pct=10*parameter_value; max(txt)",
0,
- 3,
+ 6,
noSetupRequired,
parameterize_bitmap_index_10_wide,
execute_n_times)
run_one_bitmap_scan_test(conn,
- "Bitmap Scan Test, NDV=10000, selectivity_pct=0.01*parameter_value, count(*)",
+ "Bitmap Scan Test; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)",
0,
- 20,
+ 600 if glob_appendonly else 20,
noSetupRequired,
parameterize_bitmap_index_10000_narrow,
execute_n_times)
run_one_bitmap_scan_test(conn,
- "Bitmap Scan Test, NDV=10000, selectivity_pct=0.01*parameter_value, count(*), largeNDV test",
+ "Bitmap Scan Test; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)",
0,
- 300,
- noSetupRequired,
- parameterize_bitmap_index_10000_narrow,
- execute_n_times)
-
- run_one_bitmap_scan_test(conn,
- "Bitmap Scan Test, NDV=10000, selectivity_pct=0.01*parameter_value, max(txt)",
- 5,
- 25,
+ 300 if glob_appendonly else 20,
noSetupRequired,
parameterize_bitmap_index_10000_wide,
execute_n_times)
run_one_bitmap_scan_test(conn,
- "Bitmap Scan Test, multi-range, NDV=10000, selectivity_pct=0.01*parameter_value, count(*)",
+ "Bitmap Scan Test; multi-range; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)",
0,
- 100,
+ 600 if glob_appendonly else 20,
noSetupRequired,
parameterize_bitmap_index_10000_multi_narrow,
execute_n_times)
run_one_bitmap_scan_test(conn,
- "Bitmap Scan Test, multi-range, NDV=10000, selectivity_pct=0.01*parameter_value, max(txt)",
+ "Bitmap Scan Test; multi-range; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)",
0,
- 60,
+ 300 if glob_appendonly else 20,
noSetupRequired,
parameterize_bitmap_index_10000_multi_wide,
execute_n_times)
+
+def run_bitmap_ndv_scan_tests(conn, execute_n_times):
run_one_bitmap_scan_test(conn,
- "Bitmap Scan Test, ndv test, rows=1000000, parameter = insert statement modulo, count(*)",
- 1, # modulo ex. would replace x in the following: SELECT i % x FROM generate_series(1,10000)i;
- 10000, #max here is 10000 (num of rows)
+ "Bitmap Scan Test; ndv test; rows=1000000; parameter = insert statement modulo; count(*)",
+ 1,
+ # modulo ex. would replace x in the following: SELECT i % x FROM generate_series(1,10000)i;
+ 10000, # max here is 10000 (num of rows)
parameterize_insert_ndv,
parameterize_bitmap_index_ndv,
execute_n_times)
-def run_bitmap_index_join_tests(conn, execute_n_times):
+def run_btree_ao_index_scan_tests(conn, execute_n_times):
+ # use the unique btree index (no bitmap equivalent), 0 to 10,000 rows
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; unique; selectivity_pct=100*parameter_value/%d; count(*)" % glob_rowcount,
+ 0,
+ glob_rowcount // 10, # 10% is the max allowed selectivity for a btree scan on an AO table
+ noSetupRequired,
+ parameterize_btree_index_unique_narrow,
+ execute_n_times)
+
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; unique; selectivity_pct=100*parameter_value/%d; max(txt)" % glob_rowcount,
+ 0,
+ glob_rowcount // 20,
+ noSetupRequired,
+ parameterize_btree_index_unique_wide,
+ execute_n_times)
+
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; NDV=100; selectivity_pct=parameter_value; count(*)",
+ 0,
+ 5,
+ noSetupRequired,
+ parameterize_btree_index_100_narrow,
+ execute_n_times)
+
+ # all full table scan, no crossover
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; NDV=100; selectivity_pct=parameter_value; max(txt)",
+ 0,
+ 5,
+ noSetupRequired,
+ parameterize_btree_index_100_wide,
+ execute_n_times)
+
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)",
+ 0,
+ 500,
+ noSetupRequired,
+ parameterize_btree_index_10000_narrow,
+ execute_n_times)
+
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)",
+ 0,
+ 1000,
+ noSetupRequired,
+ parameterize_btree_index_10000_wide,
+ execute_n_times)
+
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; multi-range; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)",
+ 0,
+ 1000,
+ noSetupRequired,
+ parameterize_btree_index_10000_multi_narrow,
+ execute_n_times)
+
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; multi-range; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)",
+ 0,
+ 1000,
+ noSetupRequired,
+ parameterize_btree_index_10000_multi_wide,
+ execute_n_times)
+
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; in-list; selectivity_pct=100*parameter_value/%d; count(*)" % glob_rowcount,
+ 0,
+ 5000, # length of IN list
+ noSetupRequired,
+ parameterize_btree_unique_in_narrow,
+ execute_n_times)
+
+ run_one_bitmap_scan_test(conn,
+ "Btree Scan Test; in-list; selectivity_pct=100*parameter_value/%d; max(txt)" % glob_rowcount,
+ 0,
+ 3000, # length of IN list
+ noSetupRequired,
+ parameterize_btree_unique_in_wide,
+ execute_n_times)
+
+
+def run_index_join_tests(conn, execute_n_times):
run_one_bitmap_join_test(conn,
- "Bitmap Join Test, NDV=10000, selectivity_pct=0.01*parameter_value, count(*)",
+ "Bitmap Join Test; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)",
0,
- 900,
+ 400,
noSetupRequired,
parameterize_bitmap_join_narrow,
execute_n_times)
-
+
run_one_bitmap_join_test(conn,
- "Bitmap Join Test, NDV=10000, selectivity_pct=0.01*parameter_value, max(txt)",
+ "Bitmap Join Test; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)",
0,
- 900,
+ 300,
noSetupRequired,
parameterize_bitmap_join_wide,
execute_n_times)
run_one_bitmap_join_test(conn,
- "Bitmap Join BFV Test, Large Data, parameter = num rows inserted",
- 10000, # num of rows inserted
+ "Btree Join Test; NDV=10000; selectivity_pct=0.01*parameter_value; count(*)",
+ 0,
+ 500,
+ noSetupRequired,
+ parameterize_btree_join_narrow,
+ execute_n_times)
+
+ run_one_bitmap_join_test(conn,
+ "Btree Join Test; NDV=10000; selectivity_pct=0.01*parameter_value; max(txt)",
+ 0,
+ 400,
+ noSetupRequired,
+ parameterize_btree_join_wide,
+ execute_n_times)
+
+
+def run_bfv_join_tests(conn, execute_n_times):
+ run_one_bitmap_join_test(conn,
+ "Bitmap Join BFV Test; Large Data; parameter = num rows inserted",
+ 10000, # num of rows inserted
900000,
parameterize_insert_join_bfv,
parameterize_bitmap_join_bfv,
execute_n_times)
-
+
# common parts of all test suites, create tables, run tests, drop objects
# -----------------------------------------------------------------------------
# create the table(s), as regular or AO table, and insert num_rows into the main table
def createDB(conn, use_ao, num_rows):
+ global glob_appendonly
+
create_options = ""
if use_ao:
create_options = _with_appendonly
+ glob_appendonly = True
create_cal_table_stmt = _create_cal_table % create_options
create_bfv_table = _create_bfv_table % create_options
create_ndv_table = _create_ndv_table % create_options
- insert_into_temp_stmt = _insert_into_temp % (1,num_rows)
- insert_into_other_stmt = _insert_into_other_tables % (1,10000)
-
+ insert_into_temp_stmt = _insert_into_temp % num_rows
+ insert_into_other_stmt = _insert_into_other_tables % (1, glob_dim_table_rows)
+
execute_sql(conn, _drop_tables)
execute_sql(conn, create_cal_table_stmt)
execute_sql(conn, create_bfv_table)
@@ -1145,27 +1386,103 @@ def createDB(conn, use_ao, num_rows):
execute_sql_arr(conn, _create_index_arr)
execute_sql_arr(conn, _create_bfv_index_arr)
execute_sql_arr(conn, _create_ndv_index_arr)
- if use_ao:
- execute_sql_arr(conn, _create_btree_indexes_ao_arr)
+ execute_sql_arr(conn, _create_btree_indexes_arr)
execute_sql(conn, _analyze_table)
commit_db(conn)
+
def dropDB(conn):
- execute_sql(conn, _drop_tables)
+ execute_sql(conn, _drop_tables)
+
+# smooth statistics for a single integer column uniformly distributed between 1 and row_count, with a given row count and NDV
+#
+# For NDVs of 100 or less, list all of them
+# For NDVs of more than 100, generate a histogram with 100 buckets
+# Set the correlation to 0 for all columns, since the data was shuffled randomly
+def smoothStatisticsForOneCol(conn, table_name, attnum, row_count, ndv):
+ # calculate stadistinct value and ndv, if specified as -1
+ if ndv == -1:
+ stadistinct = -1
+ ndv = row_count
+ else:
+ stadistinct = ndv
+
+ # correlation to physical row ordering is 0 for all columns
+ corr = 0.0
+
+ # stakind: 1 is a list of most common values and frequencies, 2 is a histogram with range buckets
+ stakind = 1
+ # arrays for stanumbers and stavalues
+ stanumbers = []
+ stavalues = []
+ stanumbers_txt = "NULL"
+ num_values = min(ndv, 100)
+
+ if ndv <= 100:
+ # produce "ndv" MCVs, each with the same frequency
+ for i in range(1,num_values+1):
+ stanumbers.append(str(float(1)/ndv))
+ stavalues.append(str(i))
+ stanumbers_txt = "'{ " + ", ".join(stanumbers) + " }'::float[]"
+ else:
+ # produce a uniformly distributed histogram with 100 buckets (101 boundaries)
+ stakind = 2
+ stavalues.append(str(1))
+ for j in range(1,num_values+1):
+ stavalues.append(str((j*ndv) // num_values))
+
+ stavalues_txt = "'{ " + ", ".join(stavalues) + " }'::int[]"
+ execute_sql(conn, _update_pg_stats % (stadistinct, stakind, stanumbers_txt, stavalues_txt, corr, table_name, attnum))
# ensure that we have perfect histogram statistics on the relevant columns
-def smoothStatistics(conn):
+def smoothStatistics(conn, num_fact_table_rows):
+ prev_table_name = ""
if glob_gpdb_major_version > 5:
execute_sql(conn, _allow_system_mods)
else:
execute_sql(conn, _allow_system_mods_v5)
- execute_sql_arr(conn, _fix_statistics)
+ for tup in _stats_cols_to_fix:
+ # note that col_name is just for human readability
+ (table_name, col_name, attnum, ndv, table_rows) = tup
+ if table_rows == -1:
+ table_rows = num_fact_table_rows
+ smoothStatisticsForOneCol(conn, table_name, attnum, table_rows, ndv)
+ if prev_table_name != table_name:
+ prev_table_name = table_name
+ execute_sql(conn, _update_pg_class % (table_rows, table_name))
commit_db(conn)
+def inspectExistingTables(conn):
+ global glob_rowcount
+ global glob_appendonly
+
+ sqlStr = "SELECT count(*) from cal_txtest"
+ curs = dbconn.query(conn, sqlStr)
+
+ rows = curs.fetchall()
+ for row in rows:
+ glob_rowcount = row[0]
+ log_output("Row count of existing fact table is %d" % glob_rowcount)
+
+ sqlStr = "SELECT lower(unnest(reloptions)) from pg_class where relname = 'cal_txtest'"
+ curs = dbconn.query(conn, sqlStr)
+
+ rows = curs.fetchall()
+ for row in rows:
+ if re.search("appendonly", row[0]):
+ glob_appendonly = True
+
+ if glob_appendonly:
+ log_output("Existing fact table is append-only")
+ else:
+ log_output("Existing fact table is not an append-only table")
+
def main():
global glob_verbose
global glob_log_file
+ global glob_rowcount
+
args, parser = parseargs()
if args.logFile != "":
glob_log_file = open(args.logFile, "wt", 1)
@@ -1175,23 +1492,40 @@ def main():
conn = connect(args.host, args.port, args.dbName)
select_version(conn)
if args.create:
+ glob_rowcount = args.numRows
createDB(conn, args.appendOnly, args.numRows)
- smoothStatistics(conn)
+ smoothStatistics(conn, args.numRows)
+ else:
+ inspectExistingTables(conn)
+
for test_unit in args.tests:
if test_unit == "all":
run_bitmap_index_scan_tests(conn, args.execute)
- run_bitmap_index_join_tests(conn, args.execute)
+ if glob_appendonly:
+ # the btree tests are for bitmap scans on AO tables using btree indexes
+ run_btree_ao_index_scan_tests(conn, args.execute)
+ run_index_join_tests(conn, args.execute)
+ # skip the long-running bitmap_ndv_scan_tests and bfv_join_tests
elif test_unit == "bitmap_scan_tests":
run_bitmap_index_scan_tests(conn, args.execute)
- elif test_unit == "bitmap_join_tests":
- run_bitmap_index_join_tests(conn, args.execute)
+ elif test_unit == "bitmap_ndv_scan_tests":
+ run_bitmap_ndv_scan_tests(conn, args.execute)
+ elif test_unit == "btree_ao_scan_tests":
+ run_btree_ao_index_scan_tests(conn, args.execute)
+ elif test_unit == "index_join_tests":
+ run_index_join_tests(conn, args.execute)
+ elif test_unit == "bfv_join_tests":
+ run_bfv_join_tests(conn, args.execute)
elif test_unit == "none":
print("Skipping tests")
-
+
if args.drop:
dropDB(conn)
+
+ conn.close()
if glob_log_file != None:
glob_log_file.close()
-
+
+
if __name__ == "__main__":
main()
diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt
index b27b686c65709badf1160899fc09ae1b76b40011..28b91cc56bb4e2e449b1ea58055fbb141af8f06c 100644
--- a/src/backend/gporca/server/CMakeLists.txt
+++ b/src/backend/gporca/server/CMakeLists.txt
@@ -87,7 +87,7 @@ CTypeModifierTest:
TypeModifierColumn TypeModifierCast TypeModifierConst TypeModifierDoubleMappableConst TypeModifierArrayRef;
CIndexScanTest:
-BTreeIndex-Against-InList BTreeIndex-Against-ScalarSubquery
+BTreeIndex-Against-InList BTreeIndex-Against-InListLarge BTreeIndex-Against-ScalarSubquery
IndexScan-AOTable IndexScan-DroppedColumns IndexScan-BoolTrue IndexScan-BoolFalse
IndexScan-Relabel IndexGet-OuterRefs LogicalIndexGetDroppedCols NewBtreeIndexScanCost
IndexScan-ORPredsNonPart IndexScan-ORPredsAOPart IndexScan-AndedIn;