diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index d85bac99bedb569e1cc9c7f083f6adfdcee18c38..f252f2d75c5b2e2b0c011e591b65aeeada1b565f 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -2312,14 +2312,11 @@ CTranslatorRelcacheToDXL::PimdobjRelStats return pdxlrelstats; } -//--------------------------------------------------------------------------- -// @function: -// CTranslatorRelcacheToDXL::PimdobjColStats -// -// @doc: -// Retrieve column statistics from relcache -// -//--------------------------------------------------------------------------- +// Retrieve column statistics from relcache +// If all statistics are missing, create dummy statistics +// Also, if the statistics are broken, create dummy statistics +// However, if any statistics are present and not broken, +// create column statistics using these statistics IMDCacheObject * CTranslatorRelcacheToDXL::PimdobjColStats ( @@ -2392,42 +2389,6 @@ CTranslatorRelcacheToDXL::PimdobjColStats return CDXLColStats::PdxlcolstatsDummy(pmp, pmdidColStats, pmdnameCol, dWidth); } - // histogram values extracted from the pg_statistic tuple for a given column - AttStatsSlot histSlot; - - // most common values and their frequencies extracted from the pg_statistic - // tuple for a given column - AttStatsSlot mcvSlot; - - (void) gpdb::FGetAttrStatsSlot - ( - &mcvSlot, - heaptupleStats, - STATISTIC_KIND_MCV, - InvalidOid, - ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS - ); - - if (mcvSlot.nvalues != mcvSlot.nnumbers) - { - // if the number of MCVs(nvalues) and number of MCFs(nnumbers) do not match, we discard the MCVs and MCFs - gpdb::FreeAttrStatsSlot(&mcvSlot); - mcvSlot.numbers = NULL; - mcvSlot.values = NULL; - mcvSlot.values_arr = NULL; - mcvSlot.numbers_arr = NULL; - mcvSlot.nnumbers = 0; - mcvSlot.nvalues = 0; - - char msgbuf[NAMEDATALEN * 2 + 100]; - snprintf(msgbuf, sizeof(msgbuf), "The number of most common values and frequencies do not match on column %ls of table %ls.", - pmdcol->Mdname().Pstr()->Wsz(), pmdrel->Mdname().Pstr()->Wsz()); - GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION, - LOG, - msgbuf, - NULL); - } - Form_pg_statistic fpsStats = (Form_pg_statistic) GETSTRUCT(heaptupleStats); // null frequency and NDV @@ -2439,9 +2400,6 @@ CTranslatorRelcacheToDXL::PimdobjColStats iNullNDV = 1; } - // fix mcv and null frequencies (sometimes they can add up to more than 1.0) - NormalizeFrequencies(mcvSlot.numbers, (ULONG) mcvSlot.nvalues, &dNullFrequency); - // column width CDouble dWidth = CDouble(fpsStats->stawidth); @@ -2458,13 +2416,63 @@ CTranslatorRelcacheToDXL::PimdobjColStats } dDistinct = dDistinct.FpCeil(); - // total MCV frequency - CDouble dMCFSum = 0.0; - for (int i = 0; i < mcvSlot.nvalues; i++) + BOOL fDummyStats = false; + // most common values and their frequencies extracted from the pg_statistic + // tuple for a given column + AttStatsSlot mcvSlot; + + (void) gpdb::FGetAttrStatsSlot + ( + &mcvSlot, + heaptupleStats, + STATISTIC_KIND_MCV, + InvalidOid, + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS + ); + if (InvalidOid != mcvSlot.valuetype && mcvSlot.valuetype != oidAttType) + { + char msgbuf[NAMEDATALEN * 2 + 100]; + snprintf(msgbuf, sizeof(msgbuf), "Type mismatch between attribute %ls of table %ls having type %d and statistic having type %d, please ANALYZE the table again", + pmdcol->Mdname().Pstr()->Wsz(), pmdrel->Mdname().Pstr()->Wsz(), oidAttType, mcvSlot.valuetype); + GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION, + NOTICE, + msgbuf, + NULL); + + gpdb::FreeAttrStatsSlot(&mcvSlot); + fDummyStats = true; + } + + else if (mcvSlot.nvalues != mcvSlot.nnumbers) + { + char msgbuf[NAMEDATALEN * 2 + 100]; + snprintf(msgbuf, sizeof(msgbuf), "The number of most common values and frequencies do not match on column %ls of table %ls.", + pmdcol->Mdname().Pstr()->Wsz(), pmdrel->Mdname().Pstr()->Wsz()); + GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION, + NOTICE, + msgbuf, + NULL); + + // if the number of MCVs(nvalues) and number of MCFs(nnumbers) do not match, we discard the MCVs and MCFs + gpdb::FreeAttrStatsSlot(&mcvSlot); + fDummyStats = true; + } + else { - dMCFSum = dMCFSum + CDouble(mcvSlot.numbers[i]); + // fix mcv and null frequencies (sometimes they can add up to more than 1.0) + NormalizeFrequencies(mcvSlot.numbers, (ULONG) mcvSlot.nvalues, &dNullFrequency); + + // total MCV frequency + CDouble dMCFSum = 0.0; + for (int i = 0; i < mcvSlot.nvalues; i++) + { + dMCFSum = dMCFSum + CDouble(mcvSlot.numbers[i]); + } } + // histogram values extracted from the pg_statistic tuple for a given column + AttStatsSlot histSlot; + // get histogram datums from pg_statistic entry (void) gpdb::FGetAttrStatsSlot ( @@ -2475,6 +2483,30 @@ CTranslatorRelcacheToDXL::PimdobjColStats ATTSTATSSLOT_VALUES ); + if (InvalidOid != histSlot.valuetype && histSlot.valuetype != oidAttType) + { + char msgbuf[NAMEDATALEN * 2 + 100]; + snprintf(msgbuf, sizeof(msgbuf), "Type mismatch between attribute %ls of table %ls having type %d and statistic having type %d, please ANALYZE the table again", + pmdcol->Mdname().Pstr()->Wsz(), pmdrel->Mdname().Pstr()->Wsz(), oidAttType, histSlot.valuetype); + GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION, + NOTICE, + msgbuf, + NULL); + + gpdb::FreeAttrStatsSlot(&histSlot); + fDummyStats = true; + } + + if (fDummyStats) + { + pdrgpdxlbucket->Release(); + pmdidColStats->AddRef(); + + CDouble dWidth = CStatistics::DDefaultColumnWidth; + gpdb::FreeHeapTuple(heaptupleStats); + return CDXLColStats::PdxlcolstatsDummy(pmp, pmdidColStats, pmdnameCol, dWidth); + } + CDouble dNDVBuckets(0.0); CDouble dFreqBuckets(0.0); CDouble dDistinctRemain(0.0); diff --git a/src/test/regress/expected/bfv_statistic.out b/src/test/regress/expected/bfv_statistic.out index cee4632939d678b529f70eba7aba711486959b24..b796b17b1f94cc69a9ddafbac88189f263e739e0 100644 --- a/src/test/regress/expected/bfv_statistic.out +++ b/src/test/regress/expected/bfv_statistic.out @@ -440,7 +440,11 @@ INSERT INTO test_broken_stats VALUES(1, 'abc'), (2, 'cde'), (3, 'efg'), (3, 'efg ANALYZE test_broken_stats; SET allow_system_table_mods='DML'; -- Simulate broken stats by changing the data type of MCV slot to a different type than in pg_attribute -UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='bfv_statistic.test_broken_stats'::regclass AND staattnum=2; +-- Broken MCVs +UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2; +-- Broken histogram +UPDATE pg_statistic SET stakind2=2 WHERE starelid ='test_broken_stats'::regclass AND staattnum=2; +UPDATE pg_statistic SET stavalues2='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2 and stakind2=2; SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b; a | b | a | b ---+---+---+--- diff --git a/src/test/regress/expected/bfv_statistic_optimizer.out b/src/test/regress/expected/bfv_statistic_optimizer.out index 6b3817a0c685dc9e1ae60207208ecfb34a3a118c..7fede1857dd2c791e6f859d93fb2b2f3962abccb 100644 --- a/src/test/regress/expected/bfv_statistic_optimizer.out +++ b/src/test/regress/expected/bfv_statistic_optimizer.out @@ -57,6 +57,9 @@ set allow_system_table_mods=DML; update pg_statistic set stavalues1='{6,3,1,5,4,2}'::int[] where starelid='bfv_statistics_foo2'::regclass; -- excercise the translator explain select a from bfv_statistics_foo2 where a > 1 order by a; +NOTICE: The number of most common values and frequencies do not match on column a of table bfv_statistics_foo2. +NOTICE: One or more columns in the following table(s) do not have statistics: bfv_statistics_foo2 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. QUERY PLAN ------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=8 width=4) @@ -440,8 +443,16 @@ INSERT INTO test_broken_stats VALUES(1, 'abc'), (2, 'cde'), (3, 'efg'), (3, 'efg ANALYZE test_broken_stats; SET allow_system_table_mods='DML'; -- Simulate broken stats by changing the data type of MCV slot to a different type than in pg_attribute -UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='bfv_statistic.test_broken_stats'::regclass AND staattnum=2; +-- Broken MCVs +UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2; +-- Broken histogram +UPDATE pg_statistic SET stakind2=2 WHERE starelid ='test_broken_stats'::regclass AND staattnum=2; +UPDATE pg_statistic SET stavalues2='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2 and stakind2=2; SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b; +NOTICE: Type mismatch between attribute b of table test_broken_stats having type 25 and statistic having type 23, please ANALYZE the table again +NOTICE: Type mismatch between attribute b of table test_broken_stats having type 25 and statistic having type 23, please ANALYZE the table again +NOTICE: One or more columns in the following table(s) do not have statistics: test_broken_stats +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. a | b | a | b ---+---+---+--- (0 rows) diff --git a/src/test/regress/sql/bfv_statistic.sql b/src/test/regress/sql/bfv_statistic.sql index 693fd963e391e3d8496f5062c3bdfa59ddb4e884..f95bcda9cdbd0dad37e4273079f6d41eae5ba2ef 100644 --- a/src/test/regress/sql/bfv_statistic.sql +++ b/src/test/regress/sql/bfv_statistic.sql @@ -272,7 +272,11 @@ INSERT INTO test_broken_stats VALUES(1, 'abc'), (2, 'cde'), (3, 'efg'), (3, 'efg ANALYZE test_broken_stats; SET allow_system_table_mods='DML'; -- Simulate broken stats by changing the data type of MCV slot to a different type than in pg_attribute -UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='bfv_statistic.test_broken_stats'::regclass AND staattnum=2; +-- Broken MCVs +UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2; +-- Broken histogram +UPDATE pg_statistic SET stakind2=2 WHERE starelid ='test_broken_stats'::regclass AND staattnum=2; +UPDATE pg_statistic SET stavalues2='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2 and stakind2=2; SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b;