From dcfeb8429f1f6ca6f9dcf57777fc4a870d3cd566 Mon Sep 17 00:00:00 2001 From: Ashuka Xue Date: Wed, 22 Apr 2020 15:29:15 -0700 Subject: [PATCH] [Refactor] Rename functions for clarity and add DbgPrints Functions renamed: - CHistogram::Buckets -> GetNumBuckets - CHistogram::ParseDXLToBucketsArray -> GetBuckets Implemented DbgPrint for: - CBucket - CHistogram Co-authored-by: Ashuka Xue Co-authored-by: Shreedhar Hardikar --- .../translate/CTranslatorRelcacheToDXL.cpp | 4 +- .../include/naucrates/statistics/CBucket.h | 6 +++ .../include/naucrates/statistics/CHistogram.h | 8 +++- .../libnaucrates/src/statistics/CBucket.cpp | 9 ++++ .../src/statistics/CHistogram.cpp | 45 +++++++++++-------- .../src/statistics/CStatisticsUtils.cpp | 8 ++-- .../dxl/statistics/CHistogramTest.cpp | 16 +++---- .../dxl/statistics/CJoinCardinalityTest.cpp | 4 +- 8 files changed, 64 insertions(+), 36 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index 041cad62ae..2ac082a5b5 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -2959,7 +2959,7 @@ CTranslatorRelcacheToDXL::TransformStatsToDXLBucketArray num_distinct, hist_freq ); - if (0 == histogram->Buckets()) + if (0 == histogram->GetNumBuckets()) { has_hist = false; } @@ -3165,7 +3165,7 @@ CTranslatorRelcacheToDXL::TransformHistogramToDXLBucketArray ) { CDXLBucketArray *dxl_stats_bucket_array = GPOS_NEW(mp) CDXLBucketArray(mp); - const CBucketArray *buckets = hist->ParseDXLToBucketsArray(); + const CBucketArray *buckets = hist->GetBuckets(); ULONG num_buckets = buckets->Size(); for (ULONG ul = 0; ul < num_buckets; ul++) { diff --git a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CBucket.h b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CBucket.h index 2b675fd718..f3d1954bfd 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CBucket.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CBucket.h @@ -13,6 +13,8 @@ #define GPNAUCRATES_CBucket_H #include "gpos/base.h" +#include "gpos/task/CTask.h" +#include "gpos/error/CAutoTrace.h" #include "naucrates/statistics/CPoint.h" #include "naucrates/statistics/IBucket.h" @@ -171,6 +173,10 @@ namespace gpnaucrates virtual IOstream &OsPrint(IOstream &os) const; +#ifdef GPOS_DEBUG + void DbgPrint() const; +#endif + // construct new bucket with lower bound greater than given point CBucket *MakeBucketGreaterThan(CMemoryPool *mp, CPoint *point) const; diff --git a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CHistogram.h b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CHistogram.h index 9058df12ad..9db99615a8 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CHistogram.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CHistogram.h @@ -348,13 +348,13 @@ namespace gpnaucrates const; // number of buckets - ULONG Buckets() const + ULONG GetNumBuckets() const { return m_histogram_buckets->Size(); } // buckets accessor - const CBucketArray *ParseDXLToBucketsArray() const + const CBucketArray *GetBuckets() const { return m_histogram_buckets; } @@ -375,6 +375,10 @@ namespace gpnaucrates virtual IOstream &OsPrint(IOstream &os) const; +#ifdef GPOS_DEBUG + void DbgPrint() const; +#endif + // total frequency from buckets and null fraction CDouble GetFrequency() const; diff --git a/src/backend/gporca/libnaucrates/src/statistics/CBucket.cpp b/src/backend/gporca/libnaucrates/src/statistics/CBucket.cpp index 75d6439b59..097f513984 100644 --- a/src/backend/gporca/libnaucrates/src/statistics/CBucket.cpp +++ b/src/backend/gporca/libnaucrates/src/statistics/CBucket.cpp @@ -246,6 +246,15 @@ CBucket::OsPrint return os; } +#ifdef GPOS_DEBUG +void +CBucket::DbgPrint() const +{ + CAutoTrace at(CTask::Self()->Pmp()); + OsPrint(at.Os()); +} +#endif + //--------------------------------------------------------------------------- // @function: // CBucket::MakeBucketGreaterThan diff --git a/src/backend/gporca/libnaucrates/src/statistics/CHistogram.cpp b/src/backend/gporca/libnaucrates/src/statistics/CHistogram.cpp index 74e095ca80..d7f02a8bf5 100644 --- a/src/backend/gporca/libnaucrates/src/statistics/CHistogram.cpp +++ b/src/backend/gporca/libnaucrates/src/statistics/CHistogram.cpp @@ -176,6 +176,15 @@ CHistogram::OsPrint return os; } +#ifdef GPOS_DEBUG +void +CHistogram::DbgPrint() const +{ + CAutoTrace at(CTask::Self()->Pmp()); + OsPrint(at.Os()); +} +#endif + // check if histogram is empty BOOL CHistogram::IsEmpty @@ -973,8 +982,8 @@ CHistogram::MakeLASJHistogram CBucket *candidate_bucket = NULL; - const ULONG buckets1 = Buckets(); - const ULONG buckets2 = histogram->Buckets(); + const ULONG buckets1 = GetNumBuckets(); + const ULONG buckets2 = histogram->GetNumBuckets(); while (idx1 < buckets1 && idx2 < buckets2) { @@ -1041,7 +1050,7 @@ CDouble CHistogram::NormalizeHistogram() { // trivially normalized - if (Buckets() == 0 && CStatistics::Epsilon > m_null_freq && CStatistics::Epsilon > m_distinct_remaining) + if (GetNumBuckets() == 0 && CStatistics::Epsilon > m_null_freq && CStatistics::Epsilon > m_distinct_remaining) { return CDouble(GPOS_FP_ABS_MAX); } @@ -1156,8 +1165,8 @@ CHistogram::MakeJoinHistogramEqualityFilter ULONG idx1 = 0; // index on buckets from this histogram ULONG idx2 = 0; // index on buckets from other histogram - const ULONG buckets1 = Buckets(); - const ULONG buckets2 = histogram->Buckets(); + const ULONG buckets1 = GetNumBuckets(); + const ULONG buckets2 = histogram->GetNumBuckets(); CDouble hist1_buckets_freq(0.0); CDouble hist2_buckets_freq(0.0); @@ -1323,8 +1332,8 @@ CHistogram::CanComputeJoinNDVRemain GPOS_ASSERT(NULL != histogram1); GPOS_ASSERT(NULL != histogram2); - BOOL has_buckets1 = (0 != histogram1->Buckets()); - BOOL has_buckets2 = (0 != histogram2->Buckets()); + BOOL has_buckets1 = (0 != histogram1->GetNumBuckets()); + BOOL has_buckets2 = (0 != histogram2->GetNumBuckets()); BOOL has_distinct_remain1 = CStatistics::Epsilon < histogram1->GetDistinctRemain(); BOOL has_distinct_remain2 = CStatistics::Epsilon < histogram2->GetDistinctRemain(); @@ -1404,14 +1413,14 @@ CHistogram::ComputeJoinNDVRemainInfo CDouble remaining_join_NDVs = std::max(final_join_NDVs - join_NDVs, CDouble(0.0)); // compute the frequency of the non-joining buckets in each input histogram - CDouble freq_buckets1 = CStatisticsUtils::GetFrequency(histogram1->ParseDXLToBucketsArray()); - CDouble freq_buckets2 = CStatisticsUtils::GetFrequency(histogram2->ParseDXLToBucketsArray()); + CDouble freq_buckets1 = CStatisticsUtils::GetFrequency(histogram1->GetBuckets()); + CDouble freq_buckets2 = CStatisticsUtils::GetFrequency(histogram2->GetBuckets()); CDouble freq_non_join_buckets1 = std::max(CDouble(0), (freq_buckets1 - hist1_buckets_freq)); CDouble freq_non_join_buckets2 = std::max(CDouble(0), (freq_buckets2 - hist2_buckets_freq)); // compute the NDV of the non-joining buckets - CDouble NDVs_non_join_buckets1 = CStatisticsUtils::GetNumDistinct(histogram1->ParseDXLToBucketsArray()) - join_NDVs; - CDouble NDVs_non_join_buckets2 = CStatisticsUtils::GetNumDistinct(histogram2->ParseDXLToBucketsArray()) - join_NDVs; + CDouble NDVs_non_join_buckets1 = CStatisticsUtils::GetNumDistinct(histogram1->GetBuckets()) - join_NDVs; + CDouble NDVs_non_join_buckets2 = CStatisticsUtils::GetNumDistinct(histogram2->GetBuckets()) - join_NDVs; CDouble freq_remain1 = histogram1->GetFreqRemain(); CDouble freq_remain2 = histogram2->GetFreqRemain(); @@ -1554,8 +1563,8 @@ CHistogram::MakeUnionAllHistogramNormalize } } - const ULONG buckets1 = Buckets(); - const ULONG buckets2 = histogram->Buckets(); + const ULONG buckets1 = GetNumBuckets(); + const ULONG buckets2 = histogram->GetNumBuckets(); GPOS_ASSERT_IFF(NULL == bucket1, idx1 == buckets1); GPOS_ASSERT_IFF(NULL == bucket2, idx2 == buckets2); @@ -1751,8 +1760,8 @@ CHistogram::MakeUnionHistogramNormalize } } - const ULONG buckets1 = Buckets(); - const ULONG buckets2 = other_histogram->Buckets(); + const ULONG buckets1 = GetNumBuckets(); + const ULONG buckets2 = other_histogram->GetNumBuckets(); GPOS_ASSERT_IFF(NULL == bucket1, idx1 == buckets1); GPOS_ASSERT_IFF(NULL == bucket2, idx2 == buckets2); @@ -1880,7 +1889,7 @@ CHistogram::operator [] ) const { - if (pos < Buckets()) + if (pos < GetNumBuckets()) { return (*m_histogram_buckets) [pos]; } @@ -2067,13 +2076,13 @@ CHistogram::DoNDVBasedCardEstimation { GPOS_ASSERT(NULL != histogram); - if (0 == histogram->Buckets()) + if (0 == histogram->GetNumBuckets()) { // no buckets, so join cardinality estimation is based solely on NDV remain return true; } - const IBucket *bucket = (*histogram->ParseDXLToBucketsArray())[0]; + const IBucket *bucket = (*histogram->GetBuckets())[0]; IDatum *datum = bucket->GetLowerBound()->GetDatum(); diff --git a/src/backend/gporca/libnaucrates/src/statistics/CStatisticsUtils.cpp b/src/backend/gporca/libnaucrates/src/statistics/CStatisticsUtils.cpp index 1cc32f3e66..9f62498b60 100644 --- a/src/backend/gporca/libnaucrates/src/statistics/CStatisticsUtils.cpp +++ b/src/backend/gporca/libnaucrates/src/statistics/CStatisticsUtils.cpp @@ -188,11 +188,11 @@ CStatisticsUtils::MergeMCVHist GPOS_ASSERT(NULL != histogram); GPOS_ASSERT(mcv_histogram->IsWellDefined()); GPOS_ASSERT(histogram->IsWellDefined()); - GPOS_ASSERT(0 < mcv_histogram->Buckets()); - GPOS_ASSERT(0 < histogram->Buckets()); + GPOS_ASSERT(0 < mcv_histogram->GetNumBuckets()); + GPOS_ASSERT(0 < histogram->GetNumBuckets()); - const CBucketArray *mcv_buckets = mcv_histogram->ParseDXLToBucketsArray(); - const CBucketArray *histogram_buckets = histogram->ParseDXLToBucketsArray(); + const CBucketArray *mcv_buckets = mcv_histogram->GetBuckets(); + const CBucketArray *histogram_buckets = histogram->GetBuckets(); IDatum *datum = (*mcv_buckets)[0]->GetLowerBound()->GetDatum(); diff --git a/src/backend/gporca/server/src/unittest/dxl/statistics/CHistogramTest.cpp b/src/backend/gporca/server/src/unittest/dxl/statistics/CHistogramTest.cpp index 39a9faf87f..6191c079f6 100644 --- a/src/backend/gporca/server/src/unittest/dxl/statistics/CHistogramTest.cpp +++ b/src/backend/gporca/server/src/unittest/dxl/statistics/CHistogramTest.cpp @@ -71,24 +71,24 @@ CHistogramTest::EresUnittest_CHistogramInt4() CPoint *ppoint0 = CTestUtils::PpointInt4(mp, 9); CHistogram *phist0 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptG, ppoint0); CCardinalityTestUtils::PrintHist(mp, "phist0", phist0); - GPOS_RTL_ASSERT(phist0->Buckets() == 9); + GPOS_RTL_ASSERT(phist0->GetNumBuckets() == 9); CPoint *point1 = CTestUtils::PpointInt4(mp, 35); CHistogram *histogram1 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptL, point1); CCardinalityTestUtils::PrintHist(mp, "histogram1", histogram1); - GPOS_RTL_ASSERT(histogram1->Buckets() == 4); + GPOS_RTL_ASSERT(histogram1->GetNumBuckets() == 4); // edge case where point is equal to upper bound CPoint *point2 = CTestUtils::PpointInt4(mp, 50); CHistogram *histogram2 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptL,point2); CCardinalityTestUtils::PrintHist(mp, "histogram2", histogram2); - GPOS_RTL_ASSERT(histogram2->Buckets() == 5); + GPOS_RTL_ASSERT(histogram2->GetNumBuckets() == 5); // equality check CPoint *point3 = CTestUtils::PpointInt4(mp, 100); CHistogram *phist3 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptEq, point3); CCardinalityTestUtils::PrintHist(mp, "phist3", phist3); - GPOS_RTL_ASSERT(phist3->Buckets() == 1); + GPOS_RTL_ASSERT(phist3->GetNumBuckets() == 1); // normalized output after filter CPoint *ppoint4 = CTestUtils::PpointInt4(mp, 100); @@ -100,12 +100,12 @@ CHistogramTest::EresUnittest_CHistogramInt4() // lasj CHistogram *phist5 = histogram->MakeLASJHistogram(CStatsPred::EstatscmptEq, histogram2); CCardinalityTestUtils::PrintHist(mp, "phist5", phist5); - GPOS_RTL_ASSERT(phist5->Buckets() == 5); + GPOS_RTL_ASSERT(phist5->GetNumBuckets() == 5); // inequality check CHistogram *phist6 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptNEq, point2); CCardinalityTestUtils::PrintHist(mp, "phist6", phist6); - GPOS_RTL_ASSERT(phist6->Buckets() == 10); + GPOS_RTL_ASSERT(phist6->GetNumBuckets() == 10); // histogram with null fraction and remaining tuples CHistogram *phist7 = PhistExampleInt4Remain(mp); @@ -125,7 +125,7 @@ CHistogramTest::EresUnittest_CHistogramInt4() // equality join, hitting remaining tuples CHistogram *phist10 = phist7->MakeJoinHistogram(CStatsPred::EstatscmptEq, phist7); - GPOS_RTL_ASSERT(phist10->Buckets() == 5); + GPOS_RTL_ASSERT(phist10->GetNumBuckets() == 5); GPOS_RTL_ASSERT(fabs((phist10->GetDistinctRemain() - 2.0).Get()) < CStatistics::Epsilon); GPOS_RTL_ASSERT(fabs((phist10->GetFreqRemain() - 0.08).Get()) < CStatistics::Epsilon); @@ -173,7 +173,7 @@ CHistogramTest::EresUnittest_CHistogramBool() CDouble scale_factor(0.0); CHistogram *histogram1 = histogram->MakeHistogramFilterNormalize(CStatsPred::EstatscmptEq, point1, &scale_factor); CCardinalityTestUtils::PrintHist(mp, "histogram1", histogram1); - GPOS_RTL_ASSERT(histogram1->Buckets() == 1); + GPOS_RTL_ASSERT(histogram1->GetNumBuckets() == 1); // clean up point1->Release(); diff --git a/src/backend/gporca/server/src/unittest/dxl/statistics/CJoinCardinalityTest.cpp b/src/backend/gporca/server/src/unittest/dxl/statistics/CJoinCardinalityTest.cpp index f5c8a6241a..082d9d9adf 100644 --- a/src/backend/gporca/server/src/unittest/dxl/statistics/CJoinCardinalityTest.cpp +++ b/src/backend/gporca/server/src/unittest/dxl/statistics/CJoinCardinalityTest.cpp @@ -160,11 +160,11 @@ CJoinCardinalityTest::EresUnittest_JoinNDVRemain() CDouble dNDVRemainJoin = elem.m_dNDVRemainJoin; CDouble dFreqRemainJoin = elem.m_dFreqRemainJoin; - CDouble dDiffNDVJoin(fabs((dNDVBucketsJoin - CStatisticsUtils::GetNumDistinct(join_histogram->ParseDXLToBucketsArray())).Get())); + CDouble dDiffNDVJoin(fabs((dNDVBucketsJoin - CStatisticsUtils::GetNumDistinct(join_histogram->GetBuckets())).Get())); CDouble dDiffNDVRemainJoin(fabs((dNDVRemainJoin - join_histogram->GetDistinctRemain()).Get())); CDouble dDiffFreqRemainJoin(fabs((dFreqRemainJoin - join_histogram->GetFreqRemain()).Get())); - if (join_histogram->Buckets() != ulBucketsJoin || (dDiffNDVJoin > CStatistics::Epsilon) + if (join_histogram->GetNumBuckets() != ulBucketsJoin || (dDiffNDVJoin > CStatistics::Epsilon) || (dDiffNDVRemainJoin > CStatistics::Epsilon) || (dDiffFreqRemainJoin > CStatistics::Epsilon)) { eres = GPOS_FAILED; -- GitLab