提交 dcfeb842 编写于 作者: A Ashuka Xue

[Refactor] Rename functions for clarity and add DbgPrints

Functions renamed:
- CHistogram::Buckets -> GetNumBuckets
- CHistogram::ParseDXLToBucketsArray -> GetBuckets

Implemented DbgPrint for:
- CBucket
- CHistogram
Co-authored-by: NAshuka Xue <axue@pivotal.io>
Co-authored-by: NShreedhar Hardikar <shardikar@pivotal.io>
上级 7a665560
......@@ -2959,7 +2959,7 @@ CTranslatorRelcacheToDXL::TransformStatsToDXLBucketArray
num_distinct,
hist_freq
);
if (0 == histogram->Buckets())
if (0 == histogram->GetNumBuckets())
{
has_hist = false;
}
......@@ -3165,7 +3165,7 @@ CTranslatorRelcacheToDXL::TransformHistogramToDXLBucketArray
)
{
CDXLBucketArray *dxl_stats_bucket_array = GPOS_NEW(mp) CDXLBucketArray(mp);
const CBucketArray *buckets = hist->ParseDXLToBucketsArray();
const CBucketArray *buckets = hist->GetBuckets();
ULONG num_buckets = buckets->Size();
for (ULONG ul = 0; ul < num_buckets; ul++)
{
......
......@@ -13,6 +13,8 @@
#define GPNAUCRATES_CBucket_H
#include "gpos/base.h"
#include "gpos/task/CTask.h"
#include "gpos/error/CAutoTrace.h"
#include "naucrates/statistics/CPoint.h"
#include "naucrates/statistics/IBucket.h"
......@@ -171,6 +173,10 @@ namespace gpnaucrates
virtual
IOstream &OsPrint(IOstream &os) const;
#ifdef GPOS_DEBUG
void DbgPrint() const;
#endif
// construct new bucket with lower bound greater than given point
CBucket *MakeBucketGreaterThan(CMemoryPool *mp, CPoint *point) const;
......
......@@ -348,13 +348,13 @@ namespace gpnaucrates
const;
// number of buckets
ULONG Buckets() const
ULONG GetNumBuckets() const
{
return m_histogram_buckets->Size();
}
// buckets accessor
const CBucketArray *ParseDXLToBucketsArray() const
const CBucketArray *GetBuckets() const
{
return m_histogram_buckets;
}
......@@ -375,6 +375,10 @@ namespace gpnaucrates
virtual
IOstream &OsPrint(IOstream &os) const;
#ifdef GPOS_DEBUG
void DbgPrint() const;
#endif
// total frequency from buckets and null fraction
CDouble GetFrequency() const;
......
......@@ -246,6 +246,15 @@ CBucket::OsPrint
return os;
}
#ifdef GPOS_DEBUG
void
CBucket::DbgPrint() const
{
CAutoTrace at(CTask::Self()->Pmp());
OsPrint(at.Os());
}
#endif
//---------------------------------------------------------------------------
// @function:
// CBucket::MakeBucketGreaterThan
......
......@@ -176,6 +176,15 @@ CHistogram::OsPrint
return os;
}
#ifdef GPOS_DEBUG
void
CHistogram::DbgPrint() const
{
CAutoTrace at(CTask::Self()->Pmp());
OsPrint(at.Os());
}
#endif
// check if histogram is empty
BOOL
CHistogram::IsEmpty
......@@ -973,8 +982,8 @@ CHistogram::MakeLASJHistogram
CBucket *candidate_bucket = NULL;
const ULONG buckets1 = Buckets();
const ULONG buckets2 = histogram->Buckets();
const ULONG buckets1 = GetNumBuckets();
const ULONG buckets2 = histogram->GetNumBuckets();
while (idx1 < buckets1 && idx2 < buckets2)
{
......@@ -1041,7 +1050,7 @@ CDouble
CHistogram::NormalizeHistogram()
{
// trivially normalized
if (Buckets() == 0 && CStatistics::Epsilon > m_null_freq && CStatistics::Epsilon > m_distinct_remaining)
if (GetNumBuckets() == 0 && CStatistics::Epsilon > m_null_freq && CStatistics::Epsilon > m_distinct_remaining)
{
return CDouble(GPOS_FP_ABS_MAX);
}
......@@ -1156,8 +1165,8 @@ CHistogram::MakeJoinHistogramEqualityFilter
ULONG idx1 = 0; // index on buckets from this histogram
ULONG idx2 = 0; // index on buckets from other histogram
const ULONG buckets1 = Buckets();
const ULONG buckets2 = histogram->Buckets();
const ULONG buckets1 = GetNumBuckets();
const ULONG buckets2 = histogram->GetNumBuckets();
CDouble hist1_buckets_freq(0.0);
CDouble hist2_buckets_freq(0.0);
......@@ -1323,8 +1332,8 @@ CHistogram::CanComputeJoinNDVRemain
GPOS_ASSERT(NULL != histogram1);
GPOS_ASSERT(NULL != histogram2);
BOOL has_buckets1 = (0 != histogram1->Buckets());
BOOL has_buckets2 = (0 != histogram2->Buckets());
BOOL has_buckets1 = (0 != histogram1->GetNumBuckets());
BOOL has_buckets2 = (0 != histogram2->GetNumBuckets());
BOOL has_distinct_remain1 = CStatistics::Epsilon < histogram1->GetDistinctRemain();
BOOL has_distinct_remain2 = CStatistics::Epsilon < histogram2->GetDistinctRemain();
......@@ -1404,14 +1413,14 @@ CHistogram::ComputeJoinNDVRemainInfo
CDouble remaining_join_NDVs = std::max(final_join_NDVs - join_NDVs, CDouble(0.0));
// compute the frequency of the non-joining buckets in each input histogram
CDouble freq_buckets1 = CStatisticsUtils::GetFrequency(histogram1->ParseDXLToBucketsArray());
CDouble freq_buckets2 = CStatisticsUtils::GetFrequency(histogram2->ParseDXLToBucketsArray());
CDouble freq_buckets1 = CStatisticsUtils::GetFrequency(histogram1->GetBuckets());
CDouble freq_buckets2 = CStatisticsUtils::GetFrequency(histogram2->GetBuckets());
CDouble freq_non_join_buckets1 = std::max(CDouble(0), (freq_buckets1 - hist1_buckets_freq));
CDouble freq_non_join_buckets2 = std::max(CDouble(0), (freq_buckets2 - hist2_buckets_freq));
// compute the NDV of the non-joining buckets
CDouble NDVs_non_join_buckets1 = CStatisticsUtils::GetNumDistinct(histogram1->ParseDXLToBucketsArray()) - join_NDVs;
CDouble NDVs_non_join_buckets2 = CStatisticsUtils::GetNumDistinct(histogram2->ParseDXLToBucketsArray()) - join_NDVs;
CDouble NDVs_non_join_buckets1 = CStatisticsUtils::GetNumDistinct(histogram1->GetBuckets()) - join_NDVs;
CDouble NDVs_non_join_buckets2 = CStatisticsUtils::GetNumDistinct(histogram2->GetBuckets()) - join_NDVs;
CDouble freq_remain1 = histogram1->GetFreqRemain();
CDouble freq_remain2 = histogram2->GetFreqRemain();
......@@ -1554,8 +1563,8 @@ CHistogram::MakeUnionAllHistogramNormalize
}
}
const ULONG buckets1 = Buckets();
const ULONG buckets2 = histogram->Buckets();
const ULONG buckets1 = GetNumBuckets();
const ULONG buckets2 = histogram->GetNumBuckets();
GPOS_ASSERT_IFF(NULL == bucket1, idx1 == buckets1);
GPOS_ASSERT_IFF(NULL == bucket2, idx2 == buckets2);
......@@ -1751,8 +1760,8 @@ CHistogram::MakeUnionHistogramNormalize
}
}
const ULONG buckets1 = Buckets();
const ULONG buckets2 = other_histogram->Buckets();
const ULONG buckets1 = GetNumBuckets();
const ULONG buckets2 = other_histogram->GetNumBuckets();
GPOS_ASSERT_IFF(NULL == bucket1, idx1 == buckets1);
GPOS_ASSERT_IFF(NULL == bucket2, idx2 == buckets2);
......@@ -1880,7 +1889,7 @@ CHistogram::operator []
)
const
{
if (pos < Buckets())
if (pos < GetNumBuckets())
{
return (*m_histogram_buckets) [pos];
}
......@@ -2067,13 +2076,13 @@ CHistogram::DoNDVBasedCardEstimation
{
GPOS_ASSERT(NULL != histogram);
if (0 == histogram->Buckets())
if (0 == histogram->GetNumBuckets())
{
// no buckets, so join cardinality estimation is based solely on NDV remain
return true;
}
const IBucket *bucket = (*histogram->ParseDXLToBucketsArray())[0];
const IBucket *bucket = (*histogram->GetBuckets())[0];
IDatum *datum = bucket->GetLowerBound()->GetDatum();
......
......@@ -188,11 +188,11 @@ CStatisticsUtils::MergeMCVHist
GPOS_ASSERT(NULL != histogram);
GPOS_ASSERT(mcv_histogram->IsWellDefined());
GPOS_ASSERT(histogram->IsWellDefined());
GPOS_ASSERT(0 < mcv_histogram->Buckets());
GPOS_ASSERT(0 < histogram->Buckets());
GPOS_ASSERT(0 < mcv_histogram->GetNumBuckets());
GPOS_ASSERT(0 < histogram->GetNumBuckets());
const CBucketArray *mcv_buckets = mcv_histogram->ParseDXLToBucketsArray();
const CBucketArray *histogram_buckets = histogram->ParseDXLToBucketsArray();
const CBucketArray *mcv_buckets = mcv_histogram->GetBuckets();
const CBucketArray *histogram_buckets = histogram->GetBuckets();
IDatum *datum = (*mcv_buckets)[0]->GetLowerBound()->GetDatum();
......
......@@ -71,24 +71,24 @@ CHistogramTest::EresUnittest_CHistogramInt4()
CPoint *ppoint0 = CTestUtils::PpointInt4(mp, 9);
CHistogram *phist0 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptG, ppoint0);
CCardinalityTestUtils::PrintHist(mp, "phist0", phist0);
GPOS_RTL_ASSERT(phist0->Buckets() == 9);
GPOS_RTL_ASSERT(phist0->GetNumBuckets() == 9);
CPoint *point1 = CTestUtils::PpointInt4(mp, 35);
CHistogram *histogram1 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptL, point1);
CCardinalityTestUtils::PrintHist(mp, "histogram1", histogram1);
GPOS_RTL_ASSERT(histogram1->Buckets() == 4);
GPOS_RTL_ASSERT(histogram1->GetNumBuckets() == 4);
// edge case where point is equal to upper bound
CPoint *point2 = CTestUtils::PpointInt4(mp, 50);
CHistogram *histogram2 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptL,point2);
CCardinalityTestUtils::PrintHist(mp, "histogram2", histogram2);
GPOS_RTL_ASSERT(histogram2->Buckets() == 5);
GPOS_RTL_ASSERT(histogram2->GetNumBuckets() == 5);
// equality check
CPoint *point3 = CTestUtils::PpointInt4(mp, 100);
CHistogram *phist3 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptEq, point3);
CCardinalityTestUtils::PrintHist(mp, "phist3", phist3);
GPOS_RTL_ASSERT(phist3->Buckets() == 1);
GPOS_RTL_ASSERT(phist3->GetNumBuckets() == 1);
// normalized output after filter
CPoint *ppoint4 = CTestUtils::PpointInt4(mp, 100);
......@@ -100,12 +100,12 @@ CHistogramTest::EresUnittest_CHistogramInt4()
// lasj
CHistogram *phist5 = histogram->MakeLASJHistogram(CStatsPred::EstatscmptEq, histogram2);
CCardinalityTestUtils::PrintHist(mp, "phist5", phist5);
GPOS_RTL_ASSERT(phist5->Buckets() == 5);
GPOS_RTL_ASSERT(phist5->GetNumBuckets() == 5);
// inequality check
CHistogram *phist6 = histogram->MakeHistogramFilter(CStatsPred::EstatscmptNEq, point2);
CCardinalityTestUtils::PrintHist(mp, "phist6", phist6);
GPOS_RTL_ASSERT(phist6->Buckets() == 10);
GPOS_RTL_ASSERT(phist6->GetNumBuckets() == 10);
// histogram with null fraction and remaining tuples
CHistogram *phist7 = PhistExampleInt4Remain(mp);
......@@ -125,7 +125,7 @@ CHistogramTest::EresUnittest_CHistogramInt4()
// equality join, hitting remaining tuples
CHistogram *phist10 = phist7->MakeJoinHistogram(CStatsPred::EstatscmptEq, phist7);
GPOS_RTL_ASSERT(phist10->Buckets() == 5);
GPOS_RTL_ASSERT(phist10->GetNumBuckets() == 5);
GPOS_RTL_ASSERT(fabs((phist10->GetDistinctRemain() - 2.0).Get()) < CStatistics::Epsilon);
GPOS_RTL_ASSERT(fabs((phist10->GetFreqRemain() - 0.08).Get()) < CStatistics::Epsilon);
......@@ -173,7 +173,7 @@ CHistogramTest::EresUnittest_CHistogramBool()
CDouble scale_factor(0.0);
CHistogram *histogram1 = histogram->MakeHistogramFilterNormalize(CStatsPred::EstatscmptEq, point1, &scale_factor);
CCardinalityTestUtils::PrintHist(mp, "histogram1", histogram1);
GPOS_RTL_ASSERT(histogram1->Buckets() == 1);
GPOS_RTL_ASSERT(histogram1->GetNumBuckets() == 1);
// clean up
point1->Release();
......
......@@ -160,11 +160,11 @@ CJoinCardinalityTest::EresUnittest_JoinNDVRemain()
CDouble dNDVRemainJoin = elem.m_dNDVRemainJoin;
CDouble dFreqRemainJoin = elem.m_dFreqRemainJoin;
CDouble dDiffNDVJoin(fabs((dNDVBucketsJoin - CStatisticsUtils::GetNumDistinct(join_histogram->ParseDXLToBucketsArray())).Get()));
CDouble dDiffNDVJoin(fabs((dNDVBucketsJoin - CStatisticsUtils::GetNumDistinct(join_histogram->GetBuckets())).Get()));
CDouble dDiffNDVRemainJoin(fabs((dNDVRemainJoin - join_histogram->GetDistinctRemain()).Get()));
CDouble dDiffFreqRemainJoin(fabs((dFreqRemainJoin - join_histogram->GetFreqRemain()).Get()));
if (join_histogram->Buckets() != ulBucketsJoin || (dDiffNDVJoin > CStatistics::Epsilon)
if (join_histogram->GetNumBuckets() != ulBucketsJoin || (dDiffNDVJoin > CStatistics::Epsilon)
|| (dDiffNDVRemainJoin > CStatistics::Epsilon) || (dDiffFreqRemainJoin > CStatistics::Epsilon))
{
eres = GPOS_FAILED;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册