提交 2ef4bdb2 编写于 作者: A Ashuka Xue 提交者: Ashuka Xue

[Refactor] Update MakeStatsFilter, Rename CreateHistHashMapAfterMergingDisjPreds ->

MergeHistogramMapsForDisjPreds

This commit refactors MakeStatsFilter to use
MakeHistHashMapConjOrDisjFilter instead of individually calling
MakeHistHashMapConj and MakeHistHashMapDisj.

This commit also modifies MergeHistogramMapsForDisjPreds to avoid copying
and creating unnecessary histogram buckets.
上级 8cbfb7d1
......@@ -208,8 +208,8 @@ namespace gpnaucrates
CBucket *bucket_other,
CDouble rows,
CDouble rows_other,
CBucket **result_bucket1_new,
CBucket **result_bucket2_new,
CBucket **bucket1_new,
CBucket **bucket2_new,
BOOL is_union_all = true
);
......
......@@ -275,14 +275,14 @@ namespace gpnaucrates
// create a new hash map of histograms after merging
// the histograms generated by the child of disjunctive predicate
static
UlongToHistogramMap *CreateHistHashMapAfterMergingDisjPreds
UlongToHistogramMap *MergeHistogramMapsForDisjPreds
(
CMemoryPool *mp,
CBitSet *non_updatable_cols,
UlongToHistogramMap *prev_histogram_map,
UlongToHistogramMap *disj_preds_histogram_map,
CDouble cummulative_rows,
CDouble num_rows_disj_child
UlongToHistogramMap *hmap1,
UlongToHistogramMap *hmap2,
CDouble rows1,
CDouble rows2
);
// helper method to copy the hash map of histograms
......
......@@ -1062,14 +1062,14 @@ CBucket::MakeBucketMerged
CBucket *bucket_other,
CDouble rows,
CDouble rows_other,
CBucket **result_bucket_new1,
CBucket **result_bucket_new2,
CBucket **bucket_new1,
CBucket **bucket_new2,
BOOL is_union_all
)
{
// we shouldn't be overwriting anything important
GPOS_ASSERT(NULL == *result_bucket_new1);
GPOS_ASSERT(NULL == *result_bucket_new2);
GPOS_ASSERT(NULL == *bucket_new1);
GPOS_ASSERT(NULL == *bucket_new2);
CPoint *result_lower_new = CPoint::MinPoint(this->GetLowerBound(), bucket_other->GetLowerBound());
CPoint *result_upper_new = CPoint::MinPoint(this->GetUpperBound(), bucket_other->GetUpperBound());
......@@ -1092,17 +1092,20 @@ CBucket::MakeBucketMerged
if (result_upper_new->IsLessThan(this->GetUpperBound()))
{
*result_bucket_new1 = this->MakeBucketScaleLower(mp, result_upper_new, !is_upper_closed);
// e.g [1, 150) + [50, 100) -> [100, 150)
*bucket_new1 = this->MakeBucketScaleLower(mp, result_upper_new, !is_upper_closed);
}
if (result_upper_new->IsLessThan(bucket_other->GetUpperBound()))
else if (result_upper_new->IsLessThan(bucket_other->GetUpperBound()))
{
*result_bucket_new2 = bucket_other->MakeBucketScaleLower(mp, result_upper_new, !is_upper_closed);
// e.g [1, 100) + [50, 150) -> [100, 150)
*bucket_new2 = bucket_other->MakeBucketScaleLower(mp, result_upper_new, !is_upper_closed);
}
result_lower_new->AddRef();
result_upper_new->AddRef();
// TODO: is is_lower_closed = true always? E.g (1, 150) + (50, 100)
return GPOS_NEW(mp) CBucket(result_lower_new, result_upper_new, true /* is_lower_closed */, is_upper_closed, frequency, distinct);
}
......
......@@ -234,35 +234,16 @@ CFilterStatsProcessor::MakeStatsFilter
}
else
{
if (CStatsPred::EsptDisj == base_pred_stats->GetPredStatsType())
{
CStatsPredDisj *pred_stats = CStatsPredDisj::ConvertPredStats(base_pred_stats);
histograms_new = MakeHistHashMapDisjFilter
(
mp,
stats_config,
histograms_copy,
input_rows,
pred_stats,
&scale_factor
);
}
else
{
GPOS_ASSERT(CStatsPred::EsptConj == base_pred_stats->GetPredStatsType());
CStatsPredConj *pred_stats = CStatsPredConj::ConvertPredStats(base_pred_stats);
num_predicates = pred_stats->GetNumPreds();
histograms_new = MakeHistHashMapConjFilter
histograms_new = MakeHistHashMapConjOrDisjFilter
(
mp,
stats_config,
histograms_copy,
input_rows,
pred_stats,
base_pred_stats,
&scale_factor
);
}
GPOS_ASSERT(CStatistics::MinRows.Get() <= scale_factor.Get());
rows_filter = input_rows / scale_factor;
rows_filter = std::max(CStatistics::MinRows.Get(), rows_filter.Get());
......@@ -649,7 +630,7 @@ CFilterStatsProcessor::MakeHistHashMapDisjFilter
GPOS_ASSERT(NULL == disjunctive_child_col_histogram);
CDouble current_rows_estimate = input_rows / CScaleFactorUtils::CalcScaleFactorCumulativeDisj(stats_config, scale_factors, input_rows);
UlongToHistogramMap *merged_histograms = CStatisticsUtils::CreateHistHashMapAfterMergingDisjPreds
UlongToHistogramMap *merged_histograms = CStatisticsUtils::MergeHistogramMapsForDisjPreds
(
mp,
non_updatable_cols,
......
......@@ -922,102 +922,74 @@ CStatisticsUtils::PrintHistogramMap
//
//---------------------------------------------------------------------------
UlongToHistogramMap *
CStatisticsUtils::CreateHistHashMapAfterMergingDisjPreds
CStatisticsUtils::MergeHistogramMapsForDisjPreds
(
CMemoryPool *mp,
CBitSet *non_updatable_cols,
UlongToHistogramMap *col_histogram_mapping,
UlongToHistogramMap *disj_preds_histogram_map,
CDouble cumulative_rows,
CDouble num_rows_disj_child
UlongToHistogramMap *hmap1,
UlongToHistogramMap *hmap2,
CDouble rows1,
CDouble rows2
)
{
GPOS_ASSERT(NULL != non_updatable_cols);
GPOS_ASSERT(NULL != col_histogram_mapping);
GPOS_ASSERT(NULL != disj_preds_histogram_map);
GPOS_ASSERT(NULL != hmap1);
GPOS_ASSERT(NULL != hmap2);
BOOL is_empty = (CStatistics::Epsilon >= num_rows_disj_child);
CDouble output_rows(CStatistics::MinRows.Get());
UlongToHistogramMap *merged_histogram = GPOS_NEW(mp) UlongToHistogramMap(mp);
UlongToHistogramMap *merged_hmap = GPOS_NEW(mp) UlongToHistogramMap(mp);
// iterate over the new hash map of histograms and only add
// histograms of columns whose output statistics can be updated
UlongToHistogramMapIter disj_hist_iter(disj_preds_histogram_map);
while (disj_hist_iter.Advance())
if (rows2 > CStatistics::Epsilon)
{
ULONG disj_child_colid = *(disj_hist_iter.Key());
const CHistogram *disj_child_histogram = disj_hist_iter.Value();
if (!non_updatable_cols->Get(disj_child_colid))
UlongToHistogramMapIter hmap2_iter(hmap2);
while (hmap2_iter.Advance())
{
if (!is_empty)
ULONG colid = *(hmap2_iter.Key());
const CHistogram *histogram = hmap2_iter.Value();
if (!non_updatable_cols->Get(colid))
{
AddHistogram(mp, disj_child_colid, disj_child_histogram, merged_histogram);
}
else
{
// add a dummy statistics object since the estimated number of rows for
// disjunction child is "0"
merged_histogram->Insert
(
GPOS_NEW(mp) ULONG(disj_child_colid),
GPOS_NEW(mp) CHistogram(mp, false /* is_well_defined */)
);
AddHistogram(mp, colid, histogram, merged_hmap);
}
GPOS_CHECK_ABORT;
}
GPOS_CHECK_ABORT;
}
// iterate over the previously generated histograms and
// union them with newly created hash map of histograms (if these columns are updatable)
UlongToHistogramMapIter col_hist_mapping_iter(col_histogram_mapping);
while (col_hist_mapping_iter.Advance())
if (rows1 > CStatistics::Epsilon)
{
ULONG colid = *(col_hist_mapping_iter.Key());
const CHistogram *histogram = col_hist_mapping_iter.Value();
if (NULL != histogram && !non_updatable_cols->Get(colid))
UlongToHistogramMapIter hmap1_iter(hmap1);
while (hmap1_iter.Advance())
{
if (is_empty)
ULONG colid = *(hmap1_iter.Key());
const CHistogram *histogram1 = hmap1_iter.Value();
if (NULL != histogram1 && !non_updatable_cols->Get(colid))
{
// since the estimated output of the disjunction child is "0" tuples
// no point merging histograms.
AddHistogram
(
mp,
colid,
histogram,
merged_histogram,
true /* replace_old */
);
}
else
{
const CHistogram *disj_child_histogram = disj_preds_histogram_map->Find(&colid);
CHistogram *normalized_union_histogram = histogram->MakeUnionHistogramNormalize
(
cumulative_rows,
disj_child_histogram,
num_rows_disj_child,
&output_rows
);
AddHistogram
(
mp,
colid,
normalized_union_histogram,
merged_histogram,
true /* fReplaceOld */
);
GPOS_DELETE(normalized_union_histogram);
}
// merge with viable histograms that were added to merged_hmap
const CHistogram *histogram2 = merged_hmap->Find(&colid);
if (NULL != histogram2)
{
CHistogram *normalized_union_histogram =
histogram1->MakeUnionHistogramNormalize(rows1, histogram2, rows2, &output_rows);
GPOS_CHECK_ABORT;
AddHistogram(mp, colid, normalized_union_histogram, merged_hmap, true /* fReplaceOld */);
GPOS_DELETE(normalized_union_histogram);
}
else
{
AddHistogram(mp, colid, histogram1, merged_hmap);
}
GPOS_CHECK_ABORT;
}
}
}
return merged_histogram;
return merged_hmap;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册