提交 c6355348 编写于 作者: H Heikki Linnakangas

Set locus correctly on Append node, if there are General locus children.

I found the logic to decide the target locus hard to understand, so I
rewrote it in a table-driven approach. I hope it's not just me.

Fixes github issue https://github.com/greenplum-db/gpdb/issues/8711

Reviewed-by: Zhenghua Lyu <zlv@pivotal.io>
上级 99dfccc2
......@@ -1366,8 +1366,8 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel,
List *pathkeys)
{
ListCell *l;
bool fIsNotPartitioned = false;
bool fIsPartitionInEntry = false;
CdbLocusType targetlocustype;
CdbPathLocus targetlocus;
int numsegments;
List *subpaths;
List **subpaths_out;
......@@ -1390,144 +1390,206 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel,
return;
}
/* By default put Append node on all the segments */
numsegments = getgpsegmentCount();
/*
* Do a first pass over the children to determine what locus the result
* should have, based on the loci of the children.
*
* We only determine the target locus type here; the number of segments is
* figured out later. We also treat all partitioned types the same for now,
* using Strewn to represent them all, and figure out later whether we can
* mark the result hashed or have to leave it strewn.
*/
/*
* Table of pairwise locus combinations for the children of an Append.
*
* Each row reads: when the locus type accumulated so far and the locus type
* of the next child are 'a' and 'b', the combined target locus type is
* 'result'. The lookup loop that consumes this table tests each row in both
* orientations (a,b) and (b,a), so every unordered pair is listed only once.
*
* Partitioned children are mapped to Strewn before the lookup, so Strewn is
* the only partitioned type that appears here. The rows cover all 21
* unordered pairs of the six locus types used below; a pair that matches no
* row makes the lookup loop raise an error.
*/
static const struct
{
CdbLocusType a;
CdbLocusType b;
CdbLocusType result;
} append_locus_compatibility_table[] =
{
/*
* If any of the children have 'entry' locus, bring all the subpaths
* to the entry db.
*/
{ CdbLocusType_Entry, CdbLocusType_Entry, CdbLocusType_Entry },
{ CdbLocusType_Entry, CdbLocusType_SingleQE, CdbLocusType_Entry },
{ CdbLocusType_Entry, CdbLocusType_Strewn, CdbLocusType_Entry },
{ CdbLocusType_Entry, CdbLocusType_Replicated, CdbLocusType_Entry },
{ CdbLocusType_Entry, CdbLocusType_SegmentGeneral, CdbLocusType_Entry },
{ CdbLocusType_Entry, CdbLocusType_General, CdbLocusType_Entry },
/* similarly, if there are single QE children, bring everything to a single QE */
{ CdbLocusType_SingleQE, CdbLocusType_SingleQE, CdbLocusType_SingleQE },
{ CdbLocusType_SingleQE, CdbLocusType_Strewn, CdbLocusType_SingleQE },
{ CdbLocusType_SingleQE, CdbLocusType_Replicated, CdbLocusType_SingleQE },
{ CdbLocusType_SingleQE, CdbLocusType_SegmentGeneral, CdbLocusType_SingleQE },
{ CdbLocusType_SingleQE, CdbLocusType_General, CdbLocusType_SingleQE },
/*
* If everything is partitioned, then the result can be partitioned, too.
* But if it's a mix of partitioned and replicated, then we have to bring
* everything to a single QE. Otherwise, the replicated (or general) children
* will contribute rows on every QE. XXX: it would be nice to force the child
* to be executed on a single QE, but I couldn't figure out how to do that.
* A motion from General to SingleQE is not possible.
*/
{ CdbLocusType_Strewn, CdbLocusType_Strewn, CdbLocusType_Strewn },
{ CdbLocusType_Strewn, CdbLocusType_Replicated, CdbLocusType_SingleQE },
{ CdbLocusType_Strewn, CdbLocusType_SegmentGeneral, CdbLocusType_SingleQE },
{ CdbLocusType_Strewn, CdbLocusType_General, CdbLocusType_SingleQE },
/*
* Replicated combined with Replicated, SegmentGeneral or General stays
* Replicated.
*/
{ CdbLocusType_Replicated, CdbLocusType_Replicated, CdbLocusType_Replicated },
{ CdbLocusType_Replicated, CdbLocusType_SegmentGeneral, CdbLocusType_Replicated },
{ CdbLocusType_Replicated, CdbLocusType_General, CdbLocusType_Replicated },
/* SegmentGeneral combined with SegmentGeneral or General stays SegmentGeneral. */
{ CdbLocusType_SegmentGeneral, CdbLocusType_SegmentGeneral, CdbLocusType_SegmentGeneral },
{ CdbLocusType_SegmentGeneral, CdbLocusType_General, CdbLocusType_SegmentGeneral },
/* The result is General only when both sides are General. */
{ CdbLocusType_General, CdbLocusType_General, CdbLocusType_General },
};
targetlocustype = CdbLocusType_General;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
CdbLocusType subtype;
int i;
if (CdbPathLocus_IsPartitioned(subpath->locus))
subtype = CdbLocusType_Strewn;
else
subtype = subpath->locus.locustype;
/* If any subplan is SingleQE, align Append numsegments with it */
if (CdbPathLocus_IsSingleQE(subpath->locus))
if (l == list_head(subpaths))
{
targetlocustype = subtype;
continue;
}
for (i = 0; i < lengthof(append_locus_compatibility_table); i++)
{
/* When there are multiple SingleQE, use the common segments */
numsegments = Min(numsegments,
CdbPathLocus_NumSegments(subpath->locus));
if ((append_locus_compatibility_table[i].a == targetlocustype &&
append_locus_compatibility_table[i].b == subtype) ||
(append_locus_compatibility_table[i].a == subtype &&
append_locus_compatibility_table[i].b == targetlocustype))
{
targetlocustype = append_locus_compatibility_table[i].result;
break;
}
}
if (i == lengthof(append_locus_compatibility_table))
elog(ERROR, "could not determine target locus for Append");
}
/*
* Do a first pass over the children to determine if there's any child
* which is not partitioned, i.e. is a bottleneck or replicated.
* Now compute the 'numsegments', and the hash keys if it's a partitioned
* type.
*/
foreach(l, subpaths)
if (targetlocustype == CdbLocusType_Entry)
{
Path *subpath = (Path *) lfirst(l);
/* If one of subplan is segment general, gather others to single QE */
if (CdbPathLocus_IsBottleneck(subpath->locus) ||
CdbPathLocus_IsSegmentGeneral(subpath->locus) ||
CdbPathLocus_IsReplicated(subpath->locus))
/* nothing more to do */
CdbPathLocus_MakeEntry(&targetlocus);
}
else if (targetlocustype == CdbLocusType_SingleQE ||
targetlocustype == CdbLocusType_Replicated ||
targetlocustype == CdbLocusType_SegmentGeneral ||
targetlocustype == CdbLocusType_General)
{
/* By default put Append node on all the segments */
numsegments = getgpsegmentCount();
foreach(l, subpaths)
{
fIsNotPartitioned = true;
Path *subpath = (Path *) lfirst(l);
/* check whether any partition is on entry db */
if (CdbPathLocus_IsEntry(subpath->locus))
/*
* Align numsegments to be the common segments among the children.
* Partitioned children will need to be motioned, so ignore them.
*/
if (!CdbPathLocus_IsPartitioned(subpath->locus))
{
fIsPartitionInEntry = true;
break;
/* When there are multiple SingleQE, use the common segments */
numsegments = Min(numsegments,
CdbPathLocus_NumSegments(subpath->locus));
}
}
CdbPathLocus_MakeSimple(&targetlocus, targetlocustype, numsegments);
}
new_subpaths = NIL;
foreach(l, subpaths)
else if (targetlocustype == CdbLocusType_Strewn)
{
Path *subpath = (Path *) lfirst(l);
CdbPathLocus projectedlocus;
bool isfirst = true;
/*
* In case any of the children is not partitioned convert all
* children to have singleQE locus
*/
if (fIsNotPartitioned)
/* By default put Append node on all the segments */
numsegments = getgpsegmentCount();
CdbPathLocus_MakeNull(&targetlocus, 0);
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
CdbPathLocus projectedlocus;
Assert(CdbPathLocus_IsPartitioned(subpath->locus));
/* Transform subpath locus into the appendrel's space for comparison. */
if (subpath->parent == rel ||
subpath->parent->reloptkind != RELOPT_OTHER_MEMBER_REL)
projectedlocus = subpath->locus;
else
projectedlocus =
cdbpathlocus_pull_above_projection(root,
subpath->locus,
subpath->parent->relids,
subpath->parent->reltargetlist,
rel->reltargetlist,
rel->relid);
/*
* if any partition is on entry db, we should gather all the
* partitions to QD to do the append
* CDB: If all the scans are distributed alike, set
* the result locus to match. Otherwise, if all are partitioned,
* set it to strewn. A mixture of partitioned and non-partitioned
* scans should not occur after above correction;
*
* CDB TODO: When the scans are not all partitioned alike, and the
* result is joined with another rel, consider pushing the join
* below the Append so that child tables that are properly
* distributed can be joined in place.
*/
if (fIsPartitionInEntry)
if (isfirst)
{
if (!CdbPathLocus_IsEntry(subpath->locus))
{
CdbPathLocus singleEntry;
CdbPathLocus_MakeEntry(&singleEntry);
subpath = cdbpath_create_motion_path(root, subpath, subpath->pathkeys, false, singleEntry);
}
targetlocus = projectedlocus;
isfirst = false;
}
else /* fIsNotPartitioned true, fIsPartitionInEntry false */
else if (cdbpathlocus_equal(targetlocus, projectedlocus))
{
if (!CdbPathLocus_IsSingleQE(subpath->locus))
{
CdbPathLocus singleQE;
/* Gather to SingleQE */
CdbPathLocus_MakeSingleQE(&singleQE, numsegments);
subpath = cdbpath_create_motion_path(root, subpath, subpath->pathkeys, false, singleQE);
}
else
{
/* Align all SingleQE to the common segments */
subpath->locus.numsegments = numsegments;
}
/* compatible */
}
else
{
/*
* The subpaths have different distribution policies: mark the result
* as randomly distributed (strewn) and set numsegments to the maximum
* of all subpaths so as not to miss any tuples.
*/
CdbPathLocus_MakeStrewn(&targetlocus,
Max(CdbPathLocus_NumSegments(targetlocus),
CdbPathLocus_NumSegments(projectedlocus)));
break;
}
}
}
else
elog(ERROR, "unexpected Append target locus type");
/* Transform subpath locus into the appendrel's space for comparison. */
if (subpath->parent == rel ||
subpath->parent->reloptkind != RELOPT_OTHER_MEMBER_REL)
projectedlocus = subpath->locus;
else
projectedlocus =
cdbpathlocus_pull_above_projection(root,
subpath->locus,
subpath->parent->relids,
subpath->parent->reltargetlist,
rel->reltargetlist,
rel->relid);
/* Ok, we now know the target locus. Add Motions to any subpaths that need it */
new_subpaths = NIL;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
/*
* CDB: If all the scans are distributed alike, set
* the result locus to match. Otherwise, if all are partitioned,
* set it to strewn. A mixture of partitioned and non-partitioned
* scans should not occur after above correction;
*
* CDB TODO: When the scans are not all partitioned alike, and the
* result is joined with another rel, consider pushing the join
* below the Append so that child tables that are properly
* distributed can be joined in place.
*/
if (l == list_head(subpaths))
pathnode->locus = projectedlocus;
else if (cdbpathlocus_equal(pathnode->locus, projectedlocus))
if (CdbPathLocus_IsPartitioned(targetlocus))
{
/* compatible */
/* we already determined that all the loci are compatible */
Assert(CdbPathLocus_IsPartitioned(subpath->locus));
}
else if (CdbPathLocus_IsGeneral(pathnode->locus))
{
/* compatible */
pathnode->locus = projectedlocus;
}
else if (CdbPathLocus_IsGeneral(projectedlocus))
else
{
/* compatible */
subpath = cdbpath_create_motion_path(root, subpath, subpath->pathkeys, false, targetlocus);
}
else if (CdbPathLocus_IsPartitioned(pathnode->locus) &&
CdbPathLocus_IsPartitioned(projectedlocus))
{
/*
* subpaths have different distributed policy, mark it as random
* distributed and set the numsegments to the maximum of all
* subpaths to not missing any tuples.
*/
CdbPathLocus_MakeStrewn(&pathnode->locus,
Max(CdbPathLocus_NumSegments(pathnode->locus),
CdbPathLocus_NumSegments(projectedlocus)));
}
else
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg_internal("cannot append paths with incompatible distribution")));
pathnode->sameslice_relids = bms_union(pathnode->sameslice_relids, subpath->sameslice_relids);
......@@ -1539,6 +1601,7 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel,
new_subpaths = lappend(new_subpaths, subpath);
}
pathnode->locus = targetlocus;
*subpaths_out = new_subpaths;
}
......
......@@ -1712,6 +1712,38 @@ reset optimizer_segments;
-- @description union_update_test31: Negative Tests more than one row returned by a sub-query used as an expression
UPDATE dml_union_r SET b = ( SELECT a FROM dml_union_r EXCEPT ALL SELECT a FROM dml_union_s);
ERROR: more than one row returned by a subquery used as an expression
--
-- Test mixing a set-returning function, which can be evaluated anywhere,
-- (it has General locus) and a diststributed table, in an Append.
--
explain (costs off)
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g;
QUERY PLAN
------------------------------------------------
Append
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on dml_union_r
Filter: (a > 95)
-> Function Scan on generate_series g
Optimizer: Postgres query optimizer
(6 rows)
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g;
a
-----
96
100
97
99
98
1
2
(7 rows)
--
-- Test for creation of MergeAppend paths.
--
......
......@@ -1730,6 +1730,40 @@ reset optimizer_segments;
UPDATE dml_union_r SET b = ( SELECT a FROM dml_union_r EXCEPT ALL SELECT a FROM dml_union_s);
ERROR: one or more assertions failed (seg1 antova-mbp.local:40011 pid=42293)
DETAIL: Expected no more than one row to be returned by expression
--
-- Test mixing a set-returning function, which can be evaluated anywhere,
-- (it has General locus) and a diststributed table, in an Append.
--
explain (costs off)
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g;
QUERY PLAN
-------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Append
-> Seq Scan on dml_union_r
Filter: (a > 95)
-> Result
One-Time Filter: (gp_execution_segment() = 0)
-> Function Scan on generate_series
Optimizer: Pivotal Optimizer (GPORCA) version 3.72.0
(8 rows)
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g;
a
-----
98
1
2
96
100
97
99
(7 rows)
--
-- Test for creation of MergeAppend paths.
--
......
......@@ -641,6 +641,18 @@ reset optimizer_segments;
-- @description union_update_test31: Negative Tests more than one row returned by a sub-query used as an expression
UPDATE dml_union_r SET b = ( SELECT a FROM dml_union_r EXCEPT ALL SELECT a FROM dml_union_s);
--
-- Test mixing a set-returning function, which can be evaluated anywhere,
-- (it has General locus) and a diststributed table, in an Append.
--
explain (costs off)
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g;
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g;
--
-- Test for creation of MergeAppend paths.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册