diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index a306d2d6e4cb0ac3831486209c5e220f2c32cade..7e978d7869264a42a4a907f5a51791d9d3d0ddfd 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1761,15 +1761,16 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel, /* * If everything is partitioned, then the result can be partitioned, too. * But if it's a mix of partitioned and replicated, then we have to bring - * everything to a single QE. Otherwise, the replicated (or general) children - * will contribute rows on every QE. XXX: it would be nice to force the child - * to be executed on a single QE, but I couldn't figure out how to do that. - * A motion from General to SingleQE is not possible. + * everything to a single QE. Otherwise, the replicated children + * will contribute rows on every QE. + * If it's a mix of partitioned and general, we still consider the + * result as partitioned. But the general part will be restricted to + * only produce rows on a single QE. */ { CdbLocusType_Strewn, CdbLocusType_Strewn, CdbLocusType_Strewn }, { CdbLocusType_Strewn, CdbLocusType_Replicated, CdbLocusType_SingleQE }, - { CdbLocusType_Strewn, CdbLocusType_SegmentGeneral, CdbLocusType_SingleQE }, - { CdbLocusType_Strewn, CdbLocusType_General, CdbLocusType_SingleQE }, + { CdbLocusType_Strewn, CdbLocusType_SegmentGeneral, CdbLocusType_Strewn }, + { CdbLocusType_Strewn, CdbLocusType_General, CdbLocusType_Strewn }, { CdbLocusType_Replicated, CdbLocusType_Replicated, CdbLocusType_Replicated }, { CdbLocusType_Replicated, CdbLocusType_SegmentGeneral, CdbLocusType_Replicated }, @@ -1867,20 +1868,28 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel, Path *subpath = (Path *) lfirst(l); CdbPathLocus projectedlocus; - Assert(CdbPathLocus_IsPartitioned(subpath->locus)); - - /* Transform subpath locus into the appendrel's space for comparison. */ - if (subpath->parent == rel || - subpath->parent->reloptkind != RELOPT_OTHER_MEMBER_REL) - projectedlocus = subpath->locus; + if (CdbPathLocus_IsGeneral(subpath->locus) || + CdbPathLocus_IsSegmentGeneral(subpath->locus)) + { + /* Afterwards, General/SegmentGeneral will be projected as Strewn */ + CdbPathLocus_MakeStrewn(&projectedlocus, numsegments); + } else - projectedlocus = - cdbpathlocus_pull_above_projection(root, - subpath->locus, - subpath->parent->relids, - subpath->parent->reltarget->exprs, - rel->reltarget->exprs, - rel->relid); + { + Assert(CdbPathLocus_IsPartitioned(subpath->locus)); + /* Transform subpath locus into the appendrel's space for comparison. */ + if (subpath->parent == rel || + subpath->parent->reloptkind != RELOPT_OTHER_MEMBER_REL) + projectedlocus = subpath->locus; + else + projectedlocus = + cdbpathlocus_pull_above_projection(root, + subpath->locus, + subpath->parent->relids, + subpath->parent->reltarget->exprs, + rel->reltarget->exprs, + rel->relid); + } /* * CDB: If all the scans are distributed alike, set @@ -1919,7 +1928,7 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel, else elog(ERROR, "unexpected Append target locus type"); - /* Ok, we now know the target locus. Add Motions to any subpaths that need it */ + /* Ok, we now know the target locus. Add Motions/Projections to any subpaths that need it */ new_subpaths = NIL; foreach(l, subpaths) { @@ -1927,6 +1936,37 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel, if (CdbPathLocus_IsPartitioned(targetlocus)) { + if (CdbPathLocus_IsGeneral(subpath->locus) || + CdbPathLocus_IsSegmentGeneral(subpath->locus)) + { + /* + * If a General/SegmentGeneral is mixed with other Strewn's, + * add a projection path with cdb_restrict_clauses, so that only + * a single QE will actually produce rows. + */ + if (CdbPathLocus_IsGeneral(subpath->locus)) + numsegments = targetlocus.numsegments; + else + numsegments = subpath->locus.numsegments; + RestrictInfo *restrict_info = + make_restrictinfo((Expr *) makeSegmentFilterExpr( + gp_session_id % numsegments), + false, + false, + true, + NULL, + NULL, + NULL); + subpath = (Path *) create_projection_path_with_quals( + root, + subpath->parent, + subpath, + subpath->pathtarget, + list_make1(restrict_info)); + CdbPathLocus_MakeStrewn(&(subpath->locus), + numsegments); + } + /* we already determined that all the loci are compatible */ Assert(CdbPathLocus_IsPartitioned(subpath->locus)); } diff --git a/src/test/regress/expected/qp_union_intersect.out b/src/test/regress/expected/qp_union_intersect.out index 4158651fe1327e86228d8401d8fad8cd7d786166..0e292bbf77b317fb76236346555bd54efbd5a06d 100644 --- a/src/test/regress/expected/qp_union_intersect.out +++ b/src/test/regress/expected/qp_union_intersect.out @@ -1720,15 +1720,17 @@ explain (costs off) select a from dml_union_r where a > 95 union all select g from generate_series(1,2) g; - QUERY PLAN ------------------------------------------------- - Append - -> Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append -> Seq Scan on dml_union_r Filter: (a > 95) - -> Function Scan on generate_series g + -> Result + One-Time Filter: (gp_execution_segment() = 0) + -> Function Scan on generate_series g Optimizer: Postgres query optimizer -(6 rows) +(8 rows) select a from dml_union_r where a > 95 union all @@ -1744,6 +1746,118 @@ select g from generate_series(1,2) g; 2 (7 rows) +explain (costs off) +select sum(a) from ( + select a from dml_union_r where a > 95 + union all + select g from generate_series(1,2) g +) t; + QUERY PLAN +------------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Append + -> Seq Scan on dml_union_r + Filter: (a > 95) + -> Result + One-Time Filter: (gp_execution_segment() = 0) + -> Function Scan on generate_series g + Optimizer: Postgres query optimizer +(10 rows) + +select sum(a) from ( + select a from dml_union_r where a > 95 + union all + select g from generate_series(1,2) g +) t; + sum +----- + 493 +(1 row) + +-- +-- Continue to test appending General to distributed table. +-- This time, the General is a dummy path, produced by pushing down condition. +-- (Only for planner, orca does not create dummy path here) +-- +create table t_test_append_hash(a int, b int, c int) distributed by (a); +insert into t_test_append_hash select i, i+1, i+2 from generate_series(1, 5)i; +explain (costs off) +with t(a, b, s) as ( + select a, b, sum(c) from t_test_append_hash where a > b group by a, b + union all + select a, b, sum(c) from t_test_append_hash where a < b group by a, b +) select * from t where t.a < t.b; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Result + One-Time Filter: (gp_execution_segment() = 0) + -> HashAggregate + Group Key: a, b + -> Result + One-Time Filter: false + -> GroupAggregate + Group Key: t_test_append_hash.a, t_test_append_hash.b + -> Sort + Sort Key: t_test_append_hash.a, t_test_append_hash.b + -> Seq Scan on t_test_append_hash + Filter: ((a < b) AND (a < b)) + Optimizer: Postgres query optimizer +(15 rows) + +with t(a, b, s) as ( + select a, b, sum(c) from t_test_append_hash where a > b group by a, b + union all + select a, b, sum(c) from t_test_append_hash where a < b group by a, b +) select * from t where t.a < t.b; + a | b | s +---+---+--- + 2 | 3 | 4 + 3 | 4 | 5 + 4 | 5 | 6 + 1 | 2 | 3 + 5 | 6 | 7 +(5 rows) + +-- Test mixing a SegmentGeneral with distributed table. +create table t_test_append_rep(a int, b int, c int) distributed replicated; +insert into t_test_append_rep select i, i+1, i+2 from generate_series(5, 10)i; +explain (costs off) +select * from t_test_append_rep +union all +select * from t_test_append_hash; + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Result + One-Time Filter: (gp_execution_segment() = 0) + -> Seq Scan on t_test_append_rep + -> Seq Scan on t_test_append_hash + Optimizer: Postgres query optimizer +(7 rows) + +select * from t_test_append_rep +union all +select * from t_test_append_hash; + a | b | c +----+----+---- + 5 | 6 | 7 + 1 | 2 | 3 + 5 | 6 | 7 + 6 | 7 | 8 + 7 | 8 | 9 + 8 | 9 | 10 + 9 | 10 | 11 + 10 | 11 | 12 + 2 | 3 | 4 + 3 | 4 | 5 + 4 | 5 | 6 +(11 rows) + -- -- Test for creation of MergeAppend paths. -- diff --git a/src/test/regress/expected/qp_union_intersect_optimizer.out b/src/test/regress/expected/qp_union_intersect_optimizer.out index ea36d5b946b2f04d0f4ab99d2069bb3b37c61060..91a47f1d7e2babe4dc3a14bacfdc45bdd24b4487 100644 --- a/src/test/regress/expected/qp_union_intersect_optimizer.out +++ b/src/test/regress/expected/qp_union_intersect_optimizer.out @@ -1764,6 +1764,117 @@ select g from generate_series(1,2) g; 99 (7 rows) +explain (costs off) +select sum(a) from ( + select a from dml_union_r where a > 95 + union all + select g from generate_series(1,2) g +) t; + QUERY PLAN +------------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Append + -> Seq Scan on dml_union_r + Filter: (a > 95) + -> Result + One-Time Filter: (gp_execution_segment() = 2) + -> Function Scan on generate_series + Optimizer: Pivotal Optimizer (GPORCA) version 3.93.0 +(10 rows) + +select sum(a) from ( + select a from dml_union_r where a > 95 + union all + select g from generate_series(1,2) g +) t; + sum +----- + 493 +(1 row) + +-- +-- Continue to test appending General to distributed table. +-- This time, the General is a dummy path, produced by pushing down condition. +-- (Only for planner, orca does not create dummy path here) +-- +create table t_test_append_hash(a int, b int, c int) distributed by (a); +insert into t_test_append_hash select i, i+1, i+2 from generate_series(1, 5)i; +explain (costs off) +with t(a, b, s) as ( + select a, b, sum(c) from t_test_append_hash where a > b group by a, b + union all + select a, b, sum(c) from t_test_append_hash where a < b group by a, b +) select * from t where t.a < t.b; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> GroupAggregate + Group Key: t_test_append_hash.a, t_test_append_hash.b + -> Sort + Sort Key: t_test_append_hash.a, t_test_append_hash.b + -> Seq Scan on t_test_append_hash + Filter: ((a > b) AND (a < b)) + -> GroupAggregate + Group Key: t_test_append_hash_1.a, t_test_append_hash_1.b + -> Sort + Sort Key: t_test_append_hash_1.a, t_test_append_hash_1.b + -> Seq Scan on t_test_append_hash t_test_append_hash_1 + Filter: ((a < b) AND (a < b)) + Optimizer: Pivotal Optimizer (GPORCA) version 3.93.0 +(15 rows) + +with t(a, b, s) as ( + select a, b, sum(c) from t_test_append_hash where a > b group by a, b + union all + select a, b, sum(c) from t_test_append_hash where a < b group by a, b +) select * from t where t.a < t.b; + a | b | s +---+---+--- + 2 | 3 | 4 + 3 | 4 | 5 + 4 | 5 | 6 + 5 | 6 | 7 + 1 | 2 | 3 +(5 rows) + +-- Test mixing a SegmentGeneral with distributed table. +create table t_test_append_rep(a int, b int, c int) distributed replicated; +insert into t_test_append_rep select i, i+1, i+2 from generate_series(5, 10)i; +explain (costs off) +select * from t_test_append_rep +union all +select * from t_test_append_hash; + QUERY PLAN +--------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Append + -> Seq Scan on t_test_append_rep + -> Broadcast Motion 3:1 (slice2; segments: 3) + -> Seq Scan on t_test_append_hash + Optimizer: Pivotal Optimizer (GPORCA) version 3.93.0 +(6 rows) + +select * from t_test_append_rep +union all +select * from t_test_append_hash; + a | b | c +----+----+---- + 5 | 6 | 7 + 6 | 7 | 8 + 7 | 8 | 9 + 8 | 9 | 10 + 9 | 10 | 11 + 10 | 11 | 12 + 1 | 2 | 3 + 5 | 6 | 7 + 2 | 3 | 4 + 3 | 4 | 5 + 4 | 5 | 6 +(11 rows) + -- -- Test for creation of MergeAppend paths. -- diff --git a/src/test/regress/sql/qp_union_intersect.sql b/src/test/regress/sql/qp_union_intersect.sql index 623f34f25a7dd1fc39614c954f74bc583cff5433..109b3b39ff61048c40a419edb8e3418fecd0b6a2 100644 --- a/src/test/regress/sql/qp_union_intersect.sql +++ b/src/test/regress/sql/qp_union_intersect.sql @@ -654,6 +654,53 @@ select a from dml_union_r where a > 95 union all select g from generate_series(1,2) g; +explain (costs off) +select sum(a) from ( + select a from dml_union_r where a > 95 + union all + select g from generate_series(1,2) g +) t; + +select sum(a) from ( + select a from dml_union_r where a > 95 + union all + select g from generate_series(1,2) g +) t; + +-- +-- Continue to test appending General to distributed table. +-- This time, the General is a dummy path, produced by pushing down condition. +-- (Only for planner, orca does not create dummy path here) +-- +create table t_test_append_hash(a int, b int, c int) distributed by (a); +insert into t_test_append_hash select i, i+1, i+2 from generate_series(1, 5)i; + +explain (costs off) +with t(a, b, s) as ( + select a, b, sum(c) from t_test_append_hash where a > b group by a, b + union all + select a, b, sum(c) from t_test_append_hash where a < b group by a, b +) select * from t where t.a < t.b; + +with t(a, b, s) as ( + select a, b, sum(c) from t_test_append_hash where a > b group by a, b + union all + select a, b, sum(c) from t_test_append_hash where a < b group by a, b +) select * from t where t.a < t.b; + +-- Test mixing a SegmentGeneral with distributed table. +create table t_test_append_rep(a int, b int, c int) distributed replicated; +insert into t_test_append_rep select i, i+1, i+2 from generate_series(5, 10)i; + +explain (costs off) +select * from t_test_append_rep +union all +select * from t_test_append_hash; + +select * from t_test_append_rep +union all +select * from t_test_append_hash; + -- -- Test for creation of MergeAppend paths. --