未验证 提交 75c4769a 编写于 作者: P prajnamort 提交者: GitHub

Improve Append plan when General children exist.

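When an Append mixes General (or SegmentGeneral) children with Partitioned
children, treat those children as Strewn rather than forcing the whole
Append to SingleQE, so that the Gather Motion can be postponed until after
the Append. To keep such a child from contributing its rows on every QE,
wrap it in a projection path with a segment filter, so that only a single
QE will actually produce its rows.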
上级 67321b1c
......@@ -1761,15 +1761,16 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel,
/*
* If everything is partitioned, then the result can be partitioned, too.
* But if it's a mix of partitioned and replicated, then we have to bring
* everything to a single QE. Otherwise, the replicated (or general) children
* will contribute rows on every QE. XXX: it would be nice to force the child
* to be executed on a single QE, but I couldn't figure out how to do that.
* A motion from General to SingleQE is not possible.
* everything to a single QE. Otherwise, the replicated children
* will contribute rows on every QE.
* If it's a mix of partitioned and general, we still consider the
* result as partitioned. But the general part will be restricted to
* only produce rows on a single QE.
*/
{ CdbLocusType_Strewn, CdbLocusType_Strewn, CdbLocusType_Strewn },
{ CdbLocusType_Strewn, CdbLocusType_Replicated, CdbLocusType_SingleQE },
{ CdbLocusType_Strewn, CdbLocusType_SegmentGeneral, CdbLocusType_SingleQE },
{ CdbLocusType_Strewn, CdbLocusType_General, CdbLocusType_SingleQE },
{ CdbLocusType_Strewn, CdbLocusType_SegmentGeneral, CdbLocusType_Strewn },
{ CdbLocusType_Strewn, CdbLocusType_General, CdbLocusType_Strewn },
{ CdbLocusType_Replicated, CdbLocusType_Replicated, CdbLocusType_Replicated },
{ CdbLocusType_Replicated, CdbLocusType_SegmentGeneral, CdbLocusType_Replicated },
......@@ -1867,20 +1868,28 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel,
Path *subpath = (Path *) lfirst(l);
CdbPathLocus projectedlocus;
Assert(CdbPathLocus_IsPartitioned(subpath->locus));
/* Transform subpath locus into the appendrel's space for comparison. */
if (subpath->parent == rel ||
subpath->parent->reloptkind != RELOPT_OTHER_MEMBER_REL)
projectedlocus = subpath->locus;
if (CdbPathLocus_IsGeneral(subpath->locus) ||
CdbPathLocus_IsSegmentGeneral(subpath->locus))
{
/* Afterwards, General/SegmentGeneral will be projected as Strewn */
CdbPathLocus_MakeStrewn(&projectedlocus, numsegments);
}
else
projectedlocus =
cdbpathlocus_pull_above_projection(root,
subpath->locus,
subpath->parent->relids,
subpath->parent->reltarget->exprs,
rel->reltarget->exprs,
rel->relid);
{
Assert(CdbPathLocus_IsPartitioned(subpath->locus));
/* Transform subpath locus into the appendrel's space for comparison. */
if (subpath->parent == rel ||
subpath->parent->reloptkind != RELOPT_OTHER_MEMBER_REL)
projectedlocus = subpath->locus;
else
projectedlocus =
cdbpathlocus_pull_above_projection(root,
subpath->locus,
subpath->parent->relids,
subpath->parent->reltarget->exprs,
rel->reltarget->exprs,
rel->relid);
}
/*
* CDB: If all the scans are distributed alike, set
......@@ -1919,7 +1928,7 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel,
else
elog(ERROR, "unexpected Append target locus type");
/* Ok, we now know the target locus. Add Motions to any subpaths that need it */
/* Ok, we now know the target locus. Add Motions/Projections to any subpaths that need it */
new_subpaths = NIL;
foreach(l, subpaths)
{
......@@ -1927,6 +1936,37 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel,
if (CdbPathLocus_IsPartitioned(targetlocus))
{
if (CdbPathLocus_IsGeneral(subpath->locus) ||
CdbPathLocus_IsSegmentGeneral(subpath->locus))
{
/*
* If a General/SegmentGeneral is mixed with other Strewn's,
* add a projection path with cdb_restrict_clauses, so that only
* a single QE will actually produce rows.
*/
if (CdbPathLocus_IsGeneral(subpath->locus))
numsegments = targetlocus.numsegments;
else
numsegments = subpath->locus.numsegments;
RestrictInfo *restrict_info =
make_restrictinfo((Expr *) makeSegmentFilterExpr(
gp_session_id % numsegments),
false,
false,
true,
NULL,
NULL,
NULL);
subpath = (Path *) create_projection_path_with_quals(
root,
subpath->parent,
subpath,
subpath->pathtarget,
list_make1(restrict_info));
CdbPathLocus_MakeStrewn(&(subpath->locus),
numsegments);
}
/* we already determined that all the loci are compatible */
Assert(CdbPathLocus_IsPartitioned(subpath->locus));
}
......
......@@ -1720,15 +1720,17 @@ explain (costs off)
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g;
QUERY PLAN
------------------------------------------------
Append
-> Gather Motion 3:1 (slice1; segments: 3)
QUERY PLAN
-------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Append
-> Seq Scan on dml_union_r
Filter: (a > 95)
-> Function Scan on generate_series g
-> Result
One-Time Filter: (gp_execution_segment() = 0)
-> Function Scan on generate_series g
Optimizer: Postgres query optimizer
(6 rows)
(8 rows)
select a from dml_union_r where a > 95
union all
......@@ -1744,6 +1746,118 @@ select g from generate_series(1,2) g;
2
(7 rows)
explain (costs off)
select sum(a) from (
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g
) t;
QUERY PLAN
-------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Append
-> Seq Scan on dml_union_r
Filter: (a > 95)
-> Result
One-Time Filter: (gp_execution_segment() = 0)
-> Function Scan on generate_series g
Optimizer: Postgres query optimizer
(10 rows)
select sum(a) from (
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g
) t;
sum
-----
493
(1 row)
--
-- Continue to test appending General to distributed table.
-- This time, the General is a dummy path, produced by pushing down condition.
-- (Only for planner, orca does not create dummy path here)
--
create table t_test_append_hash(a int, b int, c int) distributed by (a);
insert into t_test_append_hash select i, i+1, i+2 from generate_series(1, 5)i;
explain (costs off)
with t(a, b, s) as (
select a, b, sum(c) from t_test_append_hash where a > b group by a, b
union all
select a, b, sum(c) from t_test_append_hash where a < b group by a, b
) select * from t where t.a < t.b;
QUERY PLAN
--------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Append
-> Result
One-Time Filter: (gp_execution_segment() = 0)
-> HashAggregate
Group Key: a, b
-> Result
One-Time Filter: false
-> GroupAggregate
Group Key: t_test_append_hash.a, t_test_append_hash.b
-> Sort
Sort Key: t_test_append_hash.a, t_test_append_hash.b
-> Seq Scan on t_test_append_hash
Filter: ((a < b) AND (a < b))
Optimizer: Postgres query optimizer
(15 rows)
with t(a, b, s) as (
select a, b, sum(c) from t_test_append_hash where a > b group by a, b
union all
select a, b, sum(c) from t_test_append_hash where a < b group by a, b
) select * from t where t.a < t.b;
a | b | s
---+---+---
2 | 3 | 4
3 | 4 | 5
4 | 5 | 6
1 | 2 | 3
5 | 6 | 7
(5 rows)
-- Test mixing a SegmentGeneral with distributed table.
create table t_test_append_rep(a int, b int, c int) distributed replicated;
insert into t_test_append_rep select i, i+1, i+2 from generate_series(5, 10)i;
explain (costs off)
select * from t_test_append_rep
union all
select * from t_test_append_hash;
QUERY PLAN
-------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Append
-> Result
One-Time Filter: (gp_execution_segment() = 0)
-> Seq Scan on t_test_append_rep
-> Seq Scan on t_test_append_hash
Optimizer: Postgres query optimizer
(7 rows)
select * from t_test_append_rep
union all
select * from t_test_append_hash;
a | b | c
----+----+----
5 | 6 | 7
1 | 2 | 3
5 | 6 | 7
6 | 7 | 8
7 | 8 | 9
8 | 9 | 10
9 | 10 | 11
10 | 11 | 12
2 | 3 | 4
3 | 4 | 5
4 | 5 | 6
(11 rows)
--
-- Test for creation of MergeAppend paths.
--
......
......@@ -1764,6 +1764,117 @@ select g from generate_series(1,2) g;
99
(7 rows)
explain (costs off)
select sum(a) from (
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g
) t;
QUERY PLAN
-------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Append
-> Seq Scan on dml_union_r
Filter: (a > 95)
-> Result
One-Time Filter: (gp_execution_segment() = 2)
-> Function Scan on generate_series
Optimizer: Pivotal Optimizer (GPORCA) version 3.93.0
(10 rows)
select sum(a) from (
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g
) t;
sum
-----
493
(1 row)
--
-- Continue to test appending General to distributed table.
-- This time, the General is a dummy path, produced by pushing down condition.
-- (Only for planner, orca does not create dummy path here)
--
create table t_test_append_hash(a int, b int, c int) distributed by (a);
insert into t_test_append_hash select i, i+1, i+2 from generate_series(1, 5)i;
explain (costs off)
with t(a, b, s) as (
select a, b, sum(c) from t_test_append_hash where a > b group by a, b
union all
select a, b, sum(c) from t_test_append_hash where a < b group by a, b
) select * from t where t.a < t.b;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Append
-> GroupAggregate
Group Key: t_test_append_hash.a, t_test_append_hash.b
-> Sort
Sort Key: t_test_append_hash.a, t_test_append_hash.b
-> Seq Scan on t_test_append_hash
Filter: ((a > b) AND (a < b))
-> GroupAggregate
Group Key: t_test_append_hash_1.a, t_test_append_hash_1.b
-> Sort
Sort Key: t_test_append_hash_1.a, t_test_append_hash_1.b
-> Seq Scan on t_test_append_hash t_test_append_hash_1
Filter: ((a < b) AND (a < b))
Optimizer: Pivotal Optimizer (GPORCA) version 3.93.0
(15 rows)
with t(a, b, s) as (
select a, b, sum(c) from t_test_append_hash where a > b group by a, b
union all
select a, b, sum(c) from t_test_append_hash where a < b group by a, b
) select * from t where t.a < t.b;
a | b | s
---+---+---
2 | 3 | 4
3 | 4 | 5
4 | 5 | 6
5 | 6 | 7
1 | 2 | 3
(5 rows)
-- Test mixing a SegmentGeneral with distributed table.
create table t_test_append_rep(a int, b int, c int) distributed replicated;
insert into t_test_append_rep select i, i+1, i+2 from generate_series(5, 10)i;
explain (costs off)
select * from t_test_append_rep
union all
select * from t_test_append_hash;
QUERY PLAN
---------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Append
-> Seq Scan on t_test_append_rep
-> Broadcast Motion 3:1 (slice2; segments: 3)
-> Seq Scan on t_test_append_hash
Optimizer: Pivotal Optimizer (GPORCA) version 3.93.0
(6 rows)
select * from t_test_append_rep
union all
select * from t_test_append_hash;
a | b | c
----+----+----
5 | 6 | 7
6 | 7 | 8
7 | 8 | 9
8 | 9 | 10
9 | 10 | 11
10 | 11 | 12
1 | 2 | 3
5 | 6 | 7
2 | 3 | 4
3 | 4 | 5
4 | 5 | 6
(11 rows)
--
-- Test for creation of MergeAppend paths.
--
......
......@@ -654,6 +654,53 @@ select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g;
explain (costs off)
select sum(a) from (
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g
) t;
select sum(a) from (
select a from dml_union_r where a > 95
union all
select g from generate_series(1,2) g
) t;
--
-- Continue to test appending General to distributed table.
-- This time, the General is a dummy path, produced by pushing down condition.
-- (Only for planner, orca does not create dummy path here)
--
create table t_test_append_hash(a int, b int, c int) distributed by (a);
insert into t_test_append_hash select i, i+1, i+2 from generate_series(1, 5)i;
explain (costs off)
with t(a, b, s) as (
select a, b, sum(c) from t_test_append_hash where a > b group by a, b
union all
select a, b, sum(c) from t_test_append_hash where a < b group by a, b
) select * from t where t.a < t.b;
with t(a, b, s) as (
select a, b, sum(c) from t_test_append_hash where a > b group by a, b
union all
select a, b, sum(c) from t_test_append_hash where a < b group by a, b
) select * from t where t.a < t.b;
-- Test mixing a SegmentGeneral with distributed table.
create table t_test_append_rep(a int, b int, c int) distributed replicated;
insert into t_test_append_rep select i, i+1, i+2 from generate_series(5, 10)i;
explain (costs off)
select * from t_test_append_rep
union all
select * from t_test_append_hash;
select * from t_test_append_rep
union all
select * from t_test_append_hash;
--
-- Test for creation of MergeAppend paths.
--
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册