未验证 提交 300d3c19 编写于 作者: Z Zhenghua Lyu 提交者: GitHub

Fix lateral PANIC issue when the subquery contains limit or group-by.

Previous commit 62579728 fixed a lateral panic issue but did
not handle all the bad cases, because it only checked whether the
top-level query tree contains a limit clause. A bad case, for example:
if the subquery is like `q1 union all (q2 limit 1)`, then the top-level
query tree does not contain a limit clause.

Another bad case is that the lateral subquery may contain a group-by
clause, like:

    select * from t1_lateral_limit t1 cross join lateral
    (select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2
     group by (c).x+t2.a)x;

When planning the lateral subquery we do not know where the param
is in the subquery's query tree. Thus it is a bit complicated
to resolve this issue precisely and efficiently.

This commit adopts a simple method to fix the panic issue: it just
checks the subquery's query tree to see if there is any group-by
or limit clause; if so, it forces each relation to be gathered and
materialized. This is not the best plan we might get, but let's make it
correct first. In the future we should seriously consider
how to fully and efficiently support lateral.
上级 ab69cf9e
......@@ -120,8 +120,8 @@ static void subquery_push_qual(Query *subquery,
RangeTblEntry *rte, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery,
RangeTblEntry *rte, Index rti, Node *qual);
static void bring_to_singleQE(PlannerInfo *root, RelOptInfo *rel, List *outer_quals);
static void bring_to_singleQE(PlannerInfo *root, RelOptInfo *rel, List *outer_quals);
static bool is_query_contain_limit_groupby(Query *parse);
/*
* make_one_rel
......@@ -1469,9 +1469,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
* it to singleQE and materialize the data because we
* cannot pass params across motion.
*/
config->force_singleQE = false;
if ((!bms_is_empty(required_outer)) &&
(subquery->limitCount || subquery->limitOffset))
is_query_contain_limit_groupby(subquery))
config->force_singleQE = true;
rel->subplan = subquery_planner(root->glob, subquery,
......@@ -2731,6 +2730,31 @@ recurse_push_qual(Node *setOp, Query *topquery,
}
}
static bool
is_query_contain_limit_groupby(Query *parse)
{
if (parse->limitCount || parse->limitOffset ||
parse->groupClause || parse->distinctClause)
return true;
if (parse->setOperations)
{
SetOperationStmt *sop_stmt = (SetOperationStmt *) (parse->setOperations);
RangeTblRef *larg = (RangeTblRef *) sop_stmt->larg;
RangeTblRef *rarg = (RangeTblRef *) sop_stmt->rarg;
RangeTblEntry *lrte = list_nth(parse->rtable, larg->rtindex-1);
RangeTblEntry *rrte = list_nth(parse->rtable, rarg->rtindex-1);
if ((lrte->rtekind == RTE_SUBQUERY &&
is_query_contain_limit_groupby(lrte->subquery)) ||
(rrte->rtekind == RTE_SUBQUERY &&
is_query_contain_limit_groupby(rrte->subquery)))
return true;
}
return false;
}
/*****************************************************************************
* DEBUG SUPPORT
*****************************************************************************/
......
......@@ -1009,6 +1009,72 @@ select * from t1_lateral_limit as t1 cross join lateral
1 | 2 | (2,2) | 4
(2 rows)
-- Continue with the above cases, if the lateral subquery contains union all
-- and in some of its appendquerys contain limit, it may also lead to bad plan.
-- The best solution may be to walk the query to and do some static analysis
-- to find out which rel has to be gathered and materialized. But it is complicated
-- to do so and this seems less efficient. I believe in future we should do big
-- refactor to make greenplum support lateral well so now, let's just make sure
-- we will not panic.
explain (costs off) select * from t1_lateral_limit as t1 cross join lateral
((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s;
QUERY PLAN
------------------------------------------------------------------------------------
Nested Loop
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on t1_lateral_limit t1
-> Materialize
-> Append
-> Limit
-> Sort
Sort Key: (((t1.c).x + t2.b))
-> Result
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on t2_lateral_limit t2
-> Result
Optimizer: Postgres query optimizer
(14 rows)
select * from t1_lateral_limit as t1 cross join lateral
((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s;
a | b | c | n
---+---+-------+---
1 | 1 | (1,1) | 3
1 | 1 | (1,1) | 1
1 | 2 | (2,2) | 4
1 | 2 | (2,2) | 1
(4 rows)
-- test lateral subquery contains group by (group-by is another place that
-- may add motions in the subquery's plan).
explain select * from t1_lateral_limit t1 cross join lateral
(select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Nested Loop (cost=10000000001.06..10000000002.12 rows=4 width=49)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=37)
-> Seq Scan on t1_lateral_limit t1 (cost=0.00..1.01 rows=1 width=37)
-> Materialize (cost=1.05..1.08 rows=1 width=12)
-> HashAggregate (cost=1.05..1.07 rows=1 width=12)
Group Key: ((t1.c).x + t2.a)
-> Result (cost=0.00..1.05 rows=1 width=12)
-> Materialize (cost=0.00..1.03 rows=1 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=8)
-> Seq Scan on t2_lateral_limit t2 (cost=0.00..1.01 rows=1 width=8)
Optimizer: Postgres query optimizer
(11 rows)
select * from t1_lateral_limit t1 cross join lateral
(select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x;
a | b | c | ?column? | sum
---+---+-------+----------+-----
1 | 1 | (1,1) | 4 | 6
1 | 1 | (1,1) | 3 | 4
1 | 2 | (2,2) | 5 | 6
1 | 2 | (2,2) | 4 | 4
(4 rows)
-- The following case is from Github Issue
-- https://github.com/greenplum-db/gpdb/issues/8860
-- It is the same issue as the above test suite.
......
......@@ -1025,6 +1025,72 @@ select * from t1_lateral_limit as t1 cross join lateral
1 | 2 | (2,2) | 4
(2 rows)
-- Continue with the above cases, if the lateral subquery contains union all
-- and in some of its appendquerys contain limit, it may also lead to bad plan.
-- The best solution may be to walk the query to and do some static analysis
-- to find out which rel has to be gathered and materialized. But it is complicated
-- to do so and this seems less efficient. I believe in future we should do big
-- refactor to make greenplum support lateral well so now, let's just make sure
-- we will not panic.
explain (costs off) select * from t1_lateral_limit as t1 cross join lateral
((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s;
QUERY PLAN
------------------------------------------------------------------------------------
Nested Loop
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on t1_lateral_limit t1
-> Materialize
-> Append
-> Limit
-> Sort
Sort Key: (((t1.c).x + t2.b))
-> Result
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on t2_lateral_limit t2
-> Result
Optimizer: Postgres query optimizer
(14 rows)
select * from t1_lateral_limit as t1 cross join lateral
((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s;
a | b | c | n
---+---+-------+---
1 | 1 | (1,1) | 3
1 | 1 | (1,1) | 1
1 | 2 | (2,2) | 4
1 | 2 | (2,2) | 1
(4 rows)
-- test lateral subquery contains group by (group-by is another place that
-- may add motions in the subquery's plan).
explain select * from t1_lateral_limit t1 cross join lateral
(select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Nested Loop (cost=10000000001.06..10000000002.12 rows=4 width=49)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=37)
-> Seq Scan on t1_lateral_limit t1 (cost=0.00..1.01 rows=1 width=37)
-> Materialize (cost=1.05..1.08 rows=1 width=12)
-> HashAggregate (cost=1.05..1.07 rows=1 width=12)
Group Key: ((t1.c).x + t2.a)
-> Result (cost=0.00..1.05 rows=1 width=12)
-> Materialize (cost=0.00..1.03 rows=1 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=8)
-> Seq Scan on t2_lateral_limit t2 (cost=0.00..1.01 rows=1 width=8)
Optimizer: Postgres query optimizer
(11 rows)
select * from t1_lateral_limit t1 cross join lateral
(select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x;
a | b | c | ?column? | sum
---+---+-------+----------+-----
1 | 1 | (1,1) | 4 | 6
1 | 1 | (1,1) | 3 | 4
1 | 2 | (2,2) | 5 | 6
1 | 2 | (2,2) | 4 | 4
(4 rows)
-- The following case is from Github Issue
-- https://github.com/greenplum-db/gpdb/issues/8860
-- It is the same issue as the above test suite.
......
......@@ -498,6 +498,27 @@ explain select * from t1_lateral_limit as t1 cross join lateral
select * from t1_lateral_limit as t1 cross join lateral
(select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1)s;
-- Continue with the above cases, if the lateral subquery contains union all
-- and in some of its appendquerys contain limit, it may also lead to bad plan.
-- The best solution may be to walk the query to and do some static analysis
-- to find out which rel has to be gathered and materialized. But it is complicated
-- to do so and this seems less efficient. I believe in future we should do big
-- refactor to make greenplum support lateral well so now, let's just make sure
-- we will not panic.
explain (costs off) select * from t1_lateral_limit as t1 cross join lateral
((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s;
select * from t1_lateral_limit as t1 cross join lateral
((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s;
-- test lateral subquery contains group by (group-by is another place that
-- may add motions in the subquery's plan).
explain select * from t1_lateral_limit t1 cross join lateral
(select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x;
select * from t1_lateral_limit t1 cross join lateral
(select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x;
-- The following case is from Github Issue
-- https://github.com/greenplum-db/gpdb/issues/8860
-- It is the same issue as the above test suite.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册