提交 5838bc1b 编写于 作者: X Xin Zhang 提交者: Xin Zhang

Fix for issue with EXISTS subquery with CTE

When we have a correlated subquery with EXISTS and CTE, the planner
produces wrong plan as:

```
pivotal=# explain with t as (select 1) select * from foo where exists (select * from bar where foo.a = 'a' and foo.b = 'b');
                                             QUERY PLAN
----------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice3; segments: 3)  (cost=0.01..1522.03 rows=5055210000 width=16)
   ->  Nested Loop  (cost=0.01..1522.03 rows=1685070000 width=16)
         ->  Broadcast Motion 1:3  (slice2; segments: 1)  (cost=0.01..0.03 rows=1 width=0)
               ->  Limit  (cost=0.01..0.01 rows=1 width=0)
                     ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.01..0.03 rows=1 width=0)
                           ->  Limit  (cost=0.01..0.01 rows=1 width=0)
                                 ->  Result  (cost=0.01..811.00 rows=23700 width=0)
                                       One-Time Filter: $0 = 'a'::bpchar AND $1 = 'b'::bpchar
                                       ->  Seq Scan on bar  (cost=0.01..811.00 rows=23700 width=0)
         ->  Seq Scan on foo  (cost=0.00..811.00 rows=23700 width=16)
 Optimizer status: legacy query optimizer
(11 rows)
```

This failed during execution because $0 is referenced across the slices.

Root Cause:
That's because planner produce a plan with `$0` aka `param` but without
`subplan`.  The `param` is created by `replace_outer_var()`, when planner
detects a query referring to relations from its outer/parent query.  Such `var`
is created when removing `sublink` in `convert_EXISTS_to_join()` function.  In
that function, when handling the `EXISTS` query, we convert the `EXISTS_sublink`
to a `subquery RTE` (and expect it to get pulled up later by
`pull_up_subquery()`.  However the subquery cannot be pulled-up by
`pull_up_subquery()` since it is not a simple subquery (`is_simple_subquery()`
returns false because of CTE in this case).  In this case, the `sublink` got
removed, hence it cannot produce the `subplan` (which is an valid option).  And
the `var` left behind as outer-reference, and then covered to param, which is
blowed up during query execution.  There is a mismatching conditions between
`convert_EXISTS_to_join()` and `pull_up_subquery()` about whether this subquery
can be pulled-up.

The fix is to reuse `is_simple_subquery()` checking when
`convert_EXISTS_to_join()`, so that it can be consistent with
`pull_up_subquery()` on whether subquery can be pulled or not.

The correct plan after fix is:

```
pivotal=# explain with t as (select 1) select * from foo where exists (select * from bar where foo.a = 'a' and foo.b = 'b');
                                                      QUERY PLAN
----------------------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1977.50 rows=35550 width=16)
   ->  Seq Scan on foo  (cost=0.00..1977.50 rows=11850 width=16)
         Filter: (subplan)
         SubPlan 1
           ->  Result  (cost=0.01..811.00 rows=23700 width=16)
                 One-Time Filter: $0 = 'a'::bpchar AND $1 = 'b'::bpchar
                 ->  Result  (cost=882.11..1593.11 rows=23700 width=16)
                       ->  Materialize  (cost=882.11..1593.11 rows=23700 width=16)
                             ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.01..811.00 rows=23700 width=16)
                                   ->  Seq Scan on bar  (cost=0.01..811.00 rows=23700 width=16)
 Optimizer status: legacy query optimizer
(11 rows)
```
Signed-off-by: NDhanashree Kashid <dkashid@pivotal.io>
上级 644b2a9d
......@@ -28,6 +28,8 @@
#include "lib/stringinfo.h"
#include "cdb/cdbpullup.h"
extern bool is_simple_subquery(PlannerInfo *root, Query *subquery);
static Node *convert_IN_to_antijoin(PlannerInfo *root, List **rtrlist_inout, SubLink *sublink);
static int add_expr_subquery_rte(Query *parse, Query *subselect);
......@@ -902,6 +904,15 @@ convert_EXISTS_to_join(PlannerInfo *root, List **rtrlist_inout, SubLink *sublink
if (IsSubqueryMultiLevelCorrelated(subselect))
return (Node *) sublink;
/*
* Don't remove the sublink if we cannot pull-up the subquery
* later during pull_up_simple_subquery()
*/
if (!is_simple_subquery(root, subselect))
{
return (Node *) sublink;
}
/*
* 'LIMIT n' makes EXISTS false when n <= 0, and doesn't affect the
* outcome when n > 0. Delete subquery's LIMIT and build (0 < n) expr to
......
......@@ -54,7 +54,7 @@ static Node *pull_up_simple_subquery(PlannerInfo *root, Node *jtnode,
RangeTblEntry *rte,
bool below_outer_join,
bool append_rel_member);
static bool is_simple_subquery(PlannerInfo *root, Query *subquery);
bool is_simple_subquery(PlannerInfo *root, Query *subquery);
static bool has_nullable_targetlist(Query *subquery);
static bool is_safe_append_member(Query *subquery);
static void resolvenew_in_jointree(Node *jtnode, int varno,
......@@ -680,7 +680,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
* Check a subquery in the range table to see if it's simple enough
* to pull up into the parent query.
*/
static bool
bool
is_simple_subquery(PlannerInfo *root, Query *subquery)
{
/*
......
......@@ -809,6 +809,19 @@ select A.i from A where exists(select B.i from B where A.i = B.i) order by A.i;
1
(2 rows)
-- with CTE
with t as (select 1) select b from qp_csq_t1 where exists(select * from qp_csq_t2 where y=a);
b
---
2
(1 row)
with t as (select * from qp_csq_t2) select b from qp_csq_t1 where exists(select * from t where y=a);
b
---
2
(1 row)
-- Not supported select A.i, B.i, C.j from A, B, C where exists (select C.j from C where C.j = A.j and exists (select C.i from C where C.i = A.i and C.i !=10)) order by A.i, B.i, C.j limit 20;
select * from A where exists (select * from C where C.j = A.j) order by 1,2;
i | j
......
......@@ -823,6 +823,19 @@ select A.i from A where exists(select B.i from B where A.i = B.i) order by A.i;
1
(2 rows)
-- with CTE
with t as (select 1) select b from qp_csq_t1 where exists(select * from qp_csq_t2 where y=a);
b
---
2
(1 row)
with t as (select * from qp_csq_t2) select b from qp_csq_t1 where exists(select * from t where y=a);
b
---
2
(1 row)
-- Not supported select A.i, B.i, C.j from A, B, C where exists (select C.j from C where C.j = A.j and exists (select C.i from C where C.i = A.i and C.i !=10)) order by A.i, B.i, C.j limit 20;
select * from A where exists (select * from C where C.j = A.j) order by 1,2;
i | j
......
......@@ -329,6 +329,10 @@ insert into C values(2,7);
select b from qp_csq_t1 where exists(select * from qp_csq_t2 where y=a);
select A.i from A where exists(select B.i from B where A.i = B.i) order by A.i;
-- with CTE
with t as (select 1) select b from qp_csq_t1 where exists(select * from qp_csq_t2 where y=a);
with t as (select * from qp_csq_t2) select b from qp_csq_t1 where exists(select * from t where y=a);
-- Not supported select A.i, B.i, C.j from A, B, C where exists (select C.j from C where C.j = A.j and exists (select C.i from C where C.i = A.i and C.i !=10)) order by A.i, B.i, C.j limit 20;
select * from A where exists (select * from C where C.j = A.j) order by 1,2;
select * from A where exists (select * from C,B where C.j = A.j and exists (select * from C where C.i = B.i)) order by 1,2;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册