From 52d31de9f422adb90cfbca6f47a923e7acc5f6fa Mon Sep 17 00:00:00 2001 From: Bhuvnesh Chaudhary Date: Thu, 31 Aug 2017 12:43:42 -0700 Subject: [PATCH] Dont pullup correlated subqueries with limit/offset clause Queries in which there is a IN clause on top of a correlated subquery containing limit/offset clause, planner tries to identify if the IN clause can be converted to a join in convert_IN_to_join() and creates a RTE for the join if its possible and does not consider limit/offset clause while making a decision. However, later in pull_up_subqueries(), check enforced by is_simple_subquery causes the subquery containing limit/offset clauses to be not pulled up. This inconsistency causes a plan to be generated with a param, however, with no corresponding subplan. The patch fixes the issues by adding the relevant checks in convert_IN_to_join() to identify if the subquery is correlated and contains limit/offset clause, in such cases the sublink will not be converted to a join and a plan with subplan will be created. --- src/backend/optimizer/plan/subselect.c | 9 +--- src/test/regress/expected/subselect_gp.out | 53 ++++++++++++++++++ .../expected/subselect_gp_optimizer.out | 54 +++++++++++++++++++ src/test/regress/sql/subselect_gp.sql | 10 ++++ 4 files changed, 119 insertions(+), 7 deletions(-) diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 41d6e2a196..f1c8a9fa2c 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -40,6 +40,7 @@ #include "cdb/cdbsubselect.h" #include "cdb/cdbvars.h" +extern bool is_simple_subquery(PlannerInfo *root, Query *subquery); typedef struct convert_testexpr_context { @@ -1105,13 +1106,7 @@ convert_IN_to_join(PlannerInfo *root, List **rtrlist_inout, SubLink *sublink) /* * Under certain conditions, we cannot pull up the subquery as a join. */ - if (subselect->hasAggs - || (subselect->jointree->fromlist == NULL) - || subselect->havingQual - || subselect->groupClause - || subselect->hasWindFuncs - || subselect->distinctClause - || subselect->setOperations) + if (!is_simple_subquery(root, subselect)) return (Node *) sublink; /* diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 19f347055b..851cb5e6f9 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -1110,6 +1110,59 @@ where t1.a = foo.a; ---+--- (0 rows) +--- +--- Correlated subqueries with limit/offset clause must not be pulled up as join +--- +insert into t1 values (1); +insert into t2 values (1); +explain select 1 from t1 where a in (select b from t2 where a = 1 limit 1); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.53 rows=1 width=0) + -> Seq Scan on t1 (cost=0.00..1.53 rows=1 width=0) + Filter: (subplan) + SubPlan 1 + -> Limit (cost=0.00..1.03 rows=1 width=4) + -> Limit (cost=0.00..1.01 rows=1 width=4) + -> Result (cost=0.00..1.01 rows=1 width=4) + One-Time Filter: $0 = 1 + -> Result (cost=1.01..1.02 rows=1 width=4) + -> Materialize (cost=1.01..1.02 rows=1 width=4) + -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.01 rows=1 width=4) + -> Seq Scan on t2 (cost=0.00..1.01 rows=1 width=4) + Settings: optimizer=off; optimizer_nestloop_factor=1; optimizer_segments=3 + Optimizer status: legacy query optimizer +(14 rows) + +explain select 1 from t1 where a in (select b from t2 where a = 1 offset 1); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..2.05 rows=1 width=0) + -> Seq Scan on t1 (cost=0.00..2.05 rows=1 width=0) + Filter: (subplan) + SubPlan 1 + -> Limit (cost=1.03..1.03 rows=1 width=4) + -> Result (cost=0.00..1.01 rows=1 width=4) + One-Time Filter: $0 = 1 + -> Result (cost=1.01..1.02 rows=1 width=4) + -> Materialize (cost=1.01..1.02 rows=1 width=4) + -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.01 rows=1 width=4) + -> Seq Scan on t2 (cost=0.00..1.01 rows=1 width=4) + Settings: optimizer=off; optimizer_nestloop_factor=1; optimizer_segments=3 + Optimizer status: legacy query optimizer +(13 rows) + +select 1 from t1 where a in (select b from t2 where a = 1 limit 1); + ?column? +---------- + 1 +(1 row) + +select 1 from t1 where a in (select b from t2 where a = 1 offset 1); + ?column? +---------- +(0 rows) + drop table if exists t1; drop table if exists t2; -- diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 2b9878be70..f8349b0668 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -1090,6 +1090,60 @@ where t1.a = foo.a; ---+--- (0 rows) +--- +--- Correlated subqueries with limit/offset clause must not be pulled up as join +--- +insert into t1 values (1); +insert into t2 values (1); +explain select 1 from t1 where a in (select b from t2 where a = 1 limit 1); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Result (cost=0.00..1293.00 rows=1 width=4) + -> Result (cost=0.00..1293.00 rows=1 width=1) + Filter: (subplan) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Table Scan on t1 (cost=0.00..431.00 rows=1 width=4) + SubPlan 1 + -> Limit (cost=0.00..431.00 rows=1 width=4) + -> Result (cost=0.00..431.00 rows=1 width=4) + Filter: $1 = 1 + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Table Scan on t2 (cost=0.00..431.00 rows=1 width=4) + Settings: optimizer=on; optimizer_nestloop_factor=1; optimizer_segments=3 + Optimizer status: PQO version 2.39.2 +(14 rows) + +explain select 1 from t1 where a in (select b from t2 where a = 1 offset 1); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Result (cost=0.00..1293.00 rows=1 width=4) + -> Result (cost=0.00..1293.00 rows=1 width=1) + Filter: (subplan) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Table Scan on t1 (cost=0.00..431.00 rows=1 width=4) + SubPlan 1 + -> Limit (cost=0.00..431.00 rows=1 width=4) + -> Result (cost=0.00..431.00 rows=1 width=4) + Filter: $1 = 1 + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Table Scan on t2 (cost=0.00..431.00 rows=1 width=4) + Settings: optimizer=on; optimizer_nestloop_factor=1; optimizer_segments=3 + Optimizer status: PQO version 2.39.2 +(14 rows) + +select 1 from t1 where a in (select b from t2 where a = 1 limit 1); + ?column? +---------- + 1 +(1 row) + +select 1 from t1 where a in (select b from t2 where a = 1 offset 1); + ?column? +---------- +(0 rows) + drop table if exists t1; drop table if exists t2; -- diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index a04e5e3a42..13882e0aa1 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -460,6 +460,16 @@ select * from t1, (select * from t1 where a=1 and a=2 and a > (select t2.b from t2)) foo where t1.a = foo.a; +--- +--- Correlated subqueries with limit/offset clause must not be pulled up as join +--- +insert into t1 values (1); +insert into t2 values (1); +explain select 1 from t1 where a in (select b from t2 where a = 1 limit 1); +explain select 1 from t1 where a in (select b from t2 where a = 1 offset 1); +select 1 from t1 where a in (select b from t2 where a = 1 limit 1); +select 1 from t1 where a in (select b from t2 where a = 1 offset 1); + drop table if exists t1; drop table if exists t2; -- GitLab