Avoid Duplicate subplans in partition selection key

A condition that contains subplans would otherwise be duplicated as the partition selection key in the PartitionSelector node. It is not OK to duplicate the expression if it contains SubPlans, because the code that adds motion nodes to a subplan gets confused when multiple SubPlans refer to the same subplan ID. FindEqKey therefore skips any candidate expression for which contain_subplans() returns true.

Avoid Duplicate subplans in partition selection key
A condition that contains subplans would otherwise be duplicated as the partition selection key in the PartitionSelector node. It is not OK to duplicate the expression if it contains SubPlans, because the code that adds motion nodes to a subplan gets confused when multiple SubPlans refer to the same subplan ID. FindEqKey therefore skips any candidate expression for which contain_subplans() returns true.
9d63d3c1 · Bhuvnesh Chaudhary · Bhuvnesh · 8adc22f4 · 9d63d3c1 · 9d63d3c1
4 changed files
--- a/src/backend/optimizer/plan/planpartition.c
+++ b/src/backend/optimizer/plan/planpartition.c
@@ -305,7 +305,18 @@ FindEqKey(PlannerInfo *root, Bitmapset *inner_relids,

 					if (!bms_is_subset(inner_em->em_relids, inner_relids))
 						continue; /* not computable on the inner side */
-
+					/*
+					 * The condition will be duplicated as the partition
+					 * selection key in the PartitionSelector node. It is
+					 * not OK to duplicate the expression, if it contains
+					 * SubPlans, because the code that adds motion nodes to a
+					 * subplan gets confused if there are multiple SubPlans
+					 * referring the same subplan ID. It would probably
+					 * perform badly too, since subplans are typically quite
+					 * expensive.
+					 */
+					if (contain_subplans((Node *) inner_em->em_expr))
+						continue;
 					/*
 					 * This can be computed from the inner side.
 					 *

--- a/src/test/regress/expected/subselect_gp.out
+++ b/src/test/regress/expected/subselect_gp.out
@@ -1345,4 +1345,20 @@ SELECT * FROM bar_s T1 WHERE c = (SELECT max(c) FROM bar_s T2 WHERE T2.d = T1.d
 9 | 9
 (1 row)

+CREATE TABLE foo_s (a integer, b integer)  PARTITION BY RANGE(b)
+    (PARTITION sub_one START (1) END (10),
+     PARTITION sub_two START (11) END (22));
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+NOTICE:  CREATE TABLE will create partition "foo_s_1_prt_sub_one" for table "foo_s"
+NOTICE:  CREATE TABLE will create partition "foo_s_1_prt_sub_two" for table "foo_s"
+INSERT INTO foo_s VALUES (9,9);
+INSERT INTO foo_s VALUES (2,9);
+SELECT bar_s.c from bar_s, foo_s WHERE foo_s.a=2 AND foo_s.b = (SELECT max(b) FROM foo_s WHERE bar_s.c = 9);
+ c 
+---
+ 9
+(1 row)
+
 DROP TABLE bar_s;
+DROP TABLE foo_s;
--- a/src/test/regress/expected/subselect_gp_optimizer.out
+++ b/src/test/regress/expected/subselect_gp_optimizer.out
@@ -1324,9 +1324,25 @@ NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 INSERT INTO bar_s VALUES (9,9);
 SELECT * FROM bar_s T1 WHERE c = (SELECT max(c) FROM bar_s T2 WHERE T2.d = T1.d GROUP BY c) AND c < 10;
- c | d
+ c | d 
 ---+---
 9 | 9
 (1 row)

+CREATE TABLE foo_s (a integer, b integer)  PARTITION BY RANGE(b)
+    (PARTITION sub_one START (1) END (10),
+     PARTITION sub_two START (11) END (22));
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+NOTICE:  CREATE TABLE will create partition "foo_s_1_prt_sub_one" for table "foo_s"
+NOTICE:  CREATE TABLE will create partition "foo_s_1_prt_sub_two" for table "foo_s"
+INSERT INTO foo_s VALUES (9,9);
+INSERT INTO foo_s VALUES (2,9);
+SELECT bar_s.c from bar_s, foo_s WHERE foo_s.a=2 AND foo_s.b = (SELECT max(b) FROM foo_s WHERE bar_s.c = 9);
+ c 
+---
+ 9
+(1 row)
+
 DROP TABLE bar_s;
+DROP TABLE foo_s;
--- a/src/test/regress/sql/subselect_gp.sql
+++ b/src/test/regress/sql/subselect_gp.sql
@@ -616,4 +616,11 @@ where exists (select 1 from CT where CT.a = foo.a);
 CREATE TABLE bar_s (c integer, d character varying(10));
 INSERT INTO bar_s VALUES (9,9);
 SELECT * FROM bar_s T1 WHERE c = (SELECT max(c) FROM bar_s T2 WHERE T2.d = T1.d GROUP BY c) AND c < 10;
+CREATE TABLE foo_s (a integer, b integer)  PARTITION BY RANGE(b)
+    (PARTITION sub_one START (1) END (10),
+     PARTITION sub_two START (11) END (22));
+INSERT INTO foo_s VALUES (9,9);
+INSERT INTO foo_s VALUES (2,9);
+SELECT bar_s.c from bar_s, foo_s WHERE foo_s.a=2 AND foo_s.b = (SELECT max(b) FROM foo_s WHERE bar_s.c = 9);
 DROP TABLE bar_s;
+DROP TABLE foo_s;