Fix handling of sub-plans that return multiple Params.

The code to remove unnecessary InitPlans assumed that an InitPlan can only return a single Param, which is wrong. Fix the code to handle multiple Params. Bug report and test case by CK Tan.

Fix handling of sub-plans that return multiple Params.
The code to remove unnecessary InitPlans assumed that an InitPlan can only return a single Param, which is wrong. Fix the code to handle multiple Params. Bug report and test case by CK Tan.
9926d5e3 · Heikki Linnakangas · 06c11b10 · 9926d5e3 · 9926d5e3 · 9926d5e3
4 changed files
--- a/src/backend/cdb/cdbmutate.c
+++ b/src/backend/cdb/cdbmutate.c
@@ -2682,64 +2682,42 @@ static void remove_unused_initplans_helper(Plan *plan, Bitmapset **usedParams, B
 	if (NIL != plan->initPlan)
 	{
-		/* gather initplans from current node, and keep track of their param ids */
+		List	   *newInitPlans = NIL;
-		List *paramids = NIL;
+		ListCell *lc;
-		List *planids = NIL;
-		ListCell *lc = NULL;
 		foreach (lc, plan->initPlan)
 		{
 			SubPlan *initplan = (SubPlan *) lfirst(lc);
-			Assert(initplan->is_initplan);
+			ListCell *lc_paramid;
-			Assert(1 == list_length(initplan->setParam));
+			bool		anyused;
-			planids = lappend_int(planids, initplan->plan_id);
+			Assert(initplan->is_initplan);
-			paramids = lappend_int(paramids, linitial_int(initplan->setParam));
-		}
-		/* remove from these lists the params that are used */
+			/* Are any of this Init Plan's output parameters actually used? */
-		int paramid = bms_first_from(context.paramids, 0);
+			anyused = false;
-		while (0 <= paramid)
+			foreach (lc_paramid, initplan->setParam)
-		{
-			int index = list_find_int(paramids, paramid);
-			if (0 <= index)
 			{
-				int planid = list_nth_int(planids, index);
+				int			paramid = lfirst_int(lc_paramid);
-				paramids = list_delete_int(paramids, paramid);
-				planids = list_delete_int(planids, planid);
-			}
-			paramid = bms_first_from(context.paramids, paramid + 1);
-		}
-		/* delete unused initplans */
+				if (bms_is_member(paramid, context.paramids))
-		List *oldInitPlans = plan->initPlan;
+				{
-		plan->initPlan = NIL;
+					anyused = true;
+					break;
-		foreach (lc, oldInitPlans)
+				}
-		{
-			SubPlan *initplan = (SubPlan *) lfirst(lc);
-			if (0 > list_find_int(planids, initplan->plan_id))
-			{
-				plan->initPlan = lappend(plan->initPlan, initplan);
 			}
+			/* If none of its params are used, leave out from the new list */
+			if (anyused)
+				newInitPlans = lappend(newInitPlans, initplan);
 			else
-			{
+				elog(DEBUG2, "removing unused InitPlan %s", initplan->plan_name);
-				pfree(initplan);
-			}
 		}
 		/* remove unused params */
-		foreach (lc, paramids)
+		plan->allParam = bms_intersect(plan->allParam, context.paramids);
-		{
-			int paramid = lfirst_int(lc);
-			plan->allParam = bms_del_member(plan->allParam, paramid);
-		}
-		/* cleanup */
+		list_free(plan->initPlan);
-		list_free(oldInitPlans);
+		plan->initPlan = newInitPlans;
-		list_free(planids);
-		list_free(paramids);
 	}
 	Bitmapset *oldbms = *usedParams;

--- a/src/test/regress/expected/subselect_gp.out
+++ b/src/test/regress/expected/subselect_gp.out
@@ -1112,6 +1112,20 @@ order by 1;
 drop table if exists initplan_x;
 drop table if exists initplan_y;
 --
+-- Test Initplans that return multiple params.
+--
+create table initplan_test(i int, j int, m int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into initplan_test values (1,1,1);
+select * from initplan_test where row(j, m) = (select j, m from initplan_test where i = 1);
+ i | j | m 
+---+---+---
+ 1 | 1 | 1
+(1 row)
+drop table initplan_test;
+--
 -- apply parallelization for subplan MPP-24563
 --
 create table t1_mpp_24563 (id int, value int) distributed by (id);

--- a/src/test/regress/expected/subselect_gp_optimizer.out
+++ b/src/test/regress/expected/subselect_gp_optimizer.out
@@ -1116,6 +1116,20 @@ order by 1;
 drop table if exists initplan_x;
 drop table if exists initplan_y;
 --
+-- Test Initplans that return multiple params.
+--
+create table initplan_test(i int, j int, m int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into initplan_test values (1,1,1);
+select * from initplan_test where row(j, m) = (select j, m from initplan_test where i = 1);
+ i | j | m 
+---+---+---
+ 1 | 1 | 1
+(1 row)
+drop table initplan_test;
+--
 -- apply parallelization for subplan MPP-24563
 --
 create table t1_mpp_24563 (id int, value int) distributed by (id);

--- a/src/test/regress/sql/subselect_gp.sql
+++ b/src/test/regress/sql/subselect_gp.sql
@@ -481,6 +481,15 @@ order by 1;
 drop table if exists initplan_x;
 drop table if exists initplan_y;
+--
+-- Test Initplans that return multiple params.
+--
+create table initplan_test(i int, j int, m int);
+insert into initplan_test values (1,1,1);
+select * from initplan_test where row(j, m) = (select j, m from initplan_test where i = 1);
+drop table initplan_test;
 --
 -- apply parallelization for subplan MPP-24563
 --