From 9926d5e310272ee654bfc8b625e37e874ece7ae8 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 10 Mar 2016 17:00:24 +0200 Subject: [PATCH] Fix handling of sub-plans that return multiple Params. The code to remove unnecessary InitPlans assumed that an InitPlan can only return a single Param, which is wrong. Fix the code to handle multiple Params. Bug report and test case by CK Tan. --- src/backend/cdb/cdbmutate.c | 66 +++++++------------ src/test/regress/expected/subselect_gp.out | 14 ++++ .../expected/subselect_gp_optimizer.out | 14 ++++ src/test/regress/sql/subselect_gp.sql | 9 +++ 4 files changed, 59 insertions(+), 44 deletions(-) diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c index 53912da83c..8fb993f508 100644 --- a/src/backend/cdb/cdbmutate.c +++ b/src/backend/cdb/cdbmutate.c @@ -2682,64 +2682,42 @@ static void remove_unused_initplans_helper(Plan *plan, Bitmapset **usedParams, B if (NIL != plan->initPlan) { - /* gather initplans from current node, and keep track of their param ids */ - List *paramids = NIL; - List *planids = NIL; + List *newInitPlans = NIL; + ListCell *lc; - ListCell *lc = NULL; foreach (lc, plan->initPlan) { SubPlan *initplan = (SubPlan *) lfirst(lc); - Assert(initplan->is_initplan); - Assert(1 == list_length(initplan->setParam)); + ListCell *lc_paramid; + bool anyused; - planids = lappend_int(planids, initplan->plan_id); - paramids = lappend_int(paramids, linitial_int(initplan->setParam)); - } + Assert(initplan->is_initplan); - /* remove from these lists the params that are used */ - int paramid = bms_first_from(context.paramids, 0); - while (0 <= paramid) - { - int index = list_find_int(paramids, paramid); - if (0 <= index) + /* Are any of this Init Plan's output parameters actually used? */ + anyused = false; + foreach (lc_paramid, initplan->setParam) { - int planid = list_nth_int(planids, index); - paramids = list_delete_int(paramids, paramid); - planids = list_delete_int(planids, planid); - } - - paramid = bms_first_from(context.paramids, paramid + 1); - } + int paramid = lfirst_int(lc_paramid); - /* delete unused initplans */ - List *oldInitPlans = plan->initPlan; - plan->initPlan = NIL; - - foreach (lc, oldInitPlans) - { - SubPlan *initplan = (SubPlan *) lfirst(lc); - if (0 > list_find_int(planids, initplan->plan_id)) - { - plan->initPlan = lappend(plan->initPlan, initplan); + if (bms_is_member(paramid, context.paramids)) + { + anyused = true; + break; + } } + + /* If none of its params are used, leave out from the new list */ + if (anyused) + newInitPlans = lappend(newInitPlans, initplan); else - { - pfree(initplan); - } + elog(DEBUG2, "removing unused InitPlan %s", initplan->plan_name); } /* remove unused params */ - foreach (lc, paramids) - { - int paramid = lfirst_int(lc); - plan->allParam = bms_del_member(plan->allParam, paramid); - } + plan->allParam = bms_intersect(plan->allParam, context.paramids); - /* cleanup */ - list_free(oldInitPlans); - list_free(planids); - list_free(paramids); + list_free(plan->initPlan); + plan->initPlan = newInitPlans; } Bitmapset *oldbms = *usedParams; diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 57b9847b09..05caa6616d 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -1112,6 +1112,20 @@ order by 1; drop table if exists initplan_x; drop table if exists initplan_y; -- +-- Test Initplans that return multiple params. +-- +create table initplan_test(i int, j int, m int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into initplan_test values (1,1,1); +select * from initplan_test where row(j, m) = (select j, m from initplan_test where i = 1); + i | j | m +---+---+--- + 1 | 1 | 1 +(1 row) + +drop table initplan_test; +-- -- apply parallelization for subplan MPP-24563 -- create table t1_mpp_24563 (id int, value int) distributed by (id); diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 331ed46e0c..81b0a678a6 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -1116,6 +1116,20 @@ order by 1; drop table if exists initplan_x; drop table if exists initplan_y; -- +-- Test Initplans that return multiple params. +-- +create table initplan_test(i int, j int, m int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into initplan_test values (1,1,1); +select * from initplan_test where row(j, m) = (select j, m from initplan_test where i = 1); + i | j | m +---+---+--- + 1 | 1 | 1 +(1 row) + +drop table initplan_test; +-- -- apply parallelization for subplan MPP-24563 -- create table t1_mpp_24563 (id int, value int) distributed by (id); diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 99747c1948..142b6ae206 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -481,6 +481,15 @@ order by 1; drop table if exists initplan_x; drop table if exists initplan_y; +-- +-- Test Initplans that return multiple params. +-- +create table initplan_test(i int, j int, m int); +insert into initplan_test values (1,1,1); +select * from initplan_test where row(j, m) = (select j, m from initplan_test where i = 1); + +drop table initplan_test; + -- -- apply parallelization for subplan MPP-24563 -- -- GitLab