diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index 3047c8c9c422c6edeeca1ac02ba7e89bbb978cc1..c0482a4855999dcc8c62ef7b0e70ffeea7ed809e 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -1003,8 +1003,11 @@ list_append_unique_oid(List *list, Oid datum) * via equal(). * * This is almost the same functionality as list_union(), but list1 is - * modified in-place rather than being copied. Note also that list2's cells - * are not inserted in list1, so the analogy to list_concat() isn't perfect. + * modified in-place rather than being copied. However, callers of this + * function may have strict ordering expectations -- i.e. that the relative + * order of those list2 elements that are not duplicates is preserved. Note + * also that list2's cells are not inserted in list1, so the analogy to + * list_concat() isn't perfect. */ List * list_concat_unique(List *list1, List *list2) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d3ef90499bb6753fb0bf607afc97a490b8c4aa1b..4585026677f13a6ca0ca1d704dddb3634edd309a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -89,6 +89,17 @@ typedef struct List *activeWindows; /* active windows, if any */ } standard_qp_extra; +/* + * Temporary structure for use during WindowClause reordering in order to be + * able to sort WindowClauses on partitioning/ordering prefix. 
+ */ +typedef struct +{ + WindowClause *wc; + List *uniqueOrder; /* A List of unique ordering/partitioning + * clauses per Window */ +} WindowClauseSortData; + /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); @@ -129,6 +140,7 @@ static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc, int *ordNumCols, AttrNumber **ordColIdx, Oid **ordOperators); +static int common_prefix_cmp(const void *a, const void *b); static Bitmapset *canonicalize_colref_list(Node *node); static List *canonicalize_gs_list(List *gsl, bool ordinary); @@ -4366,65 +4378,121 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist) static List * select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) { - List *result; - List *actives; + List *windowClause = root->parse->windowClause; + List *result = NIL; ListCell *lc; + int nActive = 0; + WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData) + * list_length(windowClause)); - /* First, make a list of the active windows */ - actives = NIL; - foreach(lc, root->parse->windowClause) + /* First, construct an array of the active windows */ + foreach(lc, windowClause) { WindowClause *wc = (WindowClause *) lfirst(lc); /* It's only active if wflists shows some related WindowFuncs */ Assert(wc->winref <= wflists->maxWinRef); - if (wflists->windowFuncs[wc->winref] != NIL) - actives = lappend(actives, wc); + if (wflists->windowFuncs[wc->winref] == NIL) + continue; + + actives[nActive].wc = wc; /* original clause */ + + /* + * For sorting, we want the list of partition keys followed by the + * list of sort keys. But pathkeys construction will remove duplicates + * between the two, so we can as well (even though we can't detect all + * of the duplicates, since some may come from ECs - that might mean + * we miss optimization chances here). 
We must, however, ensure that + * the order of entries is preserved with respect to the ones we do + * keep. + * + * partitionClause and orderClause had their own duplicates removed in + * parse analysis, so we're only concerned here with removing + * orderClause entries that also appear in partitionClause. + */ + actives[nActive].uniqueOrder = + list_concat_unique(list_copy(wc->partitionClause), + wc->orderClause); + nActive++; } /* - * Now, ensure that windows with identical partitioning/ordering clauses - * are adjacent in the list. This is required by the SQL standard, which - * says that only one sort is to be used for such windows, even if they - * are otherwise distinct (eg, different names or framing clauses). + * Sort active windows by their partitioning/ordering clauses, ignoring + * any framing clauses, so that the windows that need the same sorting are + * adjacent in the list. When we come to generate paths, this will avoid + * inserting additional Sort nodes. + * + * This is how we implement a specific requirement from the SQL standard, + * which says that when two or more windows are order-equivalent (i.e. + * have matching partition and order clauses, even if their names or + * framing clauses differ), then all peer rows must be presented in the + * same order in all of them. If we allowed multiple sort nodes for such + * cases, we'd risk having the peer rows end up in different orders in + * equivalent windows due to sort instability. (See General Rule 4 of + * the window clause in SQL2008 - SQL2016.) * - * There is room to be much smarter here, for example detecting whether - * one window's sort keys are a prefix of another's (so that sorting for - * the latter would do for the former), or putting windows first that - * match a sort order available for the underlying query. For the moment - * we are content with meeting the spec. 
+ * Additionally, if the entire list of clauses of one window is a prefix + * of another, put first the window with stronger sorting requirements. + * This way we will first sort for the stronger window, and won't have to + * sort again for the weaker one. */ - result = NIL; - while (actives != NIL) - { - WindowClause *wc = (WindowClause *) linitial(actives); - ListCell *prev; - ListCell *next; + qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp); - /* Move wc from actives to result */ - actives = list_delete_first(actives); - result = lappend(result, wc); + /* build ordered list of the original WindowClause nodes */ + for (int i = 0; i < nActive; i++) + result = lappend(result, actives[i].wc); - /* Now move any matching windows from actives to result */ - prev = NULL; - for (lc = list_head(actives); lc; lc = next) - { - WindowClause *wc2 = (WindowClause *) lfirst(lc); + pfree(actives); - next = lnext(lc); - /* framing options are NOT to be compared here! */ - if (equal(wc->partitionClause, wc2->partitionClause) && - equal(wc->orderClause, wc2->orderClause)) - { - actives = list_delete_cell(actives, lc, prev); - result = lappend(result, wc2); - } - else - prev = lc; - } + return result; +} + +/* + * common_prefix_cmp + * QSort comparison function for WindowClauseSortData + * + * Sort the windows by the required sorting clauses. First, compare the sort + * clauses themselves. Second, if one window's clauses are a prefix of another + * one's clauses, put the window with more sort clauses first. + */ +static int +common_prefix_cmp(const void *a, const void *b) +{ + const WindowClauseSortData *wcsa = a; + const WindowClauseSortData *wcsb = b; + ListCell *item_a; + ListCell *item_b; + + forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder) + { + /* + * GPDB_100_MERGE_FIXME: replace with lfirst_node() calls when commit + * 8f0530f58061b185dc385df42e62d78a18d4ae3e is merged. 
+ */ + SortGroupClause *sca = (SortGroupClause *) lfirst(item_a); + SortGroupClause *scb = (SortGroupClause *) lfirst(item_b); + + if (sca->tleSortGroupRef > scb->tleSortGroupRef) + return -1; + else if (sca->tleSortGroupRef < scb->tleSortGroupRef) + return 1; + else if (sca->sortop > scb->sortop) + return -1; + else if (sca->sortop < scb->sortop) + return 1; + else if (sca->nulls_first && !scb->nulls_first) + return -1; + else if (!sca->nulls_first && scb->nulls_first) + return 1; + /* no need to compare eqop, since it is fully determined by sortop */ } - return result; + if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder)) + return -1; + else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder)) + return 1; + + return 0; } /* diff --git a/src/test/regress/expected/olap_window_seq.out b/src/test/regress/expected/olap_window_seq.out index 7b06675eb03540101a17b6e6ffd86f0de39f6242..2b400f013a6b282b2d10f74c62df264538cd4122 100755 --- a/src/test/regress/expected/olap_window_seq.out +++ b/src/test/regress/expected/olap_window_seq.out @@ -7889,23 +7889,20 @@ select count(*) over (partition by 1 order by cn rows between 1 preceding and 1 -- MPP-13710 create table redundant_sort_check (i int, j int, k int) distributed by (i); explain select count(*) over (order by i), count(*) over (partition by i order by j) from redundant_sort_check; - QUERY PLAN --------------------------------------------------------------------------------------------------------- - WindowAgg (cost=14900.48..16458.48 rows=77900 width=8) - Partition By: i - Order By: j - -> Sort (cost=14900.48..15095.23 rows=77900 width=8) - Sort Key: i, j - -> WindowAgg (cost=7208.12..8571.37 rows=77900 width=8) - Order By: i - -> Gather Motion 3:1 (slice1; segments: 3) (cost=7208.12..7402.87 rows=77900 width=8) - Merge Key: i - -> Sort (cost=7208.12..7402.87 rows=25967 width=8) - Sort Key: i - -> Seq Scan on redundant_sort_check (cost=0.00..879.00 rows=25967 width=8) - Settings: 
optimizer=off - Optimizer status: legacy query optimizer -(14 rows) + QUERY PLAN +-------------------------------------------------------------------------------------------------- + WindowAgg (cost=7208.12..9934.62 rows=77900 width=8) + Order By: i + -> Gather Motion 3:1 (slice1; segments: 3) (cost=7208.12..8766.12 rows=77900 width=8) + Merge Key: i, j + -> WindowAgg (cost=7208.12..8766.12 rows=25967 width=8) + Partition By: i + Order By: j + -> Sort (cost=7208.12..7402.87 rows=25967 width=8) + Sort Key: i, j + -> Seq Scan on redundant_sort_check (cost=0.00..879.00 rows=25967 width=8) + Optimizer: legacy query optimizer +(11 rows) -- End of MPP-13710 -- MPP-13879 @@ -8151,39 +8148,34 @@ EXPLAIN SELECT count(*) over (PARTITION BY a ORDER BY b, c, d) as count1, count(*) over (PARTITION BY a ORDER BY c, b) as count2, count(*) over (PARTITION BY a ORDER BY c, b, d) as count3 FROM foo; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=4.81..5.06 rows=10 width=16) - -> WindowAgg (cost=4.81..5.06 rows=4 width=16) + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=4.06..4.68 rows=10 width=16) + -> WindowAgg (cost=4.06..4.68 rows=4 width=16) Partition By: a - Order By: c, b, d - -> Sort (cost=4.81..4.84 rows=4 width=16) - Sort Key: a, c, b, d - -> WindowAgg (cost=4.42..4.65 rows=4 width=16) + Order By: b + -> WindowAgg (cost=4.06..4.51 rows=4 width=16) + Partition By: a + Order By: b, c + -> WindowAgg (cost=4.06..4.31 rows=4 width=16) Partition By: a - Order By: c, b - -> Sort (cost=4.42..4.45 rows=4 width=16) - Sort Key: a, c, b - -> WindowAgg (cost=4.06..4.26 rows=4 width=16) + Order By: b, c, d + -> Sort (cost=4.06..4.08 rows=4 width=16) + Sort Key: a, b, c, d + -> WindowAgg (cost=3.27..3.89 rows=4 width=16) Partition By: a 
Order By: c - -> Sort (cost=4.06..4.08 rows=4 width=16) - Sort Key: a, c - -> WindowAgg (cost=3.27..3.89 rows=4 width=16) + -> WindowAgg (cost=3.27..3.72 rows=4 width=16) + Partition By: a + Order By: c, b + -> WindowAgg (cost=3.27..3.52 rows=4 width=16) Partition By: a - Order By: b - -> WindowAgg (cost=3.27..3.72 rows=4 width=16) - Partition By: a - Order By: b, c - -> WindowAgg (cost=3.27..3.52 rows=4 width=16) - Partition By: a - Order By: b, c, d - -> Sort (cost=3.27..3.29 rows=4 width=16) - Sort Key: a, b, c, d - -> Seq Scan on foo (cost=0.00..3.10 rows=4 width=16) - Settings: optimizer=off - Optimizer status: legacy query optimizer -(30 rows) + Order By: c, b, d + -> Sort (cost=3.27..3.29 rows=4 width=16) + Sort Key: a, c, b, d + -> Seq Scan on foo (cost=0.00..3.10 rows=4 width=16) + Optimizer: legacy query optimizer +(25 rows) drop table foo; -- test predicate push down in subqueries for quals containing windowref nodes diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 0b6b7000d850a9244ddfc2d47ca7290dd0ffb66e..aa4464eb629b843105dfd8d600ff1fbb2db1ab5b 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -511,9 +511,9 @@ SELECT sum(salary), FROM empsalary GROUP BY depname; sum | row_number | sum -------+------------+------- - 14600 | 3 | 14600 - 7400 | 2 | 22000 25100 | 1 | 47100 + 7400 | 2 | 22000 + 14600 | 3 | 14600 (3 rows) -- identical windows with different names @@ -1057,6 +1057,59 @@ SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1; ERROR: argument of ntile must be greater than zero SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; ERROR: argument of nth_value must be greater than zero +-- Test Sort node collapsing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary + FROM empsalary) emp 
+WHERE depname = 'sales'; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice3; segments: 3) + -> Subquery Scan on emp + -> WindowAgg + Order By: empsalary.empno + -> Sort + Sort Key: empsalary.empno + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: empsalary.depname + -> WindowAgg + Partition By: empsalary.empno + Order By: empsalary.enroll_date + -> Sort + Sort Key: empsalary.empno, empsalary.enroll_date + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: empsalary.depname, empsalary.empno + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) + Optimizer: legacy query optimizer +(18 rows) + +-- Test Sort node reordering +EXPLAIN (COSTS OFF) +SELECT + lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date), + lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno) +FROM empsalary; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> WindowAgg + Partition By: depname + Order By: salary, enroll_date + -> WindowAgg + Partition By: depname + Order By: salary, enroll_date, empno + -> Sort + Sort Key: depname, salary, enroll_date, empno + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: depname + -> Seq Scan on empsalary + Optimizer: legacy query optimizer +(13 rows) + -- cleanup DROP TABLE empsalary; -- diff --git a/src/test/regress/expected/window_optimizer.out b/src/test/regress/expected/window_optimizer.out index 8122693bdd10a1f960554a96d4f6e2b92c8c585e..5709ccb98c2980564e0c87168ebe9d260ebc4fea 100644 --- a/src/test/regress/expected/window_optimizer.out +++ b/src/test/regress/expected/window_optimizer.out @@ -511,9 +511,9 @@ SELECT sum(salary), FROM empsalary GROUP BY depname; sum | row_number | sum -------+------------+------- - 14600 | 3 | 14600 - 7400 | 2 | 22000 25100 | 1 | 47100 + 7400 | 2 | 22000 + 14600 | 3 | 14600 (3 rows) -- 
identical windows with different names @@ -1059,6 +1059,55 @@ SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1; ERROR: argument of ntile must be greater than zero SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; ERROR: argument of nth_value must be greater than zero +-- Test Sort node collapsing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Result + -> WindowAgg + Partition By: depname, empno + Order By: enroll_date + -> Sort + Sort Key: depname, empno, enroll_date + -> WindowAgg + Partition By: depname + Order By: empno + -> Sort + Sort Key: depname, empno + -> Table Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(15 rows) + +-- Test Sort node reordering +EXPLAIN (COSTS OFF) +SELECT + lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date), + lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno) +FROM empsalary; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Result + -> WindowAgg + Partition By: depname + Order By: salary, enroll_date, empno + -> Sort + Sort Key: depname, salary, enroll_date, empno + -> WindowAgg + Partition By: depname + Order By: salary, enroll_date + -> Sort + Sort Key: depname, salary, enroll_date + -> Table Scan on empsalary +(14 rows) + -- cleanup DROP TABLE empsalary; -- diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index da07425ebf25c3bde6b5034eeb9a3df294739071..066c5c88af3a13e1603fd097b60ebaaaa601ded3 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -276,6 +276,22 @@ SELECT 
ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1; SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; +-- Test Sort node collapsing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + +-- Test Sort node reordering +EXPLAIN (COSTS OFF) +SELECT + lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date), + lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno) +FROM empsalary; + -- cleanup DROP TABLE empsalary;