Only apply transformGroupedWindows() with ORCA. (#10306)

* Only apply transformGroupedWindows() with ORCA. The Postgres planner doesn't need it. Move the code to do it, so that it's only used before passing a tree to ORCA. This doesn't change anything with ORCA, but with the Postgres planner, it has some benefits: * Some cases before this patch do not give correct results and now they run correctly (e.g. case `regress/olap_window_seq`) * Fixes github issue #10143. * Make transformGroupedWindows walk the entire tree The transformGroupedWindows function now recursively transforms any Query node in the tree that has both window functions and groupby or aggregates. Also fixed a pre-existing bug where we put a subquery in the target list of such a Query node into the upper query, Q'. This meant that any outer references to the scope of Q' no longer had the correct varattno. The fix is to place the subquery into the target list of the lower query, Q'' instead, which has the same range table as the original query Q. Therefore, the varattnos to outer references to the scope of Q (now Q'') don't need to be updated. Note that varlevelsup to scopes above Q still need to be adjusted, since we inserted a new scope Q'. (See comments in code for explanations of Q, Q', Q''). Co-authored-by: N Heikki Linnakangas <hlinnakangas@pivotal.io> Co-authored-by: N Hans Zeller <hzeller@vmware.com> Co-authored-by: N Abhijit Subramanya <asubramanya@pivotal.io>

Only apply transformGroupedWindows() with ORCA. (#10306)
* Only apply transformGroupedWindows() with ORCA. The Postgres planner doesn't need it. Move the code to do it, so that it's only used before passing a tree to ORCA. This doesn't change anything with ORCA, but with the Postgres planner, it has some benefits: * Some cases before this patch do not give correct results and now they run correctly (e.g. case `regress/olap_window_seq`) * Fixes github issue #10143. * Make transformGroupedWindows walk the entire tree The transformGroupedWindows function now recursively transforms any Query node in the tree that has both window functions and groupby or aggregates. Also fixed a pre-existing bug where we put a subquery in the target list of such a Query node into the upper query, Q'. This meant that any outer references to the scope of Q' no longer had the correct varattno. The fix is to place the subquery into the target list of the lower query, Q'' instead, which has the same range table as the original query Q. Therefore, the varattnos to outer references to the scope of Q (now Q'') don't need to be updated. Note that varlevelsup to scopes above Q still need to be adjusted, since we inserted a new scope Q'. (See comments in code for explanations of Q, Q', Q''). Co-authored-by: N Heikki Linnakangas <hlinnakangas@pivotal.io> Co-authored-by: N Hans Zeller <hzeller@vmware.com> Co-authored-by: N Abhijit Subramanya <asubramanya@pivotal.io>
e52dd032 · Heikki Linnakangas · GitHub · 33c4582e · e52dd032 · e52dd032
9 changed file
--- a/src/backend/optimizer/plan/orca.c
+++ b/src/backend/optimizer/plan/orca.c
--- a/src/backend/optimizer/plan/transform.c
+++ b/src/backend/optimizer/plan/transform.c
@@ -39,27 +39,6 @@ static Query *make_sirvf_subquery(FuncExpr *fe);
 static bool safe_to_replace_sirvf_tle(Query *query);
 static bool safe_to_replace_sirvf_rte(Query *query);

-/**
- * Preprocess query structure for consumption by the optimizer
- */
-Query *
-preprocess_query_optimizer(PlannerInfo *root, Query *query, ParamListInfo boundParams)
-{
-#ifdef USE_ASSERT_CHECKING
-	Query *qcopy = (Query *) copyObject(query);
-#endif
-
-	/* fold all constant expressions */
-	Query *res = fold_constants(root, query, boundParams, GPOPT_MAX_FOLDED_CONSTANT_SIZE);
-
-#ifdef USE_ASSERT_CHECKING
-	Assert(equal(qcopy, query) && "Preprocessing should not modify original query object");
-#endif
-
-	return res;
-
-}
-
 /**
 * Normalize query before planning.
 */

--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
--- a/src/include/optimizer/transform.h
+++ b/src/include/optimizer/transform.h
@@ -21,6 +21,4 @@

 extern Query *normalize_query(Query *query);

-/* preprocess the query for the optimizer */
-extern Query *preprocess_query_optimizer(PlannerInfo *root, Query *query, ParamListInfo boundParams);
 #endif /* TRANSFORM_H */
--- a/src/test/regress/expected/bfv_olap.out
+++ b/src/test/regress/expected/bfv_olap.out
@@ -591,6 +591,101 @@ select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1144;
    | 1144
 (1 row)

+--
+-- Test a query with window function over an aggregate, and a subquery.
+--
+-- Github Issue https://github.com/greenplum-db/gpdb/issues/10143
+create table t1_github_issue_10143(
+  base_ym varchar(6),
+  code varchar(5),
+  name varchar(60)
+);
+create table t2_github_issue_10143(
+  base_ym varchar(6),
+  dong varchar(8),
+  code varchar(6),
+  salary numeric(18)
+);
+insert into t1_github_issue_10143 values ('a', 'acode', 'aname');
+insert into t2_github_issue_10143 values ('a', 'adong', 'acode', 1000);
+insert into t2_github_issue_10143 values ('b', 'bdong', 'bcode', 1100);
+set optimizer_trace_fallback = on;
+explain select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+                                                QUERY PLAN                                                
+----------------------------------------------------------------------------------------------------------
+ WindowAgg  (cost=1.02..2002.10 rows=1 width=176)
+   ->  Finalize GroupAggregate  (cost=1.02..1001.06 rows=1 width=38)
+         Group Key: a.code
+         ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=1.02..1001.04 rows=1 width=38)
+               Merge Key: a.code
+               ->  Partial GroupAggregate  (cost=1.02..1.04 rows=1 width=38)
+                     Group Key: a.code
+                     ->  Sort  (cost=1.02..1.02 rows=1 width=11)
+                           Sort Key: a.code
+                           ->  Seq Scan on t2_github_issue_10143 a  (cost=0.00..1.01 rows=1 width=11)
+   SubPlan 1
+     ->  Limit  (cost=0.00..1001.02 rows=1 width=6)
+           ->  Result  (cost=0.00..1001.02 rows=1 width=6)
+                 Filter: ((t1_github_issue_10143.code)::text = (a.code)::text)
+                 ->  Materialize  (cost=0.00..1001.01 rows=1 width=12)
+                       ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1001.01 rows=1 width=12)
+                             ->  Seq Scan on t1_github_issue_10143  (cost=0.00..1.01 rows=1 width=12)
+ Optimizer: Postgres query optimizer
+(18 rows)
+
+select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+ dongnm | sum  
+--------+------
+ aname  | 2100
+        | 2100
+(2 rows)
+
+select * from (select sum(a.salary) over(), count(*)
+               from t2_github_issue_10143 a
+               group by a.salary) T;
+ sum  | count 
+------+-------
+ 2100 |     1
+ 2100 |     1
+(2 rows)
+
+-- this query currently falls back, needs to be fixed
+select (select rn from (select row_number() over () as rn, name
+                        from t1_github_issue_10143
+                        where code = a.code
+                        group by name) T
+       ) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+ dongnm | sum  
+--------+------
+      1 | 2100
+        | 2100
+(2 rows)
+
+with cte as (select row_number() over (order by code) as rn1, code
+             from t2_github_issue_10143
+             group by code)
+select row_number() over (order by name) as rn2, name
+from t1_github_issue_10143
+group by name
+union all
+select * from cte;
+ rn2 | name  
+-----+-------
+   1 | aname
+   1 | acode
+   2 | bcode
+(3 rows)
+
+reset optimizer_trace_fallback;
 -- CLEANUP
 -- start_ignore
 drop schema bfv_olap cascade;

--- a/src/test/regress/expected/bfv_olap_optimizer.out
+++ b/src/test/regress/expected/bfv_olap_optimizer.out
+-- Tests for old bugs related to OLAP queries.
+-- First create a schema to contain the test tables, and few common test
+-- tables that are shared by several test queries.
+create schema bfv_olap;
+set search_path=bfv_olap;
+create table customer
+(
+  cn int not null,
+  cname text not null,
+  cloc text,
+  primary key (cn)
+) distributed by (cn);
+insert into customer values
+  ( 1, 'Macbeth', 'Inverness'),
+  ( 2, 'Duncan', 'Forres'),
+  ( 3, 'Lady Macbeth', 'Inverness'),
+  ( 4, 'Witches, Inc', 'Lonely Heath');
+create table vendor
+(
+  vn int not null,
+  vname text not null,
+  vloc text,
+  primary key (vn)
+) distributed by (vn);
+insert into vendor values
+  ( 10, 'Witches, Inc', 'Lonely Heath'),
+  ( 20, 'Lady Macbeth', 'Inverness'),
+  ( 30, 'Duncan', 'Forres'),
+  ( 40, 'Macbeth', 'Inverness'),
+  ( 50, 'Macduff', 'Fife');
+create table sale
+(
+  cn int not null,
+  vn int not null,
+  pn int not null,
+  dt date not null,
+  qty int not null,
+  prc float not null,
+  primary key (cn, vn, pn)
+) distributed by (cn,vn,pn);
+insert into sale values
+  ( 2, 40, 100, '1401-1-1', 1100, 2400),
+  ( 1, 10, 200, '1401-3-1', 1, 0),
+  ( 3, 40, 200, '1401-4-1', 1, 0),
+  ( 1, 20, 100, '1401-5-1', 1, 0),
+  ( 1, 30, 300, '1401-5-2', 1, 0),
+  ( 1, 50, 400, '1401-6-1', 1, 0),
+  ( 2, 50, 400, '1401-6-1', 1, 0),
+  ( 1, 30, 500, '1401-6-1', 12, 5),
+  ( 3, 30, 500, '1401-6-1', 12, 5),
+  ( 3, 30, 600, '1401-6-1', 12, 5),
+  ( 4, 40, 700, '1401-6-1', 1, 1),
+  ( 4, 40, 800, '1401-6-1', 1, 1);
+--
+-- Test case errors out when we define aggregates without combine functions
+-- and use it as an aggregate derived window function.
+--
+-- SETUP
+-- start_ignore
+drop table if exists toy;
+NOTICE:  table "toy" does not exist, skipping
+drop aggregate if exists mysum1(int4);
+NOTICE:  aggregate mysum1(int4) does not exist, skipping
+drop aggregate if exists mysum2(int4);
+NOTICE:  aggregate mysum2(int4) does not exist, skipping
+-- end_ignore
+create table toy(id,val) as select i,i from generate_series(1,5) i;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create aggregate mysum1(int4) (sfunc = int4_sum, combinefunc=int8pl, stype=bigint);
+create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint);
+-- TEST
+select id, val, sum(val) over (w), mysum1(val) over (w), mysum2(val) over (w) from toy window w as (order by id rows 2 preceding);
+ id | val | sum | mysum1 | mysum2 
+----+-----+-----+--------+--------
+  1 |   1 |   1 |      1 |      1
+  2 |   2 |   3 |      3 |      3
+  3 |   3 |   6 |      6 |      6
+  4 |   4 |   9 |      9 |      9
+  5 |   5 |  12 |     12 |     12
+(5 rows)
+
+-- CLEANUP
+-- start_ignore
+drop aggregate if exists mysum1(int4);
+drop aggregate if exists mysum2(int4);
+drop table if exists toy;
+-- end_ignore
+--
+-- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
+--
+-- SETUP
+-- start_ignore
+drop type if exists ema_type cascade;
+NOTICE:  type "ema_type" does not exist, skipping
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+ERROR:  type "ema_type" does not exist
+drop function if exists ema_fin(t ema_type) cascade;
+ERROR:  type "ema_type" does not exist
+drop aggregate if exists ema(float, float);
+NOTICE:  aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
+drop table if exists ema_test cascade;
+NOTICE:  table "ema_test" does not exist, skipping
+-- end_ignore
+create type ema_type as (x float, e float);
+create function ema_adv(t ema_type, v float, x float)
+    returns ema_type
+    as $$
+        begin
+            if t.e is null then
+                t.e = v;
+                t.x = x;
+            else
+                if t.x != x then
+                    raise exception 'ema smoothing x may not vary';
+                end if;
+                t.e = t.e + (v - t.e) * t.x;
+            end if;
+            return t;
+        end;
+    $$ language plpgsql;
+create function ema_fin(t ema_type)
+    returns float
+    as $$
+       begin
+           return t.e;
+       end;
+    $$ language plpgsql;
+create aggregate ema(float, float) (
+    sfunc = ema_adv,
+    stype = ema_type,
+    finalfunc = ema_fin,
+    initcond = '(,)');
+create table ema_test (k int, v float ) distributed by (k);
+insert into ema_test select i, 4*(i::float/20) + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
+-- TEST
+select k, v, ema(v, 0.9) over (order by k) from ema_test order by k;
+ k  |        v         |       ema        
+----+------------------+------------------
+  0 |               20 |               20
+  1 | 20.1619469809175 | 20.1457522828257
+  2 | 20.2480775301221 | 20.2378450053924
+  3 | 20.2592582628907 | 20.2571169371409
+  4 | 20.1969262078591 | 20.2029452807873
+  5 | 20.0630778703665 | 20.0770646114086
+  6 | 19.8602540378444 | 19.8819350952008
+  7 | 19.5915204428899 |  19.620561908121
+  8 | 19.2604444311898 | 19.2964561788829
+  9 | 18.8710678118655 | 18.9136066485672
+ 10 | 18.4278760968654 | 18.4764491520356
+ 11 | 17.9357643635105 |  17.989832842363
+ 12 |             17.4 | 17.4589832842363
+ 13 |  16.826182617407 | 16.8894626840899
+ 14 | 16.2202014332567 |   16.28712755834
+ 15 | 15.5881904510252 | 15.6580841617567
+ 16 | 14.9364817766693 |  15.008642015178
+ 17 | 14.2715574274766 | 14.3452658862467
+ 18 |             13.6 | 13.6745265886247
+ 19 | 12.9284425725234 | 13.0030509741335
+(20 rows)
+
+select k, v, ema(v, 0.9) over (order by k rows between unbounded preceding and current row) from ema_test order by k;
+ k  |        v         |       ema        
+----+------------------+------------------
+  0 |               20 |               20
+  1 | 20.1619469809175 | 20.1457522828257
+  2 | 20.2480775301221 | 20.2378450053924
+  3 | 20.2592582628907 | 20.2571169371409
+  4 | 20.1969262078591 | 20.2029452807873
+  5 | 20.0630778703665 | 20.0770646114086
+  6 | 19.8602540378444 | 19.8819350952008
+  7 | 19.5915204428899 |  19.620561908121
+  8 | 19.2604444311898 | 19.2964561788829
+  9 | 18.8710678118655 | 18.9136066485672
+ 10 | 18.4278760968654 | 18.4764491520356
+ 11 | 17.9357643635105 |  17.989832842363
+ 12 |             17.4 | 17.4589832842363
+ 13 |  16.826182617407 | 16.8894626840899
+ 14 | 16.2202014332567 |   16.28712755834
+ 15 | 15.5881904510252 | 15.6580841617567
+ 16 | 14.9364817766693 |  15.008642015178
+ 17 | 14.2715574274766 | 14.3452658862467
+ 18 |             13.6 | 13.6745265886247
+ 19 | 12.9284425725234 | 13.0030509741335
+(20 rows)
+
+-- CLEANUP
+-- start_ignore
+drop table if exists ema_test cascade;
+drop aggregate if exists ema(float, float);
+drop function if exists ema_fin(t ema_type) cascade;
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+drop type if exists ema_type cascade;
+-- end_ignore
+--
+-- Test with/without group by
+--
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+NOTICE:  table "r" does not exist, skipping
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+NOTICE:  updating distribution policy to match new PRIMARY KEY
+--TEST
+SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+
+SELECT MAX(a) AS m FROM r GROUP BY a ORDER BY m;
+ m 
+---
+(0 rows)
+
+SELECT MAX(a) AS m FROM r GROUP BY b;
+ m 
+---
+(0 rows)
+
+-- ORDER BY clause includes some grouping column or not
+SELECT MAX(a) AS m FROM R GROUP BY b ORDER BY m,b;
+ m 
+---
+(0 rows)
+
+SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m,b,e;
+ m 
+---
+(0 rows)
+
+SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m;
+ m 
+---
+(0 rows)
+
+-- ORDER BY 1 or more columns
+SELECT MAX(a),d,e AS m FROM r GROUP BY b,d,e ORDER BY m,e,d;
+ max | d | m 
+-----+---+---
+(0 rows)
+
+SELECT MIN(a),d,e AS m FROM r GROUP BY b,e,d ORDER BY e,d;
+ min | d | m 
+-----+---+---
+(0 rows)
+
+SELECT MAX(a) AS m FROM r GROUP BY b,c,d,e ORDER BY e,d;
+ m 
+---
+(0 rows)
+
+SELECT MAX(a) AS m FROM r GROUP BY b,e ORDER BY e;
+ m 
+---
+(0 rows)
+
+SELECT MAX(e) AS m FROM r GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+--
+-- ORDER BY clause includes some grouping column or not
+--
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS dm_calendar;
+NOTICE:  table "dm_calendar" does not exist, skipping
+-- end_ignore
+CREATE TABLE dm_calendar (
+    calendar_id bigint NOT NULL,
+    date_name character varying(200),
+    date_name_cn character varying(200),
+    calendar_date date,
+    current_day numeric(10,0),
+    month_id numeric(10,0),
+    month_name character varying(200),
+    month_name_cn character varying(200),
+    month_name_short character varying(200),
+    month_name_short_cn character varying(200),
+    days_in_month numeric(10,0),
+    first_of_month numeric(10,0),
+    last_month_id numeric(10,0),
+    month_end numeric(10,0),
+    quarter_id numeric(10,0),
+    quarter_name character varying(200),
+    quarter_name_cn character varying(200),
+    quarter_name_short character varying(200),
+    quarter_name_short_cn character varying(200),
+    year_id numeric(10,0),
+    year_name character varying(200),
+    year_name_cn character varying(200),
+    description character varying(500),
+    create_date timestamp without time zone,
+    month_week_num character varying(100),
+    month_week_begin character varying(100),
+    month_week_end character varying(100),
+    half_year character varying(100),
+    weekend_flag character varying(100),
+    holidays_flag character varying(100),
+    workday_flag character varying(100),
+    month_number numeric(10,0)
+) DISTRIBUTED BY (calendar_id);
+ALTER TABLE ONLY dm_calendar ADD CONSTRAINT dm_calendar_pkey PRIMARY KEY (calendar_id);
+--TEST
+SELECT "year_id" as id , min("year_name") as a  from (select "year_id" as "year_id" , min("year_name") as "year_name" from  "dm_calendar" group by "year_id") "dm_calendar3" group by "year_id" order by a ASC ;
+ id | a 
+----+---
+(0 rows)
+
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS dm_calendar;
+-- end_ignore
+--
+-- Test with/without group by with primary key as dist key
+--
+-- SETUP
+-- start_ignore
+drop table if exists t;
+NOTICE:  table "t" does not exist, skipping
+-- end_ignore
+create table t
+(
+    a int NOT NULL,
+    b int,
+    c character varying(200),
+    d numeric(10,0),
+    e date
+) distributed by (b);
+alter table t ADD CONSTRAINT pkey primary key (b);
+-- TEST
+SELECT MAX(a) AS m FROM t GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+
+-- CLEANUP
+-- start_ignore
+drop table if exists t;
+-- end_ignore
+--
+-- Passing through distribution matching type in default implementation
+--
+  
+-- TEST
+select cname,
+rank() over (partition by sale.cn order by vn)
+from sale, customer
+where sale.cn = customer.cn
+order by 1, 2;
+    cname     | rank 
+--------------+------
+ Duncan       |    1
+ Duncan       |    2
+ Lady Macbeth |    1
+ Lady Macbeth |    1
+ Lady Macbeth |    3
+ Macbeth      |    1
+ Macbeth      |    2
+ Macbeth      |    3
+ Macbeth      |    3
+ Macbeth      |    5
+ Witches, Inc |    1
+ Witches, Inc |    1
+(12 rows)
+
+--
+-- Optimizer query crashing for logical window with no window functions
+--
+-- SETUP
+create table mpp23240(a int, b int, c int, d int, e int, f int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- TEST
+select a, b,
+       case 1
+        when 10 then
+          sum(c) over(partition by a)
+        when 20 then
+          sum(d) over(partition by a)
+        else
+          5
+       end as sum1
+from (select * from mpp23240 where f > 10) x;
+ a | b | sum1 
+---+---+------
+(0 rows)
+
+-- CLEANUP
+-- start_ignore
+drop table mpp23240;
+-- end_ignore
+--
+-- Test for the bug reported at https://github.com/greenplum-db/gpdb/issues/2236
+--
+create table test1 (x int, y int, z double precision);
+insert into test1 select a, b, a*10 + b from generate_series(1, 5) a, generate_series(1, 5) b;
+select sum(z) over (partition by x) as sumx, sum(z) over (partition by y) as sumy from test1;
+ sumx | sumy 
+------+------
+   65 |  155
+   65 |  160
+   65 |  165
+   65 |  170
+   65 |  175
+  115 |  155
+  115 |  160
+  115 |  165
+  115 |  170
+  115 |  175
+  165 |  155
+  165 |  160
+  165 |  165
+  165 |  170
+  165 |  175
+  215 |  155
+  215 |  160
+  215 |  165
+  215 |  170
+  215 |  175
+  265 |  155
+  265 |  160
+  265 |  165
+  265 |  170
+  265 |  175
+(25 rows)
+
+drop table test1;
+--
+-- This failed at one point because of an over-zealous syntax check, with
+-- "window functions not allowed in WHERE clause" error.
+--
+select sum(g) from generate_series(1, 5) g
+where g in (
+  select rank() over (order by x) from generate_series(1,5) x
+);
+ sum 
+-----
+  15
+(1 row)
+
+--
+-- This caused a crash in ROLLUP planning at one point.
+--
+SELECT sale.vn
+FROM sale,vendor
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
+ vn 
+----
+ 40
+   
+   
+ 20
+   
+   
+ 30
+   
+   
+   
+ 10
+ 40
+   
+   
+ 50
+   
+   
+ 30
+   
+   
+ 40
+   
+   
+   
+   
+   
+   
+ 30
+   
+ 50
+   
+ 30
+ 40
+   
+(34 rows)
+
+SELECT DISTINCT sale.vn
+FROM sale,vendor
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
+ vn 
+----
+ 10
+ 20
+ 30
+ 40
+ 50
+   
+(6 rows)
+
+--
+-- Another ROLLUP query, that hit a bug in setting up the planner-generated
+-- subquery's targetlist. (https://github.com/greenplum-db/gpdb/issues/6754)
+--
+SELECT sale.vn, rank() over (partition by sale.vn)
+FROM vendor, sale
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( sale.vn);
+ vn | rank 
+----+------
+    |    1
+ 10 |    1
+ 20 |    1
+ 30 |    1
+ 40 |    1
+ 50 |    1
+(6 rows)
+
+--
+-- Test window function with constant PARTITION BY
+--
+CREATE TABLE testtab (a int4);
+insert into testtab values (1), (2);
+SELECT count(*) OVER (PARTITION BY 1) AS count FROM testtab;
+ count 
+-------
+     2
+     2
+(2 rows)
+
+-- Another variant, where the PARTITION BY is not a literal, but the
+-- planner can deduce that it's a constant through equivalence classes.
+SELECT 1
+FROM (
+  SELECT a, count(*) OVER (PARTITION BY a) FROM (VALUES (1,1)) AS foo(a)
+) AS sup(c, d)
+WHERE c = 87 ;
+ ?column? 
+----------
+(0 rows)
+
+--
+-- This used to crash, and/or produce incorrect results. The culprit was that a Hash Agg
+-- was used, but the planner put a Gather Merge at the top, without a Sort, even though
+-- a Hash Agg doesn't preserve the sort order.
+--
+SELECT sale.qty
+FROM sale
+GROUP BY ROLLUP((qty)) order by 1;
+ qty  
+------
+    1
+   12
+ 1100
+     
+(4 rows)
+
+--
+-- Test two-stage aggregate with grouping sets and a HAVING clause
+--
+-- persuade planner to choose a two-stage plan.
+set gp_motion_cost_per_row TO 1000;
+select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1;
+ cn | sum 
+----+-----
+  1 |   1
+  1 |   1
+  1 |   1
+  2 |   1
+  3 |   1
+(5 rows)
+
+-- same, but the HAVING clause matches a rolled up row.
+select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1144;
+ cn | sum  
+----+------
+    | 1144
+(1 row)
+
+--
+-- Test a query with window function over an aggregate, and a subquery.
+--
+-- Github Issue https://github.com/greenplum-db/gpdb/issues/10143
+create table t1_github_issue_10143(
+  base_ym varchar(6),
+  code varchar(5),
+  name varchar(60)
+);
+create table t2_github_issue_10143(
+  base_ym varchar(6),
+  dong varchar(8),
+  code varchar(6),
+  salary numeric(18)
+);
+insert into t1_github_issue_10143 values ('a', 'acode', 'aname');
+insert into t2_github_issue_10143 values ('a', 'adong', 'acode', 1000);
+insert into t2_github_issue_10143 values ('b', 'bdong', 'bcode', 1100);
+set optimizer_trace_fallback = on;
+explain select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+                                                QUERY PLAN                                                
+------------------------------------------------------------------------------------------------------------------------
+ WindowAgg  (cost=0.00..1324055.31 rows=1 width=16)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1324055.31 rows=1 width=16)
+         ->  GroupAggregate  (cost=0.00..431.00 rows=1 width=14)
+               Group Key: t2_github_issue_10143.code
+               ->  Sort  (cost=0.00..431.00 rows=1 width=11)
+                     Sort Key: t2_github_issue_10143.code
+                     ->  Redistribute Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=11)
+                           Hash Key: t2_github_issue_10143.code
+                           ->  Seq Scan on t2_github_issue_10143  (cost=0.00..431.00 rows=1 width=11)
+   SubPlan 1
+                 ->  Limit  (cost=0.00..431.00 rows=1 width=6)
+                       ->  Result  (cost=0.00..431.00 rows=1 width=6)
+                             Filter: ((t1_github_issue_10143.code)::text = (t2_github_issue_10143.code)::text)
+                             ->  Materialize  (cost=0.00..431.00 rows=1 width=12)
+                                   ->  Broadcast Motion 3:3  (slice3; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
+                                         ->  Seq Scan on t1_github_issue_10143  (cost=0.00..431.00 rows=1 width=12)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(17 rows)
+
+select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+ dongnm | sum  
+--------+------
+ aname  | 2100
+        | 2100
+(2 rows)
+
+select * from (select sum(a.salary) over(), count(*)
+               from t2_github_issue_10143 a
+               group by a.salary) T;
+ sum  | count 
+------+-------
+ 2100 |     1
+ 2100 |     1
+(2 rows)
+
+-- this query currently falls back, needs to be fixed
+select (select rn from (select row_number() over () as rn, name
+                        from t1_github_issue_10143
+                        where code = a.code
+                        group by name) T
+       ) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+INFO:  GPORCA failed to produce a plan, falling back to planner
+DETAIL:  Query-to-DXL Translation: No variable entry found due to incorrect normalization of query
+ dongnm | sum  
+--------+------
+      1 | 2100
+        | 2100
+(2 rows)
+
+with cte as (select row_number() over (order by code) as rn1, code
+             from t2_github_issue_10143
+             group by code)
+select row_number() over (order by name) as rn2, name
+from t1_github_issue_10143
+group by name
+union all
+select * from cte;
+ rn2 | name  
+-----+-------
+   1 | aname
+   1 | acode
+   2 | bcode
+(3 rows)
+
+reset optimizer_trace_fallback;
+-- CLEANUP
+-- start_ignore
+drop schema bfv_olap cascade;
+-- end_ignore
--- a/src/test/regress/expected/olap_window_seq.out
+++ b/src/test/regress/expected/olap_window_seq.out
@@ -7853,8 +7853,11 @@ from
 group by 
    2*b.g,
    lower(f);
-ERROR:  unresolved grouping key in window query
-HINT:  You might need to use explicit aliases and/or to refer to grouping keys in the same way throughout the query.
+ ?column? | lower | sum 
+----------+-------+-----
+        2 | b     |   2
+(1 row)
+
 -- End MPP-12082
 -- MPP-12913
 select count(*) over (partition by 1 order by cn rows between 1 preceding and 1 preceding) from sale;

--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -626,20 +626,19 @@ explain (costs off)
 select first_value(max(x)) over (), y
  from (select unique1 as x, ten+four as y from tenk1) ss
  group by y;
-                               QUERY PLAN                               
------------------------------------------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
 WindowAgg
   ->  Gather Motion 3:1  (slice1; segments: 3)
-         ->  Subquery Scan on "Window"
-               ->  Finalize HashAggregate
-                     Group Key: ((tenk1.ten + tenk1.four))
-                     ->  Redistribute Motion 3:3  (slice2; segments: 3)
-                           Hash Key: ((tenk1.ten + tenk1.four))
-                           ->  Partial HashAggregate
-                                 Group Key: (tenk1.ten + tenk1.four)
-                                 ->  Seq Scan on tenk1
+         ->  Finalize HashAggregate
+               Group Key: ((tenk1.ten + tenk1.four))
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                     Hash Key: ((tenk1.ten + tenk1.four))
+                     ->  Partial HashAggregate
+                           Group Key: (tenk1.ten + tenk1.four)
+                           ->  Seq Scan on tenk1
 Optimizer: Postgres query optimizer
-(11 rows)
+(10 rows)

 -- test non-default frame specifications
 SELECT four, ten,

--- a/src/test/regress/sql/bfv_olap.sql
+++ b/src/test/regress/sql/bfv_olap.sql
@@ -392,6 +392,64 @@ select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1;
 select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1144;


+--
+-- Test a query with window function over an aggregate, and a subquery.
+--
+-- Github Issue https://github.com/greenplum-db/gpdb/issues/10143
+create table t1_github_issue_10143(
+  base_ym varchar(6),
+  code varchar(5),
+  name varchar(60)
+);
+
+create table t2_github_issue_10143(
+  base_ym varchar(6),
+  dong varchar(8),
+  code varchar(6),
+  salary numeric(18)
+);
+
+insert into t1_github_issue_10143 values ('a', 'acode', 'aname');
+insert into t2_github_issue_10143 values ('a', 'adong', 'acode', 1000);
+insert into t2_github_issue_10143 values ('b', 'bdong', 'bcode', 1100);
+
+set optimizer_trace_fallback = on;
+
+explain select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+
+select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+
+select * from (select sum(a.salary) over(), count(*)
+               from t2_github_issue_10143 a
+               group by a.salary) T;
+
+-- this query currently falls back, needs to be fixed
+select (select rn from (select row_number() over () as rn, name
+                        from t1_github_issue_10143
+                        where code = a.code
+                        group by name) T
+       ) as dongnm
+,sum(sum(a.salary)) over()
+from t2_github_issue_10143 a
+group by a.code;
+
+with cte as (select row_number() over (order by code) as rn1, code
+             from t2_github_issue_10143
+             group by code)
+select row_number() over (order by name) as rn2, name
+from t1_github_issue_10143
+group by name
+union all
+select * from cte;
+
+reset optimizer_trace_fallback;
+
 -- CLEANUP
 -- start_ignore
 drop schema bfv_olap cascade;