BFV test migration for aggregate, partition and olap tests

8443c62e · Omer Arap · Xin Zhang · 3b2f9a0a · 8443c62e · 8443c62e
10 changed files
--- a/src/test/regress/expected/bfv_aggregate.out
+++ b/src/test/regress/expected/bfv_aggregate.out
+---
+--- Window function with outer references in PARTITION BY/ORDER BY clause
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS x_outer;
+NOTICE:  table "x_outer" does not exist, skipping
+DROP TABLE IF EXISTS y_inner;
+NOTICE:  table "y_inner" does not exist, skipping
+-- end_ignore
+create table x_outer (a int, b int, c int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table y_inner (d int, e int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'd' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into x_outer select i%3, i, i from generate_series(1,10) i;
+insert into y_inner select i%3, i from generate_series(1,10) i;
+analyze x_outer;
+analyze y_inner;
+-- TEST
+select * from x_outer where a in (select row_number() over(partition by a) from y_inner) order by 1, 2;
+ a | b  | c  
+---+----+----
+ 1 |  1 |  1
+ 1 |  4 |  4
+ 1 |  7 |  7
+ 1 | 10 | 10
+ 2 |  2 |  2
+ 2 |  5 |  5
+ 2 |  8 |  8
+(7 rows)
+select * from x_outer where a in (select rank() over(order by a) from y_inner) order by 1, 2;
+ a | b  | c  
+---+----+----
+ 1 |  1 |  1
+ 1 |  4 |  4
+ 1 |  7 |  7
+ 1 | 10 | 10
+(4 rows)
+select * from x_outer where a not in (select rank() over(order by a) from y_inner) order by 1, 2;
+ a | b | c 
+---+---+---
+ 0 | 3 | 3
+ 0 | 6 | 6
+ 0 | 9 | 9
+ 2 | 2 | 2
+ 2 | 5 | 5
+ 2 | 8 | 8
+(6 rows)
+select * from x_outer where exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
+ a | b  | c  
+---+----+----
+ 0 |  3 |  3
+ 0 |  6 |  6
+ 0 |  9 |  9
+ 1 |  1 |  1
+ 1 |  4 |  4
+ 1 |  7 |  7
+ 1 | 10 | 10
+ 2 |  2 |  2
+ 2 |  5 |  5
+ 2 |  8 |  8
+(10 rows)
+select * from x_outer where not exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
+ a | b | c 
+---+---+---
+(0 rows)
+select * from x_outer where a in (select last_value(d) over(partition by b order by e rows between e preceding and e+1 following) from y_inner) order by 1, 2;
+ a | b  | c  
+---+----+----
+ 0 |  3 |  3
+ 0 |  6 |  6
+ 0 |  9 |  9
+ 1 |  1 |  1
+ 1 |  4 |  4
+ 1 |  7 |  7
+ 1 | 10 | 10
+ 2 |  2 |  2
+ 2 |  5 |  5
+ 2 |  8 |  8
+(10 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS x_outer;
+DROP TABLE IF EXISTS y_inner;
+-- end_ignore
+---
+--- Testing aggregation in a query
+---
+-- SETUP
+create table d (col1 timestamp, col2 int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into d select to_date('2014-01-01', 'YYYY-DD-MM'), generate_series(1,100);
+-- TEST
+select 1, to_char(col1, 'YYYY'), median(col2) from d group by 1, 2;
+ ?column? | to_char | median 
+----------+---------+--------
+        1 | 2014    |   50.5
+(1 row)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS d;
+-- end_ignore
+---
+--- Testing if aggregate derived window function produces incorrect results
+---
+-- SETUP
+-- start_ignore
+drop table if exists toy;
+NOTICE:  table "toy" does not exist, skipping
+drop aggregate mysum1(int4);
+ERROR:  aggregate mysum1(integer) does not exist
+drop aggregate mysum2(int4);
+ERROR:  aggregate mysum2(integer) does not exist
+-- end_ignore
+create table toy(id,val) as select i,i from generate_series(1,5) i;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create aggregate mysum1(int4) (sfunc = int4_sum, prefunc=int8pl, stype=bigint);
+create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint);
+-- TEST
+select
+   id, val,
+   sum(val) over (w),
+   mysum1(val) over (w),
+   mysum2(val) over (w)
+from toy
+window w as (order by id rows 2 preceding);
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop table if exists toy;
+drop aggregate mysum1(int4);
+drop aggregate mysum2(int4);
+-- end_ignore
+---
+--- Error when executing an aggregate with anyarray as return type
+---
+-- SETUP
+CREATE OR REPLACE FUNCTION tfp(anyarray,anyelement) RETURNS anyarray AS -- transition function: appends the element ($2) to the state array ($1)
+'select $1 || $2' LANGUAGE SQL;
+CREATE OR REPLACE FUNCTION ffp(anyarray) RETURNS anyarray AS -- final function: returns the state array unchanged
+'select $1' LANGUAGE SQL;
+CREATE AGGREGATE myaggp20a(BASETYPE = anyelement, SFUNC = tfp, -- polymorphic aggregate that accumulates its inputs into an array via tfp
+  STYPE = anyarray, FINALFUNC = ffp, INITCOND = '{}'); -- the state starts as the empty array
+-- SQL helper array_sort: unnests the input array and rebuilds it with its elements in ascending order
+CREATE OR REPLACE FUNCTION array_sort (ANYARRAY)
+RETURNS ANYARRAY LANGUAGE SQL
+AS $$
+SELECT ARRAY(SELECT unnest($1) ORDER BY 1)
+$$;
+create temp table t(f1 int, f2 int[], f3 text);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- TEST
+insert into t values(1,array[1],'a');
+insert into t values(1,array[11],'b');
+insert into t values(1,array[111],'c');
+insert into t values(2,array[2],'a');
+insert into t values(2,array[22],'b');
+insert into t values(2,array[222],'c');
+insert into t values(3,array[3],'a');
+insert into t values(3,array[3],'b');
+select f3, array_sort(myaggp20a(f1)) from t group by f3 order by f3;
+ f3 | array_sort 
+----+------------
+ a  | {1,2,3}
+ b  | {1,2,3}
+ c  | {1,2}
+(3 rows)
+-- CLEANUP
+-- start_ignore
+drop table if exists t;
+drop function array_sort (ANYARRAY) cascade;
+drop function tfp(anyarray,anyelement) cascade;
+NOTICE:  drop cascades to function myaggp20a(anyelement)
+drop function ffp(anyarray) cascade;
+-- end_ignore
+-- start_ignore
+-- start_ignore
+drop language if exists plpythonu;
+create language plpythonu;
+-- end_ignore
+create or replace function count_operator(explain_query text, operator text) returns int as -- PL/Python helper: executes explain_query and returns the number of result rows whose QUERY PLAN text contains operator, compared case-insensitively
+$$
+rv = plpy.execute(explain_query)
+search_text = operator
+result = 0
+for i in range(len(rv)):
+    cur_line = rv[i]['QUERY PLAN']
+    if search_text.lower() in cur_line.lower():
+        result = result+1
+return result
+$$
+language plpythonu;
+---
+--- Testing adding a traceflag to favor multi-stage aggregation
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS multi_stage_test;
+NOTICE:  table "multi_stage_test" does not exist, skipping
+-- end_ignore
+create table multi_stage_test(a int, b int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into multi_stage_test select i, i%4 from generate_series(1,10) i;
+analyze multi_stage_test;
+-- TEST
+-- start_ignore
+set optimizer_segments=2;
+set optimizer_prefer_multistage_agg = on;
+-- end_ignore
+select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
+ count_operator 
+----------------
+              0
+(1 row)
+-- start_ignore
+set optimizer_prefer_multistage_agg = off;
+-- end_ignore
+select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
+ count_operator 
+----------------
+              0
+(1 row)
+--CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS multi_stage_test;
+reset optimizer_segments;
+set optimizer_prefer_multistage_agg = off;
+-- end_ignore
+---
+--- Testing not picking HashAgg for aggregates without preliminary functions
+---
+-- SETUP
+-- start_ignore
+SET optimizer_disable_missing_stats_collection=on;
+DROP TABLE IF EXISTS attribute_table;
+NOTICE:  table "attribute_table" does not exist, skipping
+-- end_ignore
+CREATE TABLE attribute_table (product_id integer, attribute_id integer,attribute text, attribute2 text,attribute_ref_lists text,short_name text,attribute6 text,attribute5 text,measure double precision,unit character varying(60)) DISTRIBUTED BY (product_id ,attribute_id);
+-- Transition function used by the concat aggregate in this test: appends the second text argument to the first
+CREATE OR REPLACE FUNCTION do_concat(text,text)
+RETURNS text
+-- concatenates two strings, returning the other argument when one is NULL (NOTE(review): RETURNS NULL ON NULL INPUT below appears to make the NULL branches unreachable for direct calls - confirm)
+AS 'SELECT CASE WHEN $1 IS NULL THEN $2
+WHEN $2 IS NULL THEN $1
+ELSE $1 || $2 END;'
+     LANGUAGE SQL
+     IMMUTABLE
+     RETURNS NULL ON NULL INPUT;
+-- UDA definition. No PREFUNC exists
+-- start_ignore
+DROP AGGREGATE IF EXISTS concat(text);
+NOTICE:  aggregate concat(text) does not exist, skipping
+-- end_ignore
+CREATE AGGREGATE concat(text) (
+   -- text/string concatenation aggregate; it defines neither a PREFUNC nor a FINALFUNC
+   SFUNC = do_concat, -- transition function called for each input string to build up the result
+   STYPE = text, -- state type; no FINALFUNC is set (a FINALFUNC=final_func option was left commented out here), so the final state value is the result
+   INITCOND = '' -- start from an empty string
+);
+-- TEST
+-- cook some stats
+-- start_ignore
+set allow_system_table_mods='DML';
+-- end_ignore
+UPDATE pg_class set reltuples=524592::real, relpages=2708::integer where oid = 'attribute_table'::regclass;
+select count_operator('explain select product_id,concat(E''#attribute_''||attribute_id::varchar||E'':''||attribute) as attr FROM attribute_table GROUP BY product_id;','HashAggregate');
+ count_operator 
+----------------
+              0
+(1 row)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS attribute_table;
+DROP AGGREGATE IF EXISTS concat(text);
+drop function do_concat(text,text) cascade;
+SET optimizer_disable_missing_stats_collection=off;
+-- end_ignore
+---
+--- Testing fallback to planner when the agg used in window does not have either prelim or inverse prelim function.
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS foo;
+NOTICE:  table "foo" does not exist, skipping
+-- end_ignore
+create table foo(a int, b text) distributed by (a);
+-- TEST
+insert into foo values (1,'aaa'), (2,'bbb'), (3,'ccc');
+-- should fall back
+select string_agg(b) over (partition by a) from foo order by 1;
+ string_agg 
+------------
+ aaa
+ bbb
+ ccc
+(3 rows)
+select string_agg(b) over (partition by a,b) from foo order by 1;
+ string_agg 
+------------
+ aaa
+ bbb
+ ccc
+(3 rows)
+-- should not fall back
+select max(b) over (partition by a) from foo order by 1;
+ max 
+-----
+ aaa
+ bbb
+ ccc
+(3 rows)
+select count_operator('explain select max(b) over (partition by a) from foo order by 1;', 'Table Scan');
+ count_operator 
+----------------
+              0
+(1 row)
+-- fall back
+select string_agg(b) over (partition by a+1) from foo order by 1;
+ string_agg 
+------------
+ aaa
+ bbb
+ ccc
+(3 rows)
+select string_agg(b || 'txt') over (partition by a) from foo order by 1;
+ string_agg 
+------------
+ aaatxt
+ bbbtxt
+ ccctxt
+(3 rows)
+select string_agg(b || 'txt') over (partition by a+1) from foo order by 1;
+ string_agg 
+------------
+ aaatxt
+ bbbtxt
+ ccctxt
+(3 rows)
+-- fall back and planner's plan produces unsupported execution
+select string_agg(b) over (partition by a order by a) from foo order by 1;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+select string_agg(b || 'txt') over (partition by a,b order by a,b) from foo order by 1;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+select '1' || string_agg(b) over (partition by a+1 order by a+1) from foo;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop function count_operator(text,text);
+DROP TABLE IF EXISTS foo;
+drop function if exists count_operator(explain_query text, operator text);
+NOTICE:  function count_operator(text,text) does not exist, skipping
+drop language if exists plpythonu;
+-- end_ignore
--- a/src/test/regress/expected/bfv_aggregate_optimizer.out
+++ b/src/test/regress/expected/bfv_aggregate_optimizer.out
+---
+--- Window function with outer references in PARTITION BY/ORDER BY clause
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS x_outer;
+NOTICE:  table "x_outer" does not exist, skipping
+DROP TABLE IF EXISTS y_inner;
+NOTICE:  table "y_inner" does not exist, skipping
+-- end_ignore
+create table x_outer (a int, b int, c int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table y_inner (d int, e int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'd' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into x_outer select i%3, i, i from generate_series(1,10) i;
+insert into y_inner select i%3, i from generate_series(1,10) i;
+analyze x_outer;
+analyze y_inner;
+-- TEST
+select * from x_outer where a in (select row_number() over(partition by a) from y_inner) order by 1, 2;
+ a | b  | c  
+---+----+----
+ 1 |  1 |  1
+ 1 |  4 |  4
+ 1 |  7 |  7
+ 1 | 10 | 10
+ 2 |  2 |  2
+ 2 |  5 |  5
+ 2 |  8 |  8
+(7 rows)
+select * from x_outer where a in (select rank() over(order by a) from y_inner) order by 1, 2;
+ a | b  | c  
+---+----+----
+ 1 |  1 |  1
+ 1 |  4 |  4
+ 1 |  7 |  7
+ 1 | 10 | 10
+(4 rows)
+select * from x_outer where a not in (select rank() over(order by a) from y_inner) order by 1, 2;
+ a | b | c 
+---+---+---
+ 0 | 3 | 3
+ 0 | 6 | 6
+ 0 | 9 | 9
+ 2 | 2 | 2
+ 2 | 5 | 5
+ 2 | 8 | 8
+(6 rows)
+select * from x_outer where exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
+ a | b  | c  
+---+----+----
+ 0 |  3 |  3
+ 0 |  6 |  6
+ 0 |  9 |  9
+ 1 |  1 |  1
+ 1 |  4 |  4
+ 1 |  7 |  7
+ 1 | 10 | 10
+ 2 |  2 |  2
+ 2 |  5 |  5
+ 2 |  8 |  8
+(10 rows)
+select * from x_outer where not exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
+ a | b | c 
+---+---+---
+(0 rows)
+select * from x_outer where a in (select last_value(d) over(partition by b order by e rows between e preceding and e+1 following) from y_inner) order by 1, 2;
+ a | b  | c  
+---+----+----
+ 0 |  3 |  3
+ 0 |  6 |  6
+ 0 |  9 |  9
+ 1 |  1 |  1
+ 1 |  4 |  4
+ 1 |  7 |  7
+ 1 | 10 | 10
+ 2 |  2 |  2
+ 2 |  5 |  5
+ 2 |  8 |  8
+(10 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS x_outer;
+DROP TABLE IF EXISTS y_inner;
+-- end_ignore
+---
+--- Testing aggregation in a query
+---
+-- SETUP
+create table d (col1 timestamp, col2 int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into d select to_date('2014-01-01', 'YYYY-DD-MM'), generate_series(1,100);
+-- TEST
+select 1, to_char(col1, 'YYYY'), median(col2) from d group by 1, 2;
+ ?column? | to_char | median 
+----------+---------+--------
+        1 | 2014    |   50.5
+(1 row)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS d;
+-- end_ignore
+---
+--- Testing if aggregate derived window function produces incorrect results
+---
+-- SETUP
+-- start_ignore
+drop table if exists toy;
+NOTICE:  table "toy" does not exist, skipping
+drop aggregate mysum1(int4);
+ERROR:  aggregate mysum1(integer) does not exist
+drop aggregate mysum2(int4);
+ERROR:  aggregate mysum2(integer) does not exist
+-- end_ignore
+create table toy(id,val) as select i,i from generate_series(1,5) i;
+NOTICE:  Table doesn't have 'distributed by' clause. Creating a NULL policy entry.
+create aggregate mysum1(int4) (sfunc = int4_sum, prefunc=int8pl, stype=bigint);
+create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint);
+-- TEST
+select
+   id, val,
+   sum(val) over (w),
+   mysum1(val) over (w),
+   mysum2(val) over (w)
+from toy
+window w as (order by id rows 2 preceding);
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop table if exists toy;
+drop aggregate mysum1(int4);
+drop aggregate mysum2(int4);
+-- end_ignore
+---
+--- Error when executing an aggregate with anyarray as return type
+---
+-- SETUP
+CREATE OR REPLACE FUNCTION tfp(anyarray,anyelement) RETURNS anyarray AS -- transition function: appends the element ($2) to the state array ($1)
+'select $1 || $2' LANGUAGE SQL;
+CREATE OR REPLACE FUNCTION ffp(anyarray) RETURNS anyarray AS -- final function: returns the state array unchanged
+'select $1' LANGUAGE SQL;
+CREATE AGGREGATE myaggp20a(BASETYPE = anyelement, SFUNC = tfp, -- polymorphic aggregate that accumulates its inputs into an array via tfp
+  STYPE = anyarray, FINALFUNC = ffp, INITCOND = '{}'); -- the state starts as the empty array
+-- SQL helper array_sort: unnests the input array and rebuilds it with its elements in ascending order
+CREATE OR REPLACE FUNCTION array_sort (ANYARRAY)
+RETURNS ANYARRAY LANGUAGE SQL
+AS $$
+SELECT ARRAY(SELECT unnest($1) ORDER BY 1)
+$$;
+create temp table t(f1 int, f2 int[], f3 text);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- TEST
+insert into t values(1,array[1],'a');
+insert into t values(1,array[11],'b');
+insert into t values(1,array[111],'c');
+insert into t values(2,array[2],'a');
+insert into t values(2,array[22],'b');
+insert into t values(2,array[222],'c');
+insert into t values(3,array[3],'a');
+insert into t values(3,array[3],'b');
+select f3, array_sort(myaggp20a(f1)) from t group by f3 order by f3;
+ f3 | array_sort 
+----+------------
+ a  | {1,2,3}
+ b  | {1,2,3}
+ c  | {1,2}
+(3 rows)
+-- CLEANUP
+-- start_ignore
+drop table if exists t;
+drop function array_sort (ANYARRAY) cascade;
+drop function tfp(anyarray,anyelement) cascade;
+NOTICE:  drop cascades to function myaggp20a(anyelement)
+drop function ffp(anyarray) cascade;
+-- end_ignore
+-- start_ignore
+-- start_ignore
+drop language if exists plpythonu;
+create language plpythonu;
+-- end_ignore
+create or replace function count_operator(explain_query text, operator text) returns int as -- PL/Python helper: executes explain_query and returns the number of result rows whose QUERY PLAN text contains operator, compared case-insensitively
+$$
+rv = plpy.execute(explain_query)
+search_text = operator
+result = 0
+for i in range(len(rv)):
+    cur_line = rv[i]['QUERY PLAN']
+    if search_text.lower() in cur_line.lower():
+        result = result+1
+return result
+$$
+language plpythonu;
+---
+--- Testing adding a traceflag to favor multi-stage aggregation
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS multi_stage_test;
+NOTICE:  table "multi_stage_test" does not exist, skipping
+-- end_ignore
+create table multi_stage_test(a int, b int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into multi_stage_test select i, i%4 from generate_series(1,10) i;
+analyze multi_stage_test;
+-- TEST
+-- start_ignore
+set optimizer_segments=2;
+set optimizer_prefer_multistage_agg = on;
+-- end_ignore
+select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
+ count_operator 
+----------------
+              2
+(1 row)
+-- start_ignore
+set optimizer_prefer_multistage_agg = off;
+-- end_ignore
+select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
+ count_operator 
+----------------
+              1
+(1 row)
+--CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS multi_stage_test;
+reset optimizer_segments;
+set optimizer_prefer_multistage_agg = off;
+-- end_ignore
+---
+--- Testing not picking HashAgg for aggregates without preliminary functions
+---
+-- SETUP
+-- start_ignore
+SET optimizer_disable_missing_stats_collection=on;
+DROP TABLE IF EXISTS attribute_table;
+NOTICE:  table "attribute_table" does not exist, skipping
+-- end_ignore
+CREATE TABLE attribute_table (product_id integer, attribute_id integer,attribute text, attribute2 text,attribute_ref_lists text,short_name text,attribute6 text,attribute5 text,measure double precision,unit character varying(60)) DISTRIBUTED BY (product_id ,attribute_id);
+-- Transition function used by the concat aggregate in this test: appends the second text argument to the first
+CREATE OR REPLACE FUNCTION do_concat(text,text)
+RETURNS text
+-- concatenates two strings, returning the other argument when one is NULL (NOTE(review): RETURNS NULL ON NULL INPUT below appears to make the NULL branches unreachable for direct calls - confirm)
+AS 'SELECT CASE WHEN $1 IS NULL THEN $2
+WHEN $2 IS NULL THEN $1
+ELSE $1 || $2 END;'
+     LANGUAGE SQL
+     IMMUTABLE
+     RETURNS NULL ON NULL INPUT;
+-- UDA definition. No PREFUNC exists
+-- start_ignore
+DROP AGGREGATE IF EXISTS concat(text);
+NOTICE:  aggregate concat(text) does not exist, skipping
+-- end_ignore
+CREATE AGGREGATE concat(text) (
+   -- text/string concatenation aggregate; it defines neither a PREFUNC nor a FINALFUNC
+   SFUNC = do_concat, -- transition function called for each input string to build up the result
+   STYPE = text, -- state type; no FINALFUNC is set (a FINALFUNC=final_func option was left commented out here), so the final state value is the result
+   INITCOND = '' -- start from an empty string
+);
+-- TEST
+-- cook some stats
+-- start_ignore
+set allow_system_table_mods='DML';
+-- end_ignore
+UPDATE pg_class set reltuples=524592::real, relpages=2708::integer where oid = 'attribute_table'::regclass;
+select count_operator('explain select product_id,concat(E''#attribute_''||attribute_id::varchar||E'':''||attribute) as attr FROM attribute_table GROUP BY product_id;','HashAggregate');
+ count_operator 
+----------------
+              0
+(1 row)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS attribute_table;
+DROP AGGREGATE IF EXISTS concat(text);
+drop function do_concat(text,text) cascade;
+SET optimizer_disable_missing_stats_collection=off;
+-- end_ignore
+---
+--- Testing fallback to planner when the agg used in window does not have either prelim or inverse prelim function.
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS foo;
+NOTICE:  table "foo" does not exist, skipping
+-- end_ignore
+create table foo(a int, b text) distributed by (a);
+-- TEST
+insert into foo values (1,'aaa'), (2,'bbb'), (3,'ccc');
+-- should fall back
+select string_agg(b) over (partition by a) from foo order by 1;
+ string_agg 
+------------
+ aaa
+ bbb
+ ccc
+(3 rows)
+select string_agg(b) over (partition by a,b) from foo order by 1;
+ string_agg 
+------------
+ aaa
+ bbb
+ ccc
+(3 rows)
+-- should not fall back
+select max(b) over (partition by a) from foo order by 1;
+ max 
+-----
+ aaa
+ bbb
+ ccc
+(3 rows)
+select count_operator('explain select max(b) over (partition by a) from foo order by 1;', 'Table Scan');
+ count_operator 
+----------------
+              1
+(1 row)
+-- fall back
+select string_agg(b) over (partition by a+1) from foo order by 1;
+ string_agg 
+------------
+ aaa
+ bbb
+ ccc
+(3 rows)
+select string_agg(b || 'txt') over (partition by a) from foo order by 1;
+ string_agg 
+------------
+ aaatxt
+ bbbtxt
+ ccctxt
+(3 rows)
+select string_agg(b || 'txt') over (partition by a+1) from foo order by 1;
+ string_agg 
+------------
+ aaatxt
+ bbbtxt
+ ccctxt
+(3 rows)
+-- fall back and planner's plan produces unsupported execution
+select string_agg(b) over (partition by a order by a) from foo order by 1;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+select string_agg(b || 'txt') over (partition by a,b order by a,b) from foo order by 1;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+select '1' || string_agg(b) over (partition by a+1 order by a+1) from foo;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop function count_operator(text,text);
+DROP TABLE IF EXISTS foo;
+drop function if exists count_operator(explain_query text, operator text);
+NOTICE:  function count_operator(text,text) does not exist, skipping
+drop language if exists plpythonu;
+-- end_ignore
--- a/src/test/regress/expected/bfv_olap.out
+++ b/src/test/regress/expected/bfv_olap.out
+---
+--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
+---
+-- SETUP
+-- start_ignore
+drop table if exists toy;
+NOTICE:  table "toy" does not exist, skipping
+drop aggregate if exists mysum1(int4);
+NOTICE:  aggregate mysum1(int4) does not exist, skipping
+drop aggregate if exists mysum2(int4);
+NOTICE:  aggregate mysum2(int4) does not exist, skipping
+-- end_ignore
+create table toy(id,val) as select i,i from generate_series(1,5) i;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create aggregate mysum1(int4) (sfunc = int4_sum, prefunc=int8pl, stype=bigint);
+create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint);
+-- TEST
+select id, val, sum(val) over (w), mysum1(val) over (w), mysum2(val) over (w) from toy window w as (order by id rows 2 preceding);
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop aggregate if exists mysum1(int4);
+drop aggregate if exists mysum2(int4);
+drop table if exists toy;
+-- end_ignore
+---
+--- Test case errors out when an aggregate without a preliminary function (ema) is used as a window function with an explicit ROWS frame.
+---
+-- SETUP
+-- start_ignore
+drop type if exists ema_type cascade;
+NOTICE:  type "ema_type" does not exist, skipping
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+ERROR:  type "ema_type" does not exist
+drop function if exists ema_fin(t ema_type) cascade;
+ERROR:  type "ema_type" does not exist
+drop aggregate if exists ema(float, float);
+NOTICE:  aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
+drop table if exists ema_test cascade;
+NOTICE:  table "ema_test" does not exist, skipping
+-- end_ignore
+create type ema_type as (x float, e float);
+create function ema_adv(t ema_type, v float, x float) -- state transition for ema: while t.e is null it seeds t.e with v and t.x with x; afterwards it raises if x differs from t.x, else moves t.e toward v by the fraction t.x
+    returns ema_type
+    as $$
+        begin
+            if t.e is null then
+                t.e = v;
+                t.x = x;
+            else
+                if t.x != x then
+                    raise exception 'ema smoothing x may not vary';
+                end if;
+                t.e = t.e + (v - t.e) * t.x;
+            end if;
+            return t;
+        end;
+    $$ language plpgsql;
+create function ema_fin(t ema_type) -- final function for ema: returns the e field of the state
+    returns float
+    as $$
+       begin
+           return t.e;
+       end;
+    $$ language plpgsql;
+create aggregate ema(float, float) ( -- two-argument aggregate built from ema_adv and ema_fin; no prefunc is specified
+    sfunc = ema_adv,
+    stype = ema_type,
+    finalfunc = ema_fin,
+    initcond = '(,)'); -- initial state is an ema_type value with both fields null
+create table ema_test (k int, v float ) distributed by (k);
+insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
+-- TEST
+select k, v, ema(v, 0.9) over (order by k rows between unbounded preceding and current row) from ema_test order by k;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop table if exists ema_test cascade;
+drop aggregate if exists ema(float, float);
+drop function if exists ema_fin(t ema_type) cascade;
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+drop type if exists ema_type cascade;
+-- end_ignore
+---
+--- Test case errors out when an aggregate without a preliminary function (ema) is used as a window function with ORDER BY only (default frame).
+---
+-- SETUP
+-- start_ignore
+drop type if exists ema_type cascade;
+NOTICE:  type "ema_type" does not exist, skipping
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+ERROR:  type "ema_type" does not exist
+drop function if exists ema_fin(t ema_type) cascade;
+ERROR:  type "ema_type" does not exist
+drop aggregate if exists ema(float, float);
+NOTICE:  aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
+drop table if exists ema_test cascade;
+NOTICE:  table "ema_test" does not exist, skipping
+-- end_ignore
+create type ema_type as (x float, e float);
+create function ema_adv(t ema_type, v float, x float) -- state transition for ema: while t.e is null it seeds t.e with v and t.x with x; afterwards it raises if x differs from t.x, else moves t.e toward v by the fraction t.x
+    returns ema_type
+    as $$
+        begin
+            if t.e is null then
+                t.e = v;
+                t.x = x;
+            else
+                if t.x != x then
+                    raise exception 'ema smoothing x may not vary';
+                end if;
+                t.e = t.e + (v - t.e) * t.x;
+            end if;
+            return t;
+        end;
+    $$ language plpgsql;
+create function ema_fin(t ema_type) -- final function for ema: returns the e field of the state
+    returns float
+    as $$
+       begin
+           return t.e;
+       end;
+    $$ language plpgsql;
+create aggregate ema(float, float) ( -- two-argument aggregate built from ema_adv and ema_fin; no prefunc is specified
+    sfunc = ema_adv,
+    stype = ema_type,
+    finalfunc = ema_fin,
+    initcond = '(,)'); -- initial state is an ema_type value with both fields null
+create table ema_test (k int, v float ) distributed by (k);
+insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
+-- TEST
+select k, v, ema(v, 0.9) over (order by k) from ema_test order by k;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop table if exists ema_test cascade;
+drop aggregate if exists ema(float, float);
+drop function if exists ema_fin(t ema_type) cascade;
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+drop type if exists ema_type cascade;
+-- end_ignore
+---
+--- Test with/without group by
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+NOTICE:  table "r" does not exist, skipping
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+NOTICE:  updating distribution policy to match new primary key
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
+--TEST
+SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM r GROUP BY a ORDER BY m;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM r GROUP BY b;
+ m 
+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY clause includes some grouping column or not
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+NOTICE:  table "r" does not exist, skipping
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+NOTICE:  updating distribution policy to match new primary key
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
+--TEST
+SELECT MAX(a) AS m FROM R GROUP BY b ORDER BY m,b;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m,b,e;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m;
+ m 
+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY 1 or more columns
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+NOTICE:  table "r" does not exist, skipping
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+NOTICE:  updating distribution policy to match new primary key
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
+--TEST
+SELECT MAX(a),d,e AS m FROM r GROUP BY b,d,e ORDER BY m,e,d;
+ max | d | m 
+-----+---+---
+(0 rows)
+SELECT MIN(a),d,e AS m FROM r GROUP BY b,e,d ORDER BY e,d;
+ min | d | m 
+-----+---+---
+(0 rows)
+SELECT MAX(a) AS m FROM r GROUP BY b,c,d,e ORDER BY e,d;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM r GROUP BY b,e ORDER BY e;
+ m 
+---
+(0 rows)
+SELECT MAX(e) AS m FROM r GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY clause includes some grouping column or not
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS dm_calendar;
+NOTICE:  table "dm_calendar" does not exist, skipping
+-- end_ignore
+CREATE TABLE dm_calendar (
+    calendar_id bigint NOT NULL,
+    date_name character varying(200),
+    date_name_cn character varying(200),
+    calendar_date date,
+    current_day numeric(10,0),
+    month_id numeric(10,0),
+    month_name character varying(200),
+    month_name_cn character varying(200),
+    month_name_short character varying(200),
+    month_name_short_cn character varying(200),
+    days_in_month numeric(10,0),
+    first_of_month numeric(10,0),
+    last_month_id numeric(10,0),
+    month_end numeric(10,0),
+    quarter_id numeric(10,0),
+    quarter_name character varying(200),
+    quarter_name_cn character varying(200),
+    quarter_name_short character varying(200),
+    quarter_name_short_cn character varying(200),
+    year_id numeric(10,0),
+    year_name character varying(200),
+    year_name_cn character varying(200),
+    description character varying(500),
+    create_date timestamp without time zone,
+    month_week_num character varying(100),
+    month_week_begin character varying(100),
+    month_week_end character varying(100),
+    half_year character varying(100),
+    weekend_flag character varying(100),
+    holidays_flag character varying(100),
+    workday_flag character varying(100),
+    month_number numeric(10,0)
+) DISTRIBUTED BY (calendar_id);
+ALTER TABLE ONLY dm_calendar ADD CONSTRAINT dm_calendar_pkey PRIMARY KEY (calendar_id);
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "dm_calendar_pkey" for table "dm_calendar"
+--TEST
+SELECT "year_id" as id , min("year_name") as a  from (select "year_id" as "year_id" , min("year_name") as "year_name" from  "dm_calendar" group by "year_id") "dm_calendar3" group by "year_id" order by a ASC ;
+ id | a 
+----+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS dm_calendar;
+-- end_ignore
+---
+--- Test with/without group by with primary key as dist key
+---
+-- SETUP
+-- start_ignore
+drop table if exists t;
+NOTICE:  table "t" does not exist, skipping
+-- end_ignore
+create table t
+(
+    a int NOT NULL,
+    b int,
+    c character varying(200),
+    d numeric(10,0),
+    e date
+) distributed by (b);
+alter table t ADD CONSTRAINT pkey primary key (b);
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "t_pkey" for table "t"
+-- TEST
+SELECT MAX(a) AS m FROM t GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+drop table if exists t;
+-- end_ignore
+---
+--- Passing through distribution matching type in default implementation
+---
+-- SETUP
+-- start_ignore
+drop table if exists customer;
+NOTICE:  table "customer" does not exist, skipping
+drop table if exists sale;
+NOTICE:  table "sale" does not exist, skipping
+-- end_ignore
+create table customer
+(
+	cn int not null,
+	cname text not null,
+	cloc text,
+	primary key (cn)
+) distributed by (cn);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "customer_pkey" for table "customer"
+insert into customer values
+  ( 1, 'Macbeth', 'Inverness'),
+  ( 2, 'Duncan', 'Forres'),
+  ( 3, 'Lady Macbeth', 'Inverness'),
+  ( 4, 'Witches, Inc', 'Lonely Heath');
+create table sale
+(
+	cn int not null,
+	vn int not null,
+	pn int not null,
+	dt date not null,
+	qty int not null,
+	prc float not null,
+	primary key (cn, vn, pn)
+) distributed by (cn,vn,pn);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "sale_pkey" for table "sale"
+insert into sale values
+  ( 2, 40, 100, '1401-1-1', 1100, 2400),
+  ( 1, 10, 200, '1401-3-1', 1, 0),
+  ( 3, 40, 200, '1401-4-1', 1, 0),
+  ( 1, 20, 100, '1401-5-1', 1, 0),
+  ( 1, 30, 300, '1401-5-2', 1, 0),
+  ( 1, 50, 400, '1401-6-1', 1, 0),
+  ( 2, 50, 400, '1401-6-1', 1, 0),
+  ( 1, 30, 500, '1401-6-1', 12, 5),
+  ( 3, 30, 500, '1401-6-1', 12, 5),
+  ( 3, 30, 600, '1401-6-1', 12, 5),
+  ( 4, 40, 700, '1401-6-1', 1, 1),
+  ( 4, 40, 800, '1401-6-1', 1, 1);
+-- TEST
+select cname,
+rank() over (partition by sale.cn order by vn)
+from sale, customer
+where sale.cn = customer.cn
+order by 1, 2;
+    cname     | rank 
+--------------+------
+ Duncan       |    1
+ Duncan       |    2
+ Lady Macbeth |    1
+ Lady Macbeth |    1
+ Lady Macbeth |    3
+ Macbeth      |    1
+ Macbeth      |    2
+ Macbeth      |    3
+ Macbeth      |    3
+ Macbeth      |    5
+ Witches, Inc |    1
+ Witches, Inc |    1
+(12 rows)
+-- CLEANUP
+-- start_ignore
+drop table if exists customer;
+drop table if exists sale;
+-- end_ignore
+---
+--- Optimzier query crashing for logical window with no window functions
+---
+-- SETUP
+create table mpp23240(a int, b int, c int, d int, e int, f int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- TEST
+select a, b,
+       case 1
+        when 10 then
+          sum(c) over(partition by a)
+        when 20 then
+          sum(d) over(partition by a)
+        else
+          5
+       end as sum1
+from (select * from mpp23240 where f > 10) x;
+ a | b | sum1 
+---+---+------
+(0 rows)
+-- CLEANUP
+-- start_ignore
+drop table mpp23240;
+-- end_ignore
--- a/src/test/regress/expected/bfv_olap_optimizer.out
+++ b/src/test/regress/expected/bfv_olap_optimizer.out
+---
+--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
+---
+-- SETUP
+-- start_ignore
+drop table if exists toy;
+NOTICE:  table "toy" does not exist, skipping
+drop aggregate if exists mysum1(int4);
+NOTICE:  aggregate mysum1(int4) does not exist, skipping
+drop aggregate if exists mysum2(int4);
+NOTICE:  aggregate mysum2(int4) does not exist, skipping
+-- end_ignore
+create table toy(id,val) as select i,i from generate_series(1,5) i;
+NOTICE:  Table doesn't have 'distributed by' clause. Creating a NULL policy entry.
+create aggregate mysum1(int4) (sfunc = int4_sum, prefunc=int8pl, stype=bigint);
+create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint);
+-- TEST
+select id, val, sum(val) over (w), mysum1(val) over (w), mysum2(val) over (w) from toy window w as (order by id rows 2 preceding);
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop aggregate if exists mysum1(int4);
+drop aggregate if exists mysum2(int4);
+drop table if exists toy;
+-- end_ignore
+---
+--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
+---
+-- SETUP
+-- start_ignore
+drop type if exists ema_type cascade;
+NOTICE:  type "ema_type" does not exist, skipping
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+ERROR:  type "ema_type" does not exist
+drop function if exists ema_fin(t ema_type) cascade;
+ERROR:  type "ema_type" does not exist
+drop aggregate if exists ema(float, float);
+NOTICE:  aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
+drop table if exists ema_test cascade;
+NOTICE:  table "ema_test" does not exist, skipping
+-- end_ignore
+create type ema_type as (x float, e float);
+create function ema_adv(t ema_type, v float, x float)
+    returns ema_type
+    as $$
+        begin
+            if t.e is null then
+                t.e = v;
+                t.x = x;
+            else
+                if t.x != x then
+                    raise exception 'ema smoothing x may not vary';
+                end if;
+                t.e = t.e + (v - t.e) * t.x;
+            end if;
+            return t;
+        end;
+    $$ language plpgsql;
+create function ema_fin(t ema_type)
+    returns float
+    as $$
+       begin
+           return t.e;
+       end;
+    $$ language plpgsql;
+create aggregate ema(float, float) (
+    sfunc = ema_adv,
+    stype = ema_type,
+    finalfunc = ema_fin,
+    initcond = '(,)');
+create table ema_test (k int, v float ) distributed by (k);
+insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
+-- TEST
+select k, v, ema(v, 0.9) over (order by k rows between unbounded preceding and current row) from ema_test order by k;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop table if exists ema_test cascade;
+drop aggregate if exists ema(float, float);
+drop function if exists ema_fin(t ema_type) cascade;
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+drop type if exists ema_type cascade;
+-- end_ignore
+---
+--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
+---
+-- SETUP
+-- start_ignore
+drop type if exists ema_type cascade;
+NOTICE:  type "ema_type" does not exist, skipping
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+ERROR:  type "ema_type" does not exist
+drop function if exists ema_fin(t ema_type) cascade;
+ERROR:  type "ema_type" does not exist
+drop aggregate if exists ema(float, float);
+NOTICE:  aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
+drop table if exists ema_test cascade;
+NOTICE:  table "ema_test" does not exist, skipping
+-- end_ignore
+create type ema_type as (x float, e float);
+create function ema_adv(t ema_type, v float, x float)
+    returns ema_type
+    as $$
+        begin
+            if t.e is null then
+                t.e = v;
+                t.x = x;
+            else
+                if t.x != x then
+                    raise exception 'ema smoothing x may not vary';
+                end if;
+                t.e = t.e + (v - t.e) * t.x;
+            end if;
+            return t;
+        end;
+    $$ language plpgsql;
+create function ema_fin(t ema_type)
+    returns float
+    as $$
+       begin
+           return t.e;
+       end;
+    $$ language plpgsql;
+create aggregate ema(float, float) (
+    sfunc = ema_adv,
+    stype = ema_type,
+    finalfunc = ema_fin,
+    initcond = '(,)');
+create table ema_test (k int, v float ) distributed by (k);
+insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
+-- TEST
+select k, v, ema(v, 0.9) over (order by k) from ema_test order by k;
+ERROR:  aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
+-- CLEANUP
+-- start_ignore
+drop table if exists ema_test cascade;
+drop aggregate if exists ema(float, float);
+drop function if exists ema_fin(t ema_type) cascade;
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+drop type if exists ema_type cascade;
+-- end_ignore
+---
+--- Test with/without group by
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+NOTICE:  table "r" does not exist, skipping
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+NOTICE:  updating distribution policy to match new primary key
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
+--TEST
+SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM r GROUP BY a ORDER BY m;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM r GROUP BY b;
+ m 
+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY clause includes some grouping column or not
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+NOTICE:  table "r" does not exist, skipping
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+NOTICE:  updating distribution policy to match new primary key
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
+--TEST
+SELECT MAX(a) AS m FROM R GROUP BY b ORDER BY m,b;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m,b,e;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m;
+ m 
+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY 1 or more columns
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+NOTICE:  table "r" does not exist, skipping
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+NOTICE:  updating distribution policy to match new primary key
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
+--TEST
+SELECT MAX(a),d,e AS m FROM r GROUP BY b,d,e ORDER BY m,e,d;
+ max | d | m 
+-----+---+---
+(0 rows)
+SELECT MIN(a),d,e AS m FROM r GROUP BY b,e,d ORDER BY e,d;
+ min | d | m 
+-----+---+---
+(0 rows)
+SELECT MAX(a) AS m FROM r GROUP BY b,c,d,e ORDER BY e,d;
+ m 
+---
+(0 rows)
+SELECT MAX(a) AS m FROM r GROUP BY b,e ORDER BY e;
+ m 
+---
+(0 rows)
+SELECT MAX(e) AS m FROM r GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY clause includes some grouping column or not
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS dm_calendar;
+NOTICE:  table "dm_calendar" does not exist, skipping
+-- end_ignore
+CREATE TABLE dm_calendar (
+    calendar_id bigint NOT NULL,
+    date_name character varying(200),
+    date_name_cn character varying(200),
+    calendar_date date,
+    current_day numeric(10,0),
+    month_id numeric(10,0),
+    month_name character varying(200),
+    month_name_cn character varying(200),
+    month_name_short character varying(200),
+    month_name_short_cn character varying(200),
+    days_in_month numeric(10,0),
+    first_of_month numeric(10,0),
+    last_month_id numeric(10,0),
+    month_end numeric(10,0),
+    quarter_id numeric(10,0),
+    quarter_name character varying(200),
+    quarter_name_cn character varying(200),
+    quarter_name_short character varying(200),
+    quarter_name_short_cn character varying(200),
+    year_id numeric(10,0),
+    year_name character varying(200),
+    year_name_cn character varying(200),
+    description character varying(500),
+    create_date timestamp without time zone,
+    month_week_num character varying(100),
+    month_week_begin character varying(100),
+    month_week_end character varying(100),
+    half_year character varying(100),
+    weekend_flag character varying(100),
+    holidays_flag character varying(100),
+    workday_flag character varying(100),
+    month_number numeric(10,0)
+) DISTRIBUTED BY (calendar_id);
+ALTER TABLE ONLY dm_calendar ADD CONSTRAINT dm_calendar_pkey PRIMARY KEY (calendar_id);
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "dm_calendar_pkey" for table "dm_calendar"
+--TEST
+SELECT "year_id" as id , min("year_name") as a  from (select "year_id" as "year_id" , min("year_name") as "year_name" from  "dm_calendar" group by "year_id") "dm_calendar3" group by "year_id" order by a ASC ;
+ id | a 
+----+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS dm_calendar;
+-- end_ignore
+---
+--- Test with/without group by with primary key as dist key
+---
+-- SETUP
+-- start_ignore
+drop table if exists t;
+NOTICE:  table "t" does not exist, skipping
+-- end_ignore
+create table t
+(
+    a int NOT NULL,
+    b int,
+    c character varying(200),
+    d numeric(10,0),
+    e date
+) distributed by (b);
+alter table t ADD CONSTRAINT pkey primary key (b);
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "t_pkey" for table "t"
+-- TEST
+SELECT MAX(a) AS m FROM t GROUP BY b ORDER BY m;
+ m 
+---
+(0 rows)
+-- CLEANUP
+-- start_ignore
+drop table if exists t;
+-- end_ignore
+---
+--- Passing through distribution matching type in default implementation
+---
+-- SETUP
+-- start_ignore
+drop table if exists customer;
+NOTICE:  table "customer" does not exist, skipping
+drop table if exists sale;
+NOTICE:  table "sale" does not exist, skipping
+-- end_ignore
+create table customer
+(
+	cn int not null,
+	cname text not null,
+	cloc text,
+	primary key (cn)
+) distributed by (cn);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "customer_pkey" for table "customer"
+insert into customer values
+  ( 1, 'Macbeth', 'Inverness'),
+  ( 2, 'Duncan', 'Forres'),
+  ( 3, 'Lady Macbeth', 'Inverness'),
+  ( 4, 'Witches, Inc', 'Lonely Heath');
+create table sale
+(
+	cn int not null,
+	vn int not null,
+	pn int not null,
+	dt date not null,
+	qty int not null,
+	prc float not null,
+	primary key (cn, vn, pn)
+) distributed by (cn,vn,pn);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "sale_pkey" for table "sale"
+insert into sale values
+  ( 2, 40, 100, '1401-1-1', 1100, 2400),
+  ( 1, 10, 200, '1401-3-1', 1, 0),
+  ( 3, 40, 200, '1401-4-1', 1, 0),
+  ( 1, 20, 100, '1401-5-1', 1, 0),
+  ( 1, 30, 300, '1401-5-2', 1, 0),
+  ( 1, 50, 400, '1401-6-1', 1, 0),
+  ( 2, 50, 400, '1401-6-1', 1, 0),
+  ( 1, 30, 500, '1401-6-1', 12, 5),
+  ( 3, 30, 500, '1401-6-1', 12, 5),
+  ( 3, 30, 600, '1401-6-1', 12, 5),
+  ( 4, 40, 700, '1401-6-1', 1, 1),
+  ( 4, 40, 800, '1401-6-1', 1, 1);
+-- TEST
+select cname,
+rank() over (partition by sale.cn order by vn)
+from sale, customer
+where sale.cn = customer.cn
+order by 1, 2;
+    cname     | rank 
+--------------+------
+ Duncan       |    1
+ Duncan       |    2
+ Lady Macbeth |    1
+ Lady Macbeth |    1
+ Lady Macbeth |    3
+ Macbeth      |    1
+ Macbeth      |    2
+ Macbeth      |    3
+ Macbeth      |    3
+ Macbeth      |    5
+ Witches, Inc |    1
+ Witches, Inc |    1
+(12 rows)
+-- CLEANUP
+-- start_ignore
+drop table if exists customer;
+drop table if exists sale;
+-- end_ignore
+---
+--- Optimzier query crashing for logical window with no window functions
+---
+-- SETUP
+create table mpp23240(a int, b int, c int, d int, e int, f int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- TEST
+select a, b,
+       case 1
+        when 10 then
+          sum(c) over(partition by a)
+        when 20 then
+          sum(d) over(partition by a)
+        else
+          5
+       end as sum1
+from (select * from mpp23240 where f > 10) x;
+ a | b | sum1 
+---+---+------
+(0 rows)
+-- CLEANUP
+-- start_ignore
+drop table mpp23240;
+-- end_ignore
--- a/src/test/regress/expected/bfv_partition.out
+++ b/src/test/regress/expected/bfv_partition.out
--- a/src/test/regress/expected/bfv_partition_optimizer.out
+++ b/src/test/regress/expected/bfv_partition_optimizer.out
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -72,10 +72,9 @@ test: gp_toolkit
 test: filespace trig auth_constraint role rle portals_updatable plpgsql_cache timeseries resource_queue_function pg_stat_last_operation gp_numeric_agg plan_size partindex_test direct_dispatch partition_pruning_with_fn dsp
-# direct dispatch tests
 test: bfv_dd bfv_dd_multicolumn bfv_dd_types
-test: catalog bfv_catalog bfv_index
+test: catalog bfv_catalog bfv_index bfv_olap bfv_aggregate bfv_partition
 test: aggregate_with_groupingsets gp_optimizer 

--- a/src/test/regress/sql/bfv_aggregate.sql
+++ b/src/test/regress/sql/bfv_aggregate.sql
+---
+--- Window function with outer references in PARTITION BY/ORDER BY clause
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS x_outer;
+DROP TABLE IF EXISTS y_inner;
+-- end_ignore
+create table x_outer (a int, b int, c int);
+create table y_inner (d int, e int);
+insert into x_outer select i%3, i, i from generate_series(1,10) i;
+insert into y_inner select i%3, i from generate_series(1,10) i;
+analyze x_outer;
+analyze y_inner;
+-- TEST
+select * from x_outer where a in (select row_number() over(partition by a) from y_inner) order by 1, 2;
+select * from x_outer where a in (select rank() over(order by a) from y_inner) order by 1, 2;
+select * from x_outer where a not in (select rank() over(order by a) from y_inner) order by 1, 2;
+select * from x_outer where exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
+select * from x_outer where not exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
+select * from x_outer where a in (select last_value(d) over(partition by b order by e rows between e preceding and e+1 following) from y_inner) order by 1, 2;
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS x_outer;
+DROP TABLE IF EXISTS y_inner;
+-- end_ignore
+---
+--- Testing aggregation in a query
+---
+-- SETUP
+create table d (col1 timestamp, col2 int);
+insert into d select to_date('2014-01-01', 'YYYY-DD-MM'), generate_series(1,100);
+-- TEST
+select 1, to_char(col1, 'YYYY'), median(col2) from d group by 1, 2;
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS d;
+-- end_ignore
+---
+--- Testing if aggregate derived window function produces incorrect results
+---
+-- SETUP
+-- start_ignore
+-- Clear leftovers from a previous run. IF EXISTS keeps a fresh database from
+-- raising an ERROR for aggregates that were never created.
+drop table if exists toy;
+drop aggregate if exists mysum1(int4);
+drop aggregate if exists mysum2(int4);
+-- end_ignore
+create table toy(id,val) as select i,i from generate_series(1,5) i;
+create aggregate mysum1(int4) (sfunc = int4_sum, prefunc=int8pl, stype=bigint);
+create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint);
+-- TEST
+select
+   id, val,
+   sum(val) over (w),
+   mysum1(val) over (w),
+   mysum2(val) over (w)
+from toy
+window w as (order by id rows 2 preceding);
+-- CLEANUP
+-- start_ignore
+-- Idempotent teardown: every drop tolerates an object that is already gone.
+drop table if exists toy;
+drop aggregate if exists mysum1(int4);
+drop aggregate if exists mysum2(int4);
+-- end_ignore
+---
+--- Error executing for aggregate with anyarry as return type
+---
+-- SETUP
+CREATE OR REPLACE FUNCTION tfp(anyarray,anyelement) RETURNS anyarray AS
+'select $1 || $2' LANGUAGE SQL;
+CREATE OR REPLACE FUNCTION ffp(anyarray) RETURNS anyarray AS
+'select $1' LANGUAGE SQL;
+CREATE AGGREGATE myaggp20a(BASETYPE = anyelement, SFUNC = tfp,
+  STYPE = anyarray, FINALFUNC = ffp, INITCOND = '{}');
+-- Adding a sql function to sory the array
+CREATE OR REPLACE FUNCTION array_sort (ANYARRAY)
+RETURNS ANYARRAY LANGUAGE SQL
+AS $$
+SELECT ARRAY(SELECT unnest($1) ORDER BY 1)
+$$;
+create temp table t(f1 int, f2 int[], f3 text);
+-- TEST
+insert into t values(1,array[1],'a');
+insert into t values(1,array[11],'b');
+insert into t values(1,array[111],'c');
+insert into t values(2,array[2],'a');
+insert into t values(2,array[22],'b');
+insert into t values(2,array[222],'c');
+insert into t values(3,array[3],'a');
+insert into t values(3,array[3],'b');
+select f3, array_sort(myaggp20a(f1)) from t group by f3 order by f3;
+-- CLEANUP
+-- start_ignore
+drop table if exists t;
+drop function array_sort (ANYARRAY) cascade;
+drop function tfp(anyarray,anyelement) cascade;
+drop function ffp(anyarray) cascade;
+-- end_ignore
+-- start_ignore
+-- (Re)install plpythonu; count_operator() below is written in it.
+drop language if exists plpythonu;
+create language plpythonu;
+-- end_ignore
+create or replace function count_operator(explain_query text, operator text) returns int as
+$$
+rv = plpy.execute(explain_query)
+search_text = operator
+result = 0
+for i in range(len(rv)):
+    cur_line = rv[i]['QUERY PLAN']
+    if search_text.lower() in cur_line.lower():
+        result = result+1
+return result
+$$
+language plpythonu;
+---
+--- Testing adding a traceflag to favor multi-stage aggregation
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS multi_stage_test;
+-- end_ignore
+create table multi_stage_test(a int, b int);
+insert into multi_stage_test select i, i%4 from generate_series(1,10) i;
+analyze multi_stage_test;
+-- TEST
+-- start_ignore
+set optimizer_segments=2;
+set optimizer_prefer_multistage_agg = on;
+-- end_ignore
+select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
+-- start_ignore
+set optimizer_prefer_multistage_agg = off;
+-- end_ignore
+select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
+--CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS multi_stage_test;
+reset optimizer_segments;
+set optimizer_prefer_multistage_agg = off;
+-- end_ignore
+---
+--- Testing not picking HashAgg for aggregates without preliminary functions
+---
+-- SETUP
+-- start_ignore
+SET optimizer_disable_missing_stats_collection=on;
+DROP TABLE IF EXISTS attribute_table;
+-- end_ignore
+CREATE TABLE attribute_table (product_id integer, attribute_id integer,attribute text, attribute2 text,attribute_ref_lists text,short_name text,attribute6 text,attribute5 text,measure double precision,unit character varying(60)) DISTRIBUTED BY (product_id ,attribute_id);
+-- create the transition function
+CREATE OR REPLACE FUNCTION do_concat(text,text)
+RETURNS text
+--concatenates 2 strings
+AS 'SELECT CASE WHEN $1 IS NULL THEN $2
+WHEN $2 IS NULL THEN $1
+ELSE $1 || $2 END;'
+     LANGUAGE SQL
+     IMMUTABLE
+     RETURNS NULL ON NULL INPUT;
+-- UDA definition. No PREFUNC exists
+-- start_ignore
+DROP AGGREGATE IF EXISTS concat(text);
+-- end_ignore
+CREATE AGGREGATE concat(text) (
+   --text/string concatenation
+   SFUNC = do_concat, --Function to call for each string that builds the aggregate
+   STYPE = text,--FINALFUNC=final_func, --Function to call after everything has been aggregated
+   INITCOND = '' --Initialize as an empty string when starting
+);
+-- TEST
+-- cook some stats
+-- start_ignore
+set allow_system_table_mods='DML';
+-- end_ignore
+UPDATE pg_class set reltuples=524592::real, relpages=2708::integer where oid = 'attribute_table'::regclass;
+select count_operator('explain select product_id,concat(E''#attribute_''||attribute_id::varchar||E'':''||attribute) as attr FROM attribute_table GROUP BY product_id;','HashAggregate');
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS attribute_table;
+DROP AGGREGATE IF EXISTS concat(text);
+drop function do_concat(text,text) cascade;
+SET optimizer_disable_missing_stats_collection=off;
+-- allow_system_table_mods was switched to 'DML' for the pg_class update in
+-- this test; put it back so later statements cannot modify catalogs.
+reset allow_system_table_mods;
+-- end_ignore
+---
+--- Testing fallback to planner when the agg used in window does not have either prelim or inverse prelim function.
+---
+-- SETUP
+-- start_ignore
+DROP TABLE IF EXISTS foo;
+-- end_ignore
+create table foo(a int, b text) distributed by (a);
+-- TEST
+insert into foo values (1,'aaa'), (2,'bbb'), (3,'ccc');
+-- should fall back
+select string_agg(b) over (partition by a) from foo order by 1;
+select string_agg(b) over (partition by a,b) from foo order by 1;
+-- should not fall back
+select max(b) over (partition by a) from foo order by 1;
+select count_operator('explain select max(b) over (partition by a) from foo order by 1;', 'Table Scan');
+-- fall back
+select string_agg(b) over (partition by a+1) from foo order by 1;
+select string_agg(b || 'txt') over (partition by a) from foo order by 1;
+select string_agg(b || 'txt') over (partition by a+1) from foo order by 1;
+-- fall back and planner's plan produces unsupported execution
+select string_agg(b) over (partition by a order by a) from foo order by 1;
+select string_agg(b || 'txt') over (partition by a,b order by a,b) from foo order by 1;
+select '1' || string_agg(b) over (partition by a+1 order by a+1) from foo;
+-- CLEANUP
+-- start_ignore
+DROP TABLE IF EXISTS foo;
+-- count_operator() is a plpythonu function, so drop it before its language.
+drop function if exists count_operator(explain_query text, operator text);
+drop language if exists plpythonu;
+-- end_ignore
--- a/src/test/regress/sql/bfv_olap.sql
+++ b/src/test/regress/sql/bfv_olap.sql
+---
+--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
+---
+-- SETUP
+-- start_ignore
+drop table if exists toy;
+drop aggregate if exists mysum1(int4);
+drop aggregate if exists mysum2(int4);
+-- end_ignore
+create table toy(id,val) as select i,i from generate_series(1,5) i;
+create aggregate mysum1(int4) (sfunc = int4_sum, prefunc=int8pl, stype=bigint);
+create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint);
+-- TEST
+select id, val, sum(val) over (w), mysum1(val) over (w), mysum2(val) over (w) from toy window w as (order by id rows 2 preceding);
+-- CLEANUP
+-- start_ignore
+drop aggregate if exists mysum1(int4);
+drop aggregate if exists mysum2(int4);
+drop table if exists toy;
+-- end_ignore
+---
+--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
+---
+-- SETUP
+-- start_ignore
+-- Dropping the type with CASCADE also removes ema_adv() and ema_fin(), whose
+-- signatures reference it. Separate DROP FUNCTION statements placed after
+-- this one cannot resolve ema_type and only raise
+-- 'type "ema_type" does not exist'.
+drop type if exists ema_type cascade;
+drop aggregate if exists ema(float, float);
+drop table if exists ema_test cascade;
+-- end_ignore
+create type ema_type as (x float, e float);
+create function ema_adv(t ema_type, v float, x float)
+    returns ema_type
+    as $$
+        begin
+            if t.e is null then
+                t.e = v;
+                t.x = x;
+            else
+                if t.x != x then
+                    raise exception 'ema smoothing x may not vary';
+                end if;
+                t.e = t.e + (v - t.e) * t.x;
+            end if;
+            return t;
+        end;
+    $$ language plpgsql;
+create function ema_fin(t ema_type)
+    returns float
+    as $$
+       begin
+           return t.e;
+       end;
+    $$ language plpgsql;
+create aggregate ema(float, float) (
+    sfunc = ema_adv,
+    stype = ema_type,
+    finalfunc = ema_fin,
+    initcond = '(,)');
+create table ema_test (k int, v float ) distributed by (k);
+insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
+-- TEST
+select k, v, ema(v, 0.9) over (order by k rows between unbounded preceding and current row) from ema_test order by k;
+-- CLEANUP
+-- start_ignore
+drop table if exists ema_test cascade;
+drop aggregate if exists ema(float, float);
+drop function if exists ema_fin(t ema_type) cascade;
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+drop type if exists ema_type cascade;
+-- end_ignore
+---
+--- Regression test: an aggregate defined without a preliminary function used to error out
+--- when it was used as an aggregate-derived window function. This variant gives the window
+--- only an ORDER BY, with no explicit ROWS clause, so the default frame is exercised.
+---
+-- SETUP
+-- start_ignore
+drop type if exists ema_type cascade;
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+drop function if exists ema_fin(t ema_type) cascade;
+drop aggregate if exists ema(float, float);
+drop table if exists ema_test cascade;
+-- end_ignore
+-- Aggregate state: x holds the smoothing factor, e holds the smoothed value so far.
+create type ema_type as (x float, e float);
+-- Transition function: the first input seeds the state; each later input v moves e
+-- towards v by the factor x. A smoothing factor that differs from the stored one is rejected.
+create function ema_adv(t ema_type, v float, x float)
+    returns ema_type
+    as $$
+        begin
+            if t.e is null then
+                t.e = v;
+                t.x = x;
+            else
+                if t.x != x then
+                    raise exception 'ema smoothing x may not vary';
+                end if;
+                t.e = t.e + (v - t.e) * t.x;
+            end if;
+            return t;
+        end;
+    $$ language plpgsql;
+-- Final function: return the smoothed value stored in the state.
+create function ema_fin(t ema_type)
+    returns float
+    as $$
+       begin
+           return t.e;
+       end;
+    $$ language plpgsql;
+-- ema(value, smoothing factor). Only a transition and a final function are supplied,
+-- i.e. no preliminary function, which is the case under test.
+-- initcond '(,)' starts the state with both fields NULL.
+create aggregate ema(float, float) (
+    sfunc = ema_adv,
+    stype = ema_type,
+    finalfunc = ema_fin,
+    initcond = '(,)');
+create table ema_test (k int, v float ) distributed by (k);
+-- The TEST query prints v and ema(v), so the fixture has to be reproducible:
+-- a deterministic jitter term in [0, 3.6] is used instead of random().
+insert into ema_test select i, 0.4*((i*7)%10) + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
+-- TEST
+select k, v, ema(v, 0.9) over (order by k) from ema_test order by k;
+-- CLEANUP
+-- start_ignore
+drop table if exists ema_test cascade;
+drop aggregate if exists ema(float, float);
+drop function if exists ema_fin(t ema_type) cascade;
+drop function if exists ema_adv(t ema_type, v float, x float) cascade;
+drop type if exists ema_type cascade;
+-- end_ignore
+---
+--- Test MAX(a) grouped by the primary-key column b or by column a, with and without ORDER BY
+---
+-- SETUP: table r is distributed by (a,b); a primary key on b alone is added afterwards and no rows are inserted
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+-- TEST: every query runs against the empty table r; only the last one omits ORDER BY
+SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
+SELECT MAX(a) AS m FROM r GROUP BY a ORDER BY m;
+SELECT MAX(a) AS m FROM r GROUP BY b;
+-- CLEANUP: remove the test table
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY clause includes some grouping column or not
+--- (ordering by the aggregate alias m alone, or by m followed by the GROUP BY columns)
+-- SETUP: table r is distributed by (a,b); a primary key on b alone is added afterwards and no rows are inserted
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+-- TEST: the table is written as unquoted R here, which names the same table r created in SETUP
+SELECT MAX(a) AS m FROM R GROUP BY b ORDER BY m,b;
+SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m,b,e;
+SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m;
+-- CLEANUP: remove the test table
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY 1 or more columns
+--- (the sort keys are grouping columns, an output alias, or a mix of both)
+-- SETUP: table r is distributed by (a,b); a primary key on b alone is added afterwards and no rows are inserted
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+CREATE TABLE r
+(
+    a INT NOT NULL, 
+    b INT, 
+    c CHARACTER VARYING(200),  
+    d NUMERIC(10,0), 
+    e DATE
+) DISTRIBUTED BY (a,b);
+ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
+-- TEST: in the first two queries the alias m is attached to column e, not to the aggregate; in the last it names MAX(e)
+SELECT MAX(a),d,e AS m FROM r GROUP BY b,d,e ORDER BY m,e,d;
+SELECT MIN(a),d,e AS m FROM r GROUP BY b,e,d ORDER BY e,d;
+SELECT MAX(a) AS m FROM r GROUP BY b,c,d,e ORDER BY e,d;
+SELECT MAX(a) AS m FROM r GROUP BY b,e ORDER BY e;
+SELECT MAX(e) AS m FROM r GROUP BY b ORDER BY m;
+-- CLEANUP: remove the test table
+-- start_ignore
+DROP TABLE IF EXISTS r;
+-- end_ignore
+---
+--- ORDER BY an aggregate alias (not a grouping column) over a subquery that is grouped by the same column
+---
+-- SETUP: dm_calendar is distributed by calendar_id, which is also made its primary key; no rows are inserted
+-- start_ignore
+DROP TABLE IF EXISTS dm_calendar;
+-- end_ignore
+CREATE TABLE dm_calendar (
+    calendar_id bigint NOT NULL,
+    date_name character varying(200),
+    date_name_cn character varying(200),
+    calendar_date date,
+    current_day numeric(10,0),
+    month_id numeric(10,0),
+    month_name character varying(200),
+    month_name_cn character varying(200),
+    month_name_short character varying(200),
+    month_name_short_cn character varying(200),
+    days_in_month numeric(10,0),
+    first_of_month numeric(10,0),
+    last_month_id numeric(10,0),
+    month_end numeric(10,0),
+    quarter_id numeric(10,0),
+    quarter_name character varying(200),
+    quarter_name_cn character varying(200),
+    quarter_name_short character varying(200),
+    quarter_name_short_cn character varying(200),
+    year_id numeric(10,0),
+    year_name character varying(200),
+    year_name_cn character varying(200),
+    description character varying(500),
+    create_date timestamp without time zone,
+    month_week_num character varying(100),
+    month_week_begin character varying(100),
+    month_week_end character varying(100),
+    half_year character varying(100),
+    weekend_flag character varying(100),
+    holidays_flag character varying(100),
+    workday_flag character varying(100),
+    month_number numeric(10,0)
+) DISTRIBUTED BY (calendar_id);
+ALTER TABLE ONLY dm_calendar ADD CONSTRAINT dm_calendar_pkey PRIMARY KEY (calendar_id);
+-- TEST: inner and outer queries both group by year_id; the result is ordered by the outer min(year_name), aliased a
+SELECT "year_id" as id , min("year_name") as a  from (select "year_id" as "year_id" , min("year_name") as "year_name" from  "dm_calendar" group by "year_id") "dm_calendar3" group by "year_id" order by a ASC ;
+-- CLEANUP: remove the test table
+-- start_ignore
+DROP TABLE IF EXISTS dm_calendar;
+-- end_ignore
+---
+--- Test GROUP BY on column b, which is both the primary key and the distribution key
+---
+-- SETUP: table t is distributed by (b) and then gets a primary key on (b); no rows are inserted
+-- start_ignore
+drop table if exists t;
+-- end_ignore
+create table t
+(
+    a int NOT NULL,
+    b int,
+    c character varying(200),
+    d numeric(10,0),
+    e date
+) distributed by (b);
+alter table t ADD CONSTRAINT pkey primary key (b);
+-- TEST: MAX(a) per value of b, ordered by the aggregate alias m
+SELECT MAX(a) AS m FROM t GROUP BY b ORDER BY m;
+-- CLEANUP: remove the test table
+-- start_ignore
+drop table if exists t;
+-- end_ignore
+---
+--- Passing through distribution matching type in default implementation
+---
+-- SETUP: customer is distributed by its primary key (cn); sale is distributed by its composite primary key (cn, vn, pn)
+-- start_ignore
+drop table if exists customer;
+drop table if exists sale;
+-- end_ignore
+create table customer
+(
+	cn int not null,
+	cname text not null,
+	cloc text,
+	primary key (cn)
+) distributed by (cn);
+insert into customer values
+  ( 1, 'Macbeth', 'Inverness'),
+  ( 2, 'Duncan', 'Forres'),
+  ( 3, 'Lady Macbeth', 'Inverness'),
+  ( 4, 'Witches, Inc', 'Lonely Heath');
+create table sale
+(
+	cn int not null,
+	vn int not null,
+	pn int not null,
+	dt date not null,
+	qty int not null,
+	prc float not null,
+	primary key (cn, vn, pn)
+) distributed by (cn,vn,pn);
+insert into sale values
+  ( 2, 40, 100, '1401-1-1', 1100, 2400),
+  ( 1, 10, 200, '1401-3-1', 1, 0),
+  ( 3, 40, 200, '1401-4-1', 1, 0),
+  ( 1, 20, 100, '1401-5-1', 1, 0),
+  ( 1, 30, 300, '1401-5-2', 1, 0),
+  ( 1, 50, 400, '1401-6-1', 1, 0),
+  ( 2, 50, 400, '1401-6-1', 1, 0),
+  ( 1, 30, 500, '1401-6-1', 12, 5),
+  ( 3, 30, 500, '1401-6-1', 12, 5),
+  ( 3, 30, 600, '1401-6-1', 12, 5),
+  ( 4, 40, 700, '1401-6-1', 1, 1),
+  ( 4, 40, 800, '1401-6-1', 1, 1);
+-- TEST: rank each customer's sale rows by vn (rows with equal vn share a rank); ORDER BY 1, 2 sorts by cname, then rank
+select cname,
+rank() over (partition by sale.cn order by vn)
+from sale, customer
+where sale.cn = customer.cn
+order by 1, 2;
+-- CLEANUP: remove both test tables
+-- start_ignore
+drop table if exists customer;
+drop table if exists sale;
+-- end_ignore
+---
+--- Optimizer query crashing for logical window with no window functions
+---
+-- SETUP
+-- Drop any leftover copy first so the section can be re-run, as the other sections do.
+-- start_ignore
+drop table if exists mpp23240;
+-- end_ignore
+create table mpp23240(a int, b int, c int, d int, e int, f int);
+-- TEST
+-- The CASE operand is the constant 1, which matches neither WHEN value (10, 20), so both
+-- window-function branches are dead and sum1 is always 5.
+-- NOTE(review): presumably this is what leaves a window node with no window functions; confirm.
+select a, b,
+       case 1
+        when 10 then
+          sum(c) over(partition by a)
+        when 20 then
+          sum(d) over(partition by a)
+        else
+          5
+       end as sum1
+from (select * from mpp23240 where f > 10) x;
+-- CLEANUP
+-- start_ignore
+drop table if exists mpp23240;
+-- end_ignore
--- a/src/test/regress/sql/bfv_partition.sql
+++ b/src/test/regress/sql/bfv_partition.sql