--- Window function with outer references in PARTITION BY/ORDER BY clause
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS x_outer;
NOTICE: table "x_outer" does not exist, skipping
DROP TABLE IF EXISTS y_inner;
NOTICE: table "y_inner" does not exist, skipping
-- end_ignore
create table x_outer (a int, b int, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table y_inner (d int, e int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'd' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into x_outer select i%3, i, i from generate_series(1,10) i;
insert into y_inner select i%3, i from generate_series(1,10) i;
analyze x_outer;
analyze y_inner;
-- TEST
-- y_inner only has columns d and e, so "partition by a" inside the subquery
-- is an outer reference to x_outer.a from the enclosing query.
select * from x_outer where a in (select row_number() over(partition by a) from y_inner) order by 1, 2;
a | b | c
---+----+----
1 | 1 | 1
1 | 4 | 4
1 | 7 | 7
1 | 10 | 10
2 | 2 | 2
2 | 5 | 5
2 | 8 | 8
(7 rows)
select * from x_outer where a in (select rank() over(order by a) from y_inner) order by 1, 2;
a | b | c
---+----+----
1 | 1 | 1
1 | 4 | 4
1 | 7 | 7
1 | 10 | 10
(4 rows)
select * from x_outer where a not in (select rank() over(order by a) from y_inner) order by 1, 2;
a | b | c
---+---+---
0 | 3 | 3
0 | 6 | 6
0 | 9 | 9
2 | 2 | 2
2 | 5 | 5
2 | 8 | 8
(6 rows)
select * from x_outer where exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
a | b | c
---+----+----
0 | 3 | 3
0 | 6 | 6
0 | 9 | 9
1 | 1 | 1
1 | 4 | 4
1 | 7 | 7
1 | 10 | 10
2 | 2 | 2
2 | 5 | 5
2 | 8 | 8
(10 rows)
select * from x_outer where not exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
a | b | c
---+---+---
(0 rows)
-- "partition by b" is an outer reference (b is a column of x_outer); the
-- ORDER BY key and both frame bounds are expressions on the inner column y_inner.e.
select * from x_outer where a in (select last_value(d) over(partition by b order by e rows between e preceding and e+1 following) from y_inner) order by 1, 2;
a | b | c
---+----+----
0 | 3 | 3
0 | 6 | 6
0 | 9 | 9
1 | 1 | 1
1 | 4 | 4
1 | 7 | 7
1 | 10 | 10
2 | 2 | 2
2 | 5 | 5
2 | 8 | 8
(10 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS x_outer;
DROP TABLE IF EXISTS y_inner;
-- end_ignore
---
--- Testing aggregation in a query
---
-- SETUP
create table d (col1 timestamp, col2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into d select to_date('2014-01-01', 'YYYY-DD-MM'), generate_series(1,100);
-- TEST
select 1, to_char(col1, 'YYYY'), median(col2) from d group by 1, 2;
?column? | to_char | median
----------+---------+--------
1 | 2014 | 50.5
(1 row)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS d;
-- end_ignore
---
--- Testing if aggregate derived window function produces incorrect results
---
-- SETUP
-- start_ignore
drop table if exists toy;
NOTICE: table "toy" does not exist, skipping
drop aggregate mysum1(int4);
ERROR: aggregate mysum1(integer) does not exist
drop aggregate mysum2(int4);
ERROR: aggregate mysum2(integer) does not exist
-- end_ignore
create table toy(id,val) as select i,i from generate_series(1,5) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- TEST
insert into t values(1,array[1],'a');
insert into t values(1,array[11],'b');
insert into t values(1,array[111],'c');
insert into t values(2,array[2],'a');
insert into t values(2,array[22],'b');
insert into t values(2,array[222],'c');
insert into t values(3,array[3],'a');
insert into t values(3,array[3],'b');
select f3, array_sort(myaggp20a(f1)) from t group by f3 order by f3;
f3 | array_sort
----+------------
a | {1,2,3}
b | {1,2,3}
c | {1,2}
(3 rows)
-- CLEANUP
-- start_ignore
drop table if exists t;
drop function array_sort (ANYARRAY) cascade;
drop function tfp(anyarray,anyelement) cascade;
NOTICE: drop cascades to function myaggp20a(anyelement)
drop function ffp(anyarray) cascade;
-- end_ignore
-- start_ignore
-- start_ignore
drop language if exists plpythonu;
create language plpythonu;
-- end_ignore
-- count_operator(explain_query, operator): executes explain_query and returns
-- how many rows of its "QUERY PLAN" column contain the operator text,
-- compared case-insensitively.
create or replace function count_operator(explain_query text, operator text) returns int as
$$
plan = plpy.execute(explain_query)
# Pull out the plan text of every returned row, in order.
plan_lines = [plan[pos]['QUERY PLAN'] for pos in range(len(plan))]
# One hit per plan line that mentions the operator, ignoring case.
return len([text for text in plan_lines if operator.lower() in text.lower()])
$$
language plpythonu;
---
--- Testing adding a traceflag to favor multi-stage aggregation
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS multi_stage_test;
NOTICE: table "multi_stage_test" does not exist, skipping
-- end_ignore
create table multi_stage_test(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into multi_stage_test select i, i%4 from generate_series(1,10) i;
analyze multi_stage_test;
-- TEST
-- start_ignore
set optimizer_segments=2;
set optimizer_prefer_multistage_agg = on;
-- end_ignore
select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
count_operator
----------------
0
(1 row)
-- start_ignore
set optimizer_prefer_multistage_agg = off;
-- end_ignore
select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
count_operator
----------------
0
(1 row)
--CLEANUP
-- start_ignore
DROP TABLE IF EXISTS multi_stage_test;
reset optimizer_segments;
set optimizer_prefer_multistage_agg = off;
-- end_ignore
---
--- Testing not picking HashAgg for aggregates without preliminary functions
---
-- SETUP
-- start_ignore
SET optimizer_disable_missing_stats_collection=on;
DROP TABLE IF EXISTS attribute_table;
NOTICE: table "attribute_table" does not exist, skipping
NOTICE: aggregate concat(text) does not exist, skipping
-- end_ignore
-- Text-concatenation aggregate.
--   sfunc    : do_concat is called for each input string to build the result
--   stype    : the running state is plain text
--   initcond : the state starts out as the empty string
-- No FINALFUNC is declared (it was left commented out in the original
-- definition), and no preliminary function is declared either.
create aggregate concat(text) (
    sfunc = do_concat,
    stype = text,
    initcond = ''
);
-- TEST
-- cook some stats
-- start_ignore
set allow_system_table_mods='DML';
-- end_ignore
UPDATE pg_class set reltuples=524592::real, relpages=2708::integer where oid = 'attribute_table'::regclass;
select count_operator('explain select product_id,concat(E''#attribute_''||attribute_id::varchar||E'':''||attribute) as attr FROM attribute_table GROUP BY product_id;','HashAggregate');
count_operator
----------------
0
(1 row)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS attribute_table;
DROP AGGREGATE IF EXISTS concat(text);
drop function do_concat(text,text) cascade;
SET optimizer_disable_missing_stats_collection=off;
-- end_ignore
---
--- Testing fallback to planner when the agg used in window does not have either prelim or inverse prelim function.
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS foo;
NOTICE: table "foo" does not exist, skipping
-- end_ignore
create table foo(a int, b text) distributed by (a);
-- TEST
insert into foo values (1,'aaa'), (2,'bbb'), (3,'ccc');
-- should fall back
select string_agg(b) over (partition by a) from foo order by 1;
string_agg
------------
aaa
bbb
ccc
(3 rows)
select string_agg(b) over (partition by a,b) from foo order by 1;
string_agg
------------
aaa
bbb
ccc
(3 rows)
-- should not fall back
select max(b) over (partition by a) from foo order by 1;
max
-----
aaa
bbb
ccc
(3 rows)
select count_operator('explain select max(b) over (partition by a) from foo order by 1;', 'Table Scan');
count_operator
----------------
0
(1 row)
-- fall back
select string_agg(b) over (partition by a+1) from foo order by 1;
string_agg
------------
aaa
bbb
ccc
(3 rows)
select string_agg(b || 'txt') over (partition by a) from foo order by 1;
string_agg
------------
aaatxt
bbbtxt
ccctxt
(3 rows)
select string_agg(b || 'txt') over (partition by a+1) from foo order by 1;
string_agg
------------
aaatxt
bbbtxt
ccctxt
(3 rows)
-- fall back and planner's plan produces unsupported execution
select string_agg(b) over (partition by a order by a) from foo order by 1;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
select string_agg(b || 'txt') over (partition by a,b order by a,b) from foo order by 1;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
select '1' || string_agg(b) over (partition by a+1 order by a+1) from foo;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
-- CLEANUP
-- start_ignore
drop function count_operator(text,text);
DROP TABLE IF EXISTS foo;
drop function if exists count_operator(explain_query text, operator text);
NOTICE: function count_operator(text,text) does not exist, skipping
--- Window function with outer references in PARTITION BY/ORDER BY clause
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS x_outer;
NOTICE: table "x_outer" does not exist, skipping
DROP TABLE IF EXISTS y_inner;
NOTICE: table "y_inner" does not exist, skipping
-- end_ignore
create table x_outer (a int, b int, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table y_inner (d int, e int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'd' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into x_outer select i%3, i, i from generate_series(1,10) i;
insert into y_inner select i%3, i from generate_series(1,10) i;
analyze x_outer;
analyze y_inner;
-- TEST
select * from x_outer where a in (select row_number() over(partition by a) from y_inner) order by 1, 2;
a | b | c
---+----+----
1 | 1 | 1
1 | 4 | 4
1 | 7 | 7
1 | 10 | 10
2 | 2 | 2
2 | 5 | 5
2 | 8 | 8
(7 rows)
select * from x_outer where a in (select rank() over(order by a) from y_inner) order by 1, 2;
a | b | c
---+----+----
1 | 1 | 1
1 | 4 | 4
1 | 7 | 7
1 | 10 | 10
(4 rows)
select * from x_outer where a not in (select rank() over(order by a) from y_inner) order by 1, 2;
a | b | c
---+---+---
0 | 3 | 3
0 | 6 | 6
0 | 9 | 9
2 | 2 | 2
2 | 5 | 5
2 | 8 | 8
(6 rows)
select * from x_outer where exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
a | b | c
---+----+----
0 | 3 | 3
0 | 6 | 6
0 | 9 | 9
1 | 1 | 1
1 | 4 | 4
1 | 7 | 7
1 | 10 | 10
2 | 2 | 2
2 | 5 | 5
2 | 8 | 8
(10 rows)
select * from x_outer where not exists (select rank() over(order by a) from y_inner where d = a) order by 1, 2;
a | b | c
---+---+---
(0 rows)
select * from x_outer where a in (select last_value(d) over(partition by b order by e rows between e preceding and e+1 following) from y_inner) order by 1, 2;
a | b | c
---+----+----
0 | 3 | 3
0 | 6 | 6
0 | 9 | 9
1 | 1 | 1
1 | 4 | 4
1 | 7 | 7
1 | 10 | 10
2 | 2 | 2
2 | 5 | 5
2 | 8 | 8
(10 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS x_outer;
DROP TABLE IF EXISTS y_inner;
-- end_ignore
---
--- Testing aggregation in a query
---
-- SETUP
create table d (col1 timestamp, col2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into d select to_date('2014-01-01', 'YYYY-DD-MM'), generate_series(1,100);
-- TEST
select 1, to_char(col1, 'YYYY'), median(col2) from d group by 1, 2;
?column? | to_char | median
----------+---------+--------
1 | 2014 | 50.5
(1 row)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS d;
-- end_ignore
---
--- Testing if aggregate derived window function produces incorrect results
---
-- SETUP
-- start_ignore
drop table if exists toy;
NOTICE: table "toy" does not exist, skipping
drop aggregate mysum1(int4);
ERROR: aggregate mysum1(integer) does not exist
drop aggregate mysum2(int4);
ERROR: aggregate mysum2(integer) does not exist
-- end_ignore
create table toy(id,val) as select i,i from generate_series(1,5) i;
NOTICE: Table doesn't have 'distributed by' clause. Creating a NULL policy entry.
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- TEST
insert into t values(1,array[1],'a');
insert into t values(1,array[11],'b');
insert into t values(1,array[111],'c');
insert into t values(2,array[2],'a');
insert into t values(2,array[22],'b');
insert into t values(2,array[222],'c');
insert into t values(3,array[3],'a');
insert into t values(3,array[3],'b');
select f3, array_sort(myaggp20a(f1)) from t group by f3 order by f3;
f3 | array_sort
----+------------
a | {1,2,3}
b | {1,2,3}
c | {1,2}
(3 rows)
-- CLEANUP
-- start_ignore
drop table if exists t;
drop function array_sort (ANYARRAY) cascade;
drop function tfp(anyarray,anyelement) cascade;
NOTICE: drop cascades to function myaggp20a(anyelement)
drop function ffp(anyarray) cascade;
-- end_ignore
-- start_ignore
-- start_ignore
drop language if exists plpythonu;
create language plpythonu;
-- end_ignore
-- count_operator(explain_query, operator): executes explain_query and returns
-- how many rows of its "QUERY PLAN" column contain the operator text,
-- compared case-insensitively.
create or replace function count_operator(explain_query text, operator text) returns int as
$$
plan = plpy.execute(explain_query)
# Pull out the plan text of every returned row, in order.
plan_lines = [plan[pos]['QUERY PLAN'] for pos in range(len(plan))]
# One hit per plan line that mentions the operator, ignoring case.
return len([text for text in plan_lines if operator.lower() in text.lower()])
$$
language plpythonu;
---
--- Testing adding a traceflag to favor multi-stage aggregation
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS multi_stage_test;
NOTICE: table "multi_stage_test" does not exist, skipping
-- end_ignore
create table multi_stage_test(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into multi_stage_test select i, i%4 from generate_series(1,10) i;
analyze multi_stage_test;
-- TEST
-- start_ignore
set optimizer_segments=2;
set optimizer_prefer_multistage_agg = on;
-- end_ignore
select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
count_operator
----------------
2
(1 row)
-- start_ignore
set optimizer_prefer_multistage_agg = off;
-- end_ignore
select count_operator('explain select count(*) from multi_stage_test group by b;','GroupAggregate');
count_operator
----------------
1
(1 row)
--CLEANUP
-- start_ignore
DROP TABLE IF EXISTS multi_stage_test;
reset optimizer_segments;
set optimizer_prefer_multistage_agg = off;
-- end_ignore
---
--- Testing not picking HashAgg for aggregates without preliminary functions
---
-- SETUP
-- start_ignore
SET optimizer_disable_missing_stats_collection=on;
DROP TABLE IF EXISTS attribute_table;
NOTICE: table "attribute_table" does not exist, skipping
NOTICE: aggregate concat(text) does not exist, skipping
-- end_ignore
-- Text-concatenation aggregate.
--   sfunc    : do_concat is called for each input string to build the result
--   stype    : the running state is plain text
--   initcond : the state starts out as the empty string
-- No FINALFUNC is declared (it was left commented out in the original
-- definition), and no preliminary function is declared either.
create aggregate concat(text) (
    sfunc = do_concat,
    stype = text,
    initcond = ''
);
-- TEST
-- cook some stats
-- start_ignore
set allow_system_table_mods='DML';
-- end_ignore
UPDATE pg_class set reltuples=524592::real, relpages=2708::integer where oid = 'attribute_table'::regclass;
select count_operator('explain select product_id,concat(E''#attribute_''||attribute_id::varchar||E'':''||attribute) as attr FROM attribute_table GROUP BY product_id;','HashAggregate');
count_operator
----------------
0
(1 row)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS attribute_table;
DROP AGGREGATE IF EXISTS concat(text);
drop function do_concat(text,text) cascade;
SET optimizer_disable_missing_stats_collection=off;
-- end_ignore
---
--- Testing fallback to planner when the agg used in window does not have either prelim or inverse prelim function.
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS foo;
NOTICE: table "foo" does not exist, skipping
-- end_ignore
create table foo(a int, b text) distributed by (a);
-- TEST
insert into foo values (1,'aaa'), (2,'bbb'), (3,'ccc');
-- should fall back
select string_agg(b) over (partition by a) from foo order by 1;
string_agg
------------
aaa
bbb
ccc
(3 rows)
select string_agg(b) over (partition by a,b) from foo order by 1;
string_agg
------------
aaa
bbb
ccc
(3 rows)
-- should not fall back
select max(b) over (partition by a) from foo order by 1;
max
-----
aaa
bbb
ccc
(3 rows)
select count_operator('explain select max(b) over (partition by a) from foo order by 1;', 'Table Scan');
count_operator
----------------
1
(1 row)
-- fall back
select string_agg(b) over (partition by a+1) from foo order by 1;
string_agg
------------
aaa
bbb
ccc
(3 rows)
select string_agg(b || 'txt') over (partition by a) from foo order by 1;
string_agg
------------
aaatxt
bbbtxt
ccctxt
(3 rows)
select string_agg(b || 'txt') over (partition by a+1) from foo order by 1;
string_agg
------------
aaatxt
bbbtxt
ccctxt
(3 rows)
-- fall back and planner's plan produces unsupported execution
select string_agg(b) over (partition by a order by a) from foo order by 1;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
select string_agg(b || 'txt') over (partition by a,b order by a,b) from foo order by 1;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
select '1' || string_agg(b) over (partition by a+1 order by a+1) from foo;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
-- CLEANUP
-- start_ignore
drop function count_operator(text,text);
DROP TABLE IF EXISTS foo;
drop function if exists count_operator(explain_query text, operator text);
NOTICE: function count_operator(text,text) does not exist, skipping
--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
---
-- SETUP
-- start_ignore
drop table if exists toy;
NOTICE: table "toy" does not exist, skipping
drop aggregate if exists mysum1(int4);
NOTICE: aggregate mysum1(int4) does not exist, skipping
drop aggregate if exists mysum2(int4);
NOTICE: aggregate mysum2(int4) does not exist, skipping
-- end_ignore
create table toy(id,val) as select i,i from generate_series(1,5) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select id, val, sum(val) over (w), mysum1(val) over (w), mysum2(val) over (w) from toy window w as (order by id rows 2 preceding);
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
-- CLEANUP
-- start_ignore
drop aggregate if exists mysum1(int4);
drop aggregate if exists mysum2(int4);
drop table if exists toy;
-- end_ignore
---
--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
---
-- SETUP
-- start_ignore
drop type if exists ema_type cascade;
NOTICE: type "ema_type" does not exist, skipping
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
ERROR: type "ema_type" does not exist
drop function if exists ema_fin(t ema_type) cascade;
ERROR: type "ema_type" does not exist
drop aggregate if exists ema(float, float);
NOTICE: aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
drop table if exists ema_test cascade;
NOTICE: table "ema_test" does not exist, skipping
-- end_ignore
create type ema_type as (x float, e float);
-- Transition function for the ema aggregate.
--   t : running state (x = smoothing factor seen so far, e = current average)
--   v : next input value
--   x : smoothing factor supplied with this row
-- Returns the updated state; raises if x differs from the factor already
-- stored in the state.
create function ema_adv(t ema_type, v float, x float)
returns ema_type
as $$
begin
    -- No average yet: seed the state from this row and stop.
    if t.e is null then
        t.e := v;
        t.x := x;
        return t;
    end if;

    -- The smoothing factor has to stay the same for every row.
    if t.x <> x then
        raise exception 'ema smoothing x may not vary';
    end if;

    -- Move the average toward v by the stored factor.
    t.e := t.e + (v - t.e) * t.x;
    return t;
end;
$$ language plpgsql;
-- Final function for the ema aggregate: returns the e field of the state.
create function ema_fin(t ema_type) returns float as $$
begin
    return t.e;
end;
$$ language plpgsql;
-- ema(value, smoothing factor): state is an ema_type advanced by ema_adv and
-- turned into a float by ema_fin. The initial state '(,)' is a composite
-- literal with both fields NULL. No preliminary function is declared.
create aggregate ema(float, float) (
    sfunc = ema_adv,
    stype = ema_type,
    finalfunc = ema_fin,
    initcond = '(,)'
);
create table ema_test (k int, v float ) distributed by (k);
insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
-- TEST
select k, v, ema(v, 0.9) over (order by k rows between unbounded preceding and current row) from ema_test order by k;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
-- CLEANUP
-- start_ignore
drop table if exists ema_test cascade;
drop aggregate if exists ema(float, float);
drop function if exists ema_fin(t ema_type) cascade;
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
drop type if exists ema_type cascade;
-- end_ignore
---
--- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
---
-- SETUP
-- start_ignore
drop type if exists ema_type cascade;
NOTICE: type "ema_type" does not exist, skipping
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
ERROR: type "ema_type" does not exist
drop function if exists ema_fin(t ema_type) cascade;
ERROR: type "ema_type" does not exist
drop aggregate if exists ema(float, float);
NOTICE: aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
drop table if exists ema_test cascade;
NOTICE: table "ema_test" does not exist, skipping
-- end_ignore
create type ema_type as (x float, e float);
-- Transition function for the ema aggregate.
--   t : running state (x = smoothing factor seen so far, e = current average)
--   v : next input value
--   x : smoothing factor supplied with this row
-- Returns the updated state; raises if x differs from the factor already
-- stored in the state.
create function ema_adv(t ema_type, v float, x float)
returns ema_type
as $$
begin
    -- No average yet: seed the state from this row and stop.
    if t.e is null then
        t.e := v;
        t.x := x;
        return t;
    end if;

    -- The smoothing factor has to stay the same for every row.
    if t.x <> x then
        raise exception 'ema smoothing x may not vary';
    end if;

    -- Move the average toward v by the stored factor.
    t.e := t.e + (v - t.e) * t.x;
    return t;
end;
$$ language plpgsql;
-- Final function for the ema aggregate: returns the e field of the state.
create function ema_fin(t ema_type) returns float as $$
begin
    return t.e;
end;
$$ language plpgsql;
-- ema(value, smoothing factor): state is an ema_type advanced by ema_adv and
-- turned into a float by ema_fin. The initial state '(,)' is a composite
-- literal with both fields NULL. No preliminary function is declared.
create aggregate ema(float, float) (
    sfunc = ema_adv,
    stype = ema_type,
    finalfunc = ema_fin,
    initcond = '(,)'
);
create table ema_test (k int, v float ) distributed by (k);
insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
-- TEST
select k, v, ema(v, 0.9) over (order by k) from ema_test order by k;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
-- CLEANUP
-- start_ignore
drop table if exists ema_test cascade;
drop aggregate if exists ema(float, float);
drop function if exists ema_fin(t ema_type) cascade;
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
drop type if exists ema_type cascade;
-- end_ignore
---
--- Test with/without group by
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS r;
NOTICE: table "r" does not exist, skipping
-- end_ignore
CREATE TABLE r
(
a INT NOT NULL,
b INT,
c CHARACTER VARYING(200),
d NUMERIC(10,0),
e DATE
) DISTRIBUTED BY (a,b);
ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
NOTICE: updating distribution policy to match new primary key
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
--TEST
SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY a ORDER BY m;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS r;
-- end_ignore
---
--- ORDER BY clause includes some grouping column or not
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS r;
NOTICE: table "r" does not exist, skipping
-- end_ignore
CREATE TABLE r
(
a INT NOT NULL,
b INT,
c CHARACTER VARYING(200),
d NUMERIC(10,0),
e DATE
) DISTRIBUTED BY (a,b);
ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
NOTICE: updating distribution policy to match new primary key
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
--TEST
SELECT MAX(a) AS m FROM R GROUP BY b ORDER BY m,b;
m
---
(0 rows)
SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m,b,e;
m
---
(0 rows)
SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS r;
-- end_ignore
---
--- ORDER BY 1 or more columns
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS r;
NOTICE: table "r" does not exist, skipping
-- end_ignore
CREATE TABLE r
(
a INT NOT NULL,
b INT,
c CHARACTER VARYING(200),
d NUMERIC(10,0),
e DATE
) DISTRIBUTED BY (a,b);
ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
NOTICE: updating distribution policy to match new primary key
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
--TEST
SELECT MAX(a),d,e AS m FROM r GROUP BY b,d,e ORDER BY m,e,d;
max | d | m
-----+---+---
(0 rows)
SELECT MIN(a),d,e AS m FROM r GROUP BY b,e,d ORDER BY e,d;
min | d | m
-----+---+---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b,c,d,e ORDER BY e,d;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b,e ORDER BY e;
m
---
(0 rows)
SELECT MAX(e) AS m FROM r GROUP BY b ORDER BY m;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS r;
-- end_ignore
---
--- ORDER BY clause includes some grouping column or not
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS dm_calendar;
NOTICE: table "dm_calendar" does not exist, skipping
-- end_ignore
CREATE TABLE dm_calendar (
calendar_id bigint NOT NULL,
date_name character varying(200),
date_name_cn character varying(200),
calendar_date date,
current_day numeric(10,0),
month_id numeric(10,0),
month_name character varying(200),
month_name_cn character varying(200),
month_name_short character varying(200),
month_name_short_cn character varying(200),
days_in_month numeric(10,0),
first_of_month numeric(10,0),
last_month_id numeric(10,0),
month_end numeric(10,0),
quarter_id numeric(10,0),
quarter_name character varying(200),
quarter_name_cn character varying(200),
quarter_name_short character varying(200),
quarter_name_short_cn character varying(200),
year_id numeric(10,0),
year_name character varying(200),
year_name_cn character varying(200),
description character varying(500),
create_date timestamp without time zone,
month_week_num character varying(100),
month_week_begin character varying(100),
month_week_end character varying(100),
half_year character varying(100),
weekend_flag character varying(100),
holidays_flag character varying(100),
workday_flag character varying(100),
month_number numeric(10,0)
) DISTRIBUTED BY (calendar_id);
ALTER TABLE ONLY dm_calendar ADD CONSTRAINT dm_calendar_pkey PRIMARY KEY (calendar_id);
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "dm_calendar_pkey" for table "dm_calendar"
--TEST
SELECT "year_id" as id , min("year_name") as a from (select "year_id" as "year_id" , min("year_name") as "year_name" from "dm_calendar" group by "year_id") "dm_calendar3" group by "year_id" order by a ASC ;
id | a
----+---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS dm_calendar;
-- end_ignore
---
--- Test with/without group by with primary key as dist key
---
-- SETUP
-- start_ignore
drop table if exists t;
NOTICE: table "t" does not exist, skipping
-- end_ignore
create table t
(
a int NOT NULL,
b int,
c character varying(200),
d numeric(10,0),
e date
) distributed by (b);
alter table t ADD CONSTRAINT pkey primary key (b);
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "t_pkey" for table "t"
-- TEST
SELECT MAX(a) AS m FROM t GROUP BY b ORDER BY m;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
drop table if exists t;
-- end_ignore
---
--- Passing through distribution matching type in default implementation
---
-- SETUP
-- start_ignore
drop table if exists customer;
NOTICE: table "customer" does not exist, skipping
drop table if exists sale;
NOTICE: table "sale" does not exist, skipping
-- end_ignore
-- customer: primary key and distribution key are both cn.
create table customer
(
cn int not null,
cname text not null,
cloc text,
primary key (cn)
) distributed by (cn);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "customer_pkey" for table "customer"
insert into customer values
( 1, 'Macbeth', 'Inverness'),
( 2, 'Duncan', 'Forres'),
( 3, 'Lady Macbeth', 'Inverness'),
( 4, 'Witches, Inc', 'Lonely Heath');
-- sale: composite primary key (cn, vn, pn), distributed by the same three
-- columns.
create table sale
(
cn int not null,
vn int not null,
pn int not null,
dt date not null,
qty int not null,
prc float not null,
primary key (cn, vn, pn)
) distributed by (cn,vn,pn);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "sale_pkey" for table "sale"
insert into sale values
( 2, 40, 100, '1401-1-1', 1100, 2400),
( 1, 10, 200, '1401-3-1', 1, 0),
( 3, 40, 200, '1401-4-1', 1, 0),
( 1, 20, 100, '1401-5-1', 1, 0),
( 1, 30, 300, '1401-5-2', 1, 0),
( 1, 50, 400, '1401-6-1', 1, 0),
( 2, 50, 400, '1401-6-1', 1, 0),
( 1, 30, 500, '1401-6-1', 12, 5),
( 3, 30, 500, '1401-6-1', 12, 5),
( 3, 30, 600, '1401-6-1', 12, 5),
( 4, 40, 700, '1401-6-1', 1, 1),
( 4, 40, 800, '1401-6-1', 1, 1);
-- TEST
-- Joins sale to customer on cn and ranks each customer's sales by vn.
-- Rows with the same vn inside one partition share a rank and the next rank
-- is skipped (for cn = 3 the vn values 30, 30, 40 give ranks 1, 1, 3).
-- The final ORDER BY 1, 2 sorts the output by cname and then by rank.
select cname,
rank() over (partition by sale.cn order by vn)
from sale, customer
where sale.cn = customer.cn
order by 1, 2;
cname | rank
--------------+------
Duncan | 1
Duncan | 2
Lady Macbeth | 1
Lady Macbeth | 1
Lady Macbeth | 3
Macbeth | 1
Macbeth | 2
Macbeth | 3
Macbeth | 3
Macbeth | 5
Witches, Inc | 1
Witches, Inc | 1
(12 rows)
-- CLEANUP
-- start_ignore
drop table if exists customer;
drop table if exists sale;
-- end_ignore
---
--- Optimizer query crashing for logical window with no window functions
---
-- SETUP
create table mpp23240(a int, b int, c int, d int, e int, f int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- NOTE(review): the query below reads from "toy" and calls mysum1/mysum2,
-- and the cleanup drops those objects, but none of them is created in this
-- section; mpp23240 is neither queried nor dropped here. Confirm that this
-- section was not truncated or merged with another test.
select id, val, sum(val) over (w), mysum1(val) over (w), mysum2(val) over (w) from toy window w as (order by id rows 2 preceding);
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
-- CLEANUP
-- start_ignore
drop aggregate if exists mysum1(int4);
drop aggregate if exists mysum2(int4);
drop table if exists toy;
-- end_ignore
---
--- Test case errors out when we define an aggregate without a preliminary function and use it as an aggregate-derived window function (explicit ROWS frame).
---
-- SETUP
-- start_ignore
-- Pre-test cleanup. In this recorded run ema_type did not exist, so the two
-- DROP FUNCTION statements, whose signatures name ema_type, report an error
-- rather than a "does not exist, skipping" notice.
drop type if exists ema_type cascade;
NOTICE: type "ema_type" does not exist, skipping
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
ERROR: type "ema_type" does not exist
drop function if exists ema_fin(t ema_type) cascade;
ERROR: type "ema_type" does not exist
drop aggregate if exists ema(float, float);
NOTICE: aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
drop table if exists ema_test cascade;
NOTICE: table "ema_test" does not exist, skipping
-- end_ignore
-- Composite type with two float fields, x and e; it is the state type (stype)
-- of the ema aggregate created later in this section.
create type ema_type as (x float, e float);
-- Transition function of the ema aggregate (named as sfunc in its
-- CREATE AGGREGATE). t is the running state, v the next input value and x the
-- smoothing argument.
--   * While t.e is null, the state is seeded: e takes v and x takes x.
--   * Afterwards, a call whose x differs from the stored t.x raises
--     'ema smoothing x may not vary'; otherwise e is moved toward v by
--     (v - e) * t.x.
-- Returns the updated state.
create function ema_adv(t ema_type, v float, x float)
returns ema_type
as $$
begin
if t.e is null then
t.e = v;
t.x = x;
else
if t.x != x then
raise exception 'ema smoothing x may not vary';
end if;
t.e = t.e + (v - t.e) * t.x;
end if;
return t;
end;
$$ language plpgsql;
-- Final function of the ema aggregate (named as finalfunc in its
-- CREATE AGGREGATE): returns the e field of the state t as a float.
create function ema_fin(t ema_type)
returns float
as $$
begin
return t.e;
end;
$$ language plpgsql;
-- Aggregate ema(float, float): state type ema_type, transition function
-- ema_adv, final function ema_fin, initial state '(,)'. Only sfunc, stype,
-- finalfunc and initcond are given; no preliminary function is declared,
-- which is the situation this section's header describes.
create aggregate ema(float, float) (
sfunc = ema_adv,
stype = ema_type,
finalfunc = ema_fin,
initcond = '(,)');
-- ema_test gets 20 rows (k = 0..19); v includes a random() term, so the
-- stored values differ from run to run.
create table ema_test (k int, v float ) distributed by (k);
insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
-- TEST
-- Uses ema as a window aggregate with an explicit ROWS frame (unbounded
-- preceding to current row). The expected outcome is the error recorded below.
select k, v, ema(v, 0.9) over (order by k rows between unbounded preceding and current row) from ema_test order by k;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
-- CLEANUP
-- start_ignore
drop table if exists ema_test cascade;
drop aggregate if exists ema(float, float);
drop function if exists ema_fin(t ema_type) cascade;
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
drop type if exists ema_type cascade;
-- end_ignore
---
--- Test case errors out when we define an aggregate without a preliminary function and use it as an aggregate-derived window function (no explicit frame clause).
---
-- SETUP
-- start_ignore
-- Pre-test cleanup. In this recorded run ema_type did not exist, so the two
-- DROP FUNCTION statements, whose signatures name ema_type, report an error
-- rather than a "does not exist, skipping" notice.
drop type if exists ema_type cascade;
NOTICE: type "ema_type" does not exist, skipping
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
ERROR: type "ema_type" does not exist
drop function if exists ema_fin(t ema_type) cascade;
ERROR: type "ema_type" does not exist
drop aggregate if exists ema(float, float);
NOTICE: aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
drop table if exists ema_test cascade;
NOTICE: table "ema_test" does not exist, skipping
-- end_ignore
-- Composite type with two float fields, x and e; it is the state type (stype)
-- of the ema aggregate created later in this section.
create type ema_type as (x float, e float);
-- Transition function of the ema aggregate (named as sfunc in its
-- CREATE AGGREGATE). t is the running state, v the next input value and x the
-- smoothing argument.
--   * While t.e is null, the state is seeded: e takes v and x takes x.
--   * Afterwards, a call whose x differs from the stored t.x raises
--     'ema smoothing x may not vary'; otherwise e is moved toward v by
--     (v - e) * t.x.
-- Returns the updated state.
create function ema_adv(t ema_type, v float, x float)
returns ema_type
as $$
begin
if t.e is null then
t.e = v;
t.x = x;
else
if t.x != x then
raise exception 'ema smoothing x may not vary';
end if;
t.e = t.e + (v - t.e) * t.x;
end if;
return t;
end;
$$ language plpgsql;
-- Final function of the ema aggregate (named as finalfunc in its
-- CREATE AGGREGATE): returns the e field of the state t as a float.
create function ema_fin(t ema_type)
returns float
as $$
begin
return t.e;
end;
$$ language plpgsql;
-- Aggregate ema(float, float): state type ema_type, transition function
-- ema_adv, final function ema_fin, initial state '(,)'. Only sfunc, stype,
-- finalfunc and initcond are given; no preliminary function is declared,
-- which is the situation this section's header describes.
create aggregate ema(float, float) (
sfunc = ema_adv,
stype = ema_type,
finalfunc = ema_fin,
initcond = '(,)');
-- ema_test gets 20 rows (k = 0..19); v includes a random() term, so the
-- stored values differ from run to run.
create table ema_test (k int, v float ) distributed by (k);
insert into ema_test select i, 4*random() + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
-- TEST
-- Uses ema as a window aggregate with only ORDER BY in the OVER clause (no
-- explicit frame). The expected outcome is the error recorded below.
select k, v, ema(v, 0.9) over (order by k) from ema_test order by k;
ERROR: aggregate functions with no prelimfn or invprelimfn are not yet supported as window functions
-- CLEANUP
-- start_ignore
drop table if exists ema_test cascade;
drop aggregate if exists ema(float, float);
drop function if exists ema_fin(t ema_type) cascade;
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
drop type if exists ema_type cascade;
-- end_ignore
---
--- Test with/without group by
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS r;
NOTICE: table "r" does not exist, skipping
-- end_ignore
-- r is created distributed by (a,b). Adding the primary key on b alone then
-- changes the distribution policy, as the first NOTICE after the ALTER TABLE
-- records.
CREATE TABLE r
(
a INT NOT NULL,
b INT,
c CHARACTER VARYING(200),
d NUMERIC(10,0),
e DATE
) DISTRIBUTED BY (a,b);
ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
NOTICE: updating distribution policy to match new primary key
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
--TEST
-- No rows are inserted into r, so every query below expects zero rows.
-- The queries group by the primary-key column b (with ORDER BY), by the
-- non-key column a (with ORDER BY), and by b again without any ORDER BY.
SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY a ORDER BY m;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS r;
-- end_ignore
---
--- ORDER BY clause includes some grouping column or not
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS r;
NOTICE: table "r" does not exist, skipping
-- end_ignore
-- r is created distributed by (a,b). Adding the primary key on b alone then
-- changes the distribution policy, as the first NOTICE after the ALTER TABLE
-- records.
CREATE TABLE r
(
a INT NOT NULL,
b INT,
c CHARACTER VARYING(200),
d NUMERIC(10,0),
e DATE
) DISTRIBUTED BY (a,b);
ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
NOTICE: updating distribution policy to match new primary key
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
--TEST
-- No rows are inserted into r, so every query below expects zero rows.
-- The ORDER BY lists vary: aggregate alias m plus grouping column b; m plus
-- both grouping columns b and e; and m alone while grouping by b and e.
SELECT MAX(a) AS m FROM R GROUP BY b ORDER BY m,b;
m
---
(0 rows)
SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m,b,e;
m
---
(0 rows)
SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS r;
-- end_ignore
---
--- ORDER BY 1 or more columns
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS r;
NOTICE: table "r" does not exist, skipping
-- end_ignore
-- r is created distributed by (a,b). Adding the primary key on b alone then
-- changes the distribution policy, as the first NOTICE after the ALTER TABLE
-- records.
CREATE TABLE r
(
a INT NOT NULL,
b INT,
c CHARACTER VARYING(200),
d NUMERIC(10,0),
e DATE
) DISTRIBUTED BY (a,b);
ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
NOTICE: updating distribution policy to match new primary key
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "r_pkey" for table "r"
--TEST
-- No rows are inserted into r, so every query below expects zero rows.
-- In the first two queries the alias m is attached to column e, not to the
-- aggregate; the recorded column headers (max | d | m and min | d | m) show
-- this. The remaining queries order by grouping columns only, or by the
-- aggregate alias m.
SELECT MAX(a),d,e AS m FROM r GROUP BY b,d,e ORDER BY m,e,d;
max | d | m
-----+---+---
(0 rows)
SELECT MIN(a),d,e AS m FROM r GROUP BY b,e,d ORDER BY e,d;
min | d | m
-----+---+---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b,c,d,e ORDER BY e,d;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b,e ORDER BY e;
m
---
(0 rows)
SELECT MAX(e) AS m FROM r GROUP BY b ORDER BY m;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS r;
-- end_ignore
---
--- ORDER BY clause includes some grouping column or not
---
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS dm_calendar;
NOTICE: table "dm_calendar" does not exist, skipping
-- end_ignore
-- dm_calendar is distributed by calendar_id, the same column that receives
-- the primary key in the ALTER TABLE below. No rows are inserted.
CREATE TABLE dm_calendar (
calendar_id bigint NOT NULL,
date_name character varying(200),
date_name_cn character varying(200),
calendar_date date,
current_day numeric(10,0),
month_id numeric(10,0),
month_name character varying(200),
month_name_cn character varying(200),
month_name_short character varying(200),
month_name_short_cn character varying(200),
days_in_month numeric(10,0),
first_of_month numeric(10,0),
last_month_id numeric(10,0),
month_end numeric(10,0),
quarter_id numeric(10,0),
quarter_name character varying(200),
quarter_name_cn character varying(200),
quarter_name_short character varying(200),
quarter_name_short_cn character varying(200),
year_id numeric(10,0),
year_name character varying(200),
year_name_cn character varying(200),
description character varying(500),
create_date timestamp without time zone,
month_week_num character varying(100),
month_week_begin character varying(100),
month_week_end character varying(100),
half_year character varying(100),
weekend_flag character varying(100),
holidays_flag character varying(100),
workday_flag character varying(100),
month_number numeric(10,0)
) DISTRIBUTED BY (calendar_id);
ALTER TABLE ONLY dm_calendar ADD CONSTRAINT dm_calendar_pkey PRIMARY KEY (calendar_id);
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "dm_calendar_pkey" for table "dm_calendar"
--TEST
-- The inner subquery groups dm_calendar by year_id and takes min(year_name).
-- The outer query groups that result by year_id again and orders by the
-- aggregate alias a, which is not a grouping column. The table is empty, so
-- zero rows are expected.
SELECT "year_id" as id , min("year_name") as a from (select "year_id" as "year_id" , min("year_name") as "year_name" from "dm_calendar" group by "year_id") "dm_calendar3" group by "year_id" order by a ASC ;
id | a
----+---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS dm_calendar;
-- end_ignore
---
--- Test with/without group by with primary key as dist key
---
-- SETUP
-- start_ignore
drop table if exists t;
NOTICE: table "t" does not exist, skipping
-- end_ignore
-- Table t is distributed by column b, the same column that receives the
-- primary key constraint in the ALTER TABLE below.
create table t
(
a int NOT NULL,
b int,
c character varying(200),
d numeric(10,0),
e date
) distributed by (b);
alter table t ADD CONSTRAINT pkey primary key (b);
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "t_pkey" for table "t"
-- TEST
-- Groups on the primary-key column b and orders by the aggregate alias m.
-- No rows are inserted into t, so the expected result is empty.
SELECT MAX(a) AS m FROM t GROUP BY b ORDER BY m;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
drop table if exists t;
-- end_ignore
---
--- Passing through distribution matching type in default implementation
---
-- SETUP
-- start_ignore
drop table if exists customer;
NOTICE: table "customer" does not exist, skipping
drop table if exists sale;
NOTICE: table "sale" does not exist, skipping
-- end_ignore
-- customer: primary key and distribution key are both cn.
create table customer
(
cn int not null,
cname text not null,
cloc text,
primary key (cn)
) distributed by (cn);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "customer_pkey" for table "customer"
insert into customer values
( 1, 'Macbeth', 'Inverness'),
( 2, 'Duncan', 'Forres'),
( 3, 'Lady Macbeth', 'Inverness'),
( 4, 'Witches, Inc', 'Lonely Heath');
-- sale: composite primary key (cn, vn, pn), distributed by the same three
-- columns.
create table sale
(
cn int not null,
vn int not null,
pn int not null,
dt date not null,
qty int not null,
prc float not null,
primary key (cn, vn, pn)
) distributed by (cn,vn,pn);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "sale_pkey" for table "sale"
insert into sale values
( 2, 40, 100, '1401-1-1', 1100, 2400),
( 1, 10, 200, '1401-3-1', 1, 0),
( 3, 40, 200, '1401-4-1', 1, 0),
( 1, 20, 100, '1401-5-1', 1, 0),
( 1, 30, 300, '1401-5-2', 1, 0),
( 1, 50, 400, '1401-6-1', 1, 0),
( 2, 50, 400, '1401-6-1', 1, 0),
( 1, 30, 500, '1401-6-1', 12, 5),
( 3, 30, 500, '1401-6-1', 12, 5),
( 3, 30, 600, '1401-6-1', 12, 5),
( 4, 40, 700, '1401-6-1', 1, 1),
( 4, 40, 800, '1401-6-1', 1, 1);
-- TEST
-- Joins sale to customer on cn and ranks each customer's sales by vn.
-- Rows with the same vn inside one partition share a rank and the next rank
-- is skipped (for cn = 3 the vn values 30, 30, 40 give ranks 1, 1, 3).
-- The final ORDER BY 1, 2 sorts the output by cname and then by rank.
select cname,
rank() over (partition by sale.cn order by vn)
from sale, customer
where sale.cn = customer.cn
order by 1, 2;
cname | rank
--------------+------
Duncan | 1
Duncan | 2
Lady Macbeth | 1
Lady Macbeth | 1
Lady Macbeth | 3
Macbeth | 1
Macbeth | 2
Macbeth | 3
Macbeth | 3
Macbeth | 5
Witches, Inc | 1
Witches, Inc | 1
(12 rows)
-- CLEANUP
-- start_ignore
drop table if exists customer;
drop table if exists sale;
-- end_ignore
---
--- Optimizer query crashing for logical window with no window functions
---
-- SETUP
create table mpp23240(a int, b int, c int, d int, e int, f int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
selectcount_operator('explain select product_id,concat(E''#attribute_''||attribute_id::varchar||E'':''||attribute) as attr FROM attribute_table GROUP BY product_id;','HashAggregate');