未验证 提交 e52dd032 编写于 作者: H Heikki Linnakangas 提交者: GitHub

Only apply transformGroupedWindows() with ORCA. (#10306)

* Only apply transformGroupedWindows() with ORCA.

The Postgres planner doesn't need it. Move the code to do it, so that it's
only used before passing a tree to ORCA. This doesn't change anything with
ORCA, but with the Postgres planner, it has some benefits:

* Some cases before this patch do not give correct results and now they run
  correctly (e.g. case `regress/olap_window_seq`)
* Fixes github issue #10143.

* Make transformGroupedWindows walk the entire tree

The transformGroupedWindows function now recursively transforms any
Query node in the tree that has both window functions and groupby or
aggregates.

Also fixed a pre-existing bug where we put a subquery in the target
list of such a Query node into the upper query, Q'. This meant that
any outer references to the scope of Q' no longer had the correct
varattno. The fix is to place the subquery into the target list of
the lower query, Q'' instead, which has the same range table as the
original query Q. Therefore, the varattnos to outer references to the
scope of Q (now Q'') don't need to be updated. Note that varlevelsup to
scopes above Q still need to be adjusted, since we inserted a new
scope Q'. (See comments in code for explanations of Q, Q', Q'').
Co-authored-by: NHeikki Linnakangas <hlinnakangas@pivotal.io>
Co-authored-by: NHans Zeller <hzeller@vmware.com>
Co-authored-by: NAbhijit Subramanya <asubramanya@pivotal.io>
上级 33c4582e
此差异已折叠。
......@@ -39,27 +39,6 @@ static Query *make_sirvf_subquery(FuncExpr *fe);
static bool safe_to_replace_sirvf_tle(Query *query);
static bool safe_to_replace_sirvf_rte(Query *query);
/**
* Preprocess query structure for consumption by the optimizer
*/
Query *
preprocess_query_optimizer(PlannerInfo *root, Query *query, ParamListInfo boundParams)
{
#ifdef USE_ASSERT_CHECKING
Query *qcopy = (Query *) copyObject(query);
#endif
/* fold all constant expressions */
Query *res = fold_constants(root, query, boundParams, GPOPT_MAX_FOLDED_CONSTANT_SIZE);
#ifdef USE_ASSERT_CHECKING
Assert(equal(qcopy, query) && "Preprocessing should not modify original query object");
#endif
return res;
}
/**
* Normalize query before planning.
*/
......
此差异已折叠。
......@@ -21,6 +21,4 @@
extern Query *normalize_query(Query *query);
/* preprocess the query for the optimizer */
extern Query *preprocess_query_optimizer(PlannerInfo *root, Query *query, ParamListInfo boundParams);
#endif /* TRANSFORM_H */
......@@ -591,6 +591,101 @@ select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1144;
| 1144
(1 row)
--
-- Test a query with window function over an aggregate, and a subquery.
--
-- Github Issue https://github.com/greenplum-db/gpdb/issues/10143
create table t1_github_issue_10143(
base_ym varchar(6),
code varchar(5),
name varchar(60)
);
create table t2_github_issue_10143(
base_ym varchar(6),
dong varchar(8),
code varchar(6),
salary numeric(18)
);
insert into t1_github_issue_10143 values ('a', 'acode', 'aname');
insert into t2_github_issue_10143 values ('a', 'adong', 'acode', 1000);
insert into t2_github_issue_10143 values ('b', 'bdong', 'bcode', 1100);
set optimizer_trace_fallback = on;
explain select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
WindowAgg (cost=1.02..2002.10 rows=1 width=176)
-> Finalize GroupAggregate (cost=1.02..1001.06 rows=1 width=38)
Group Key: a.code
-> Gather Motion 3:1 (slice2; segments: 3) (cost=1.02..1001.04 rows=1 width=38)
Merge Key: a.code
-> Partial GroupAggregate (cost=1.02..1.04 rows=1 width=38)
Group Key: a.code
-> Sort (cost=1.02..1.02 rows=1 width=11)
Sort Key: a.code
-> Seq Scan on t2_github_issue_10143 a (cost=0.00..1.01 rows=1 width=11)
SubPlan 1
-> Limit (cost=0.00..1001.02 rows=1 width=6)
-> Result (cost=0.00..1001.02 rows=1 width=6)
Filter: ((t1_github_issue_10143.code)::text = (a.code)::text)
-> Materialize (cost=0.00..1001.01 rows=1 width=12)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1001.01 rows=1 width=12)
-> Seq Scan on t1_github_issue_10143 (cost=0.00..1.01 rows=1 width=12)
Optimizer: Postgres query optimizer
(18 rows)
select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
dongnm | sum
--------+------
aname | 2100
| 2100
(2 rows)
select * from (select sum(a.salary) over(), count(*)
from t2_github_issue_10143 a
group by a.salary) T;
sum | count
------+-------
2100 | 1
2100 | 1
(2 rows)
-- this query currently falls back, needs to be fixed
select (select rn from (select row_number() over () as rn, name
from t1_github_issue_10143
where code = a.code
group by name) T
) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
dongnm | sum
--------+------
1 | 2100
| 2100
(2 rows)
with cte as (select row_number() over (order by code) as rn1, code
from t2_github_issue_10143
group by code)
select row_number() over (order by name) as rn2, name
from t1_github_issue_10143
group by name
union all
select * from cte;
rn2 | name
-----+-------
1 | aname
1 | acode
2 | bcode
(3 rows)
reset optimizer_trace_fallback;
-- CLEANUP
-- start_ignore
drop schema bfv_olap cascade;
......
-- Tests for old bugs related to OLAP queries.
-- First create a schema to contain the test tables, and few common test
-- tables that are shared by several test queries.
create schema bfv_olap;
set search_path=bfv_olap;
create table customer
(
cn int not null,
cname text not null,
cloc text,
primary key (cn)
) distributed by (cn);
insert into customer values
( 1, 'Macbeth', 'Inverness'),
( 2, 'Duncan', 'Forres'),
( 3, 'Lady Macbeth', 'Inverness'),
( 4, 'Witches, Inc', 'Lonely Heath');
create table vendor
(
vn int not null,
vname text not null,
vloc text,
primary key (vn)
) distributed by (vn);
insert into vendor values
( 10, 'Witches, Inc', 'Lonely Heath'),
( 20, 'Lady Macbeth', 'Inverness'),
( 30, 'Duncan', 'Forres'),
( 40, 'Macbeth', 'Inverness'),
( 50, 'Macduff', 'Fife');
create table sale
(
cn int not null,
vn int not null,
pn int not null,
dt date not null,
qty int not null,
prc float not null,
primary key (cn, vn, pn)
) distributed by (cn,vn,pn);
insert into sale values
( 2, 40, 100, '1401-1-1', 1100, 2400),
( 1, 10, 200, '1401-3-1', 1, 0),
( 3, 40, 200, '1401-4-1', 1, 0),
( 1, 20, 100, '1401-5-1', 1, 0),
( 1, 30, 300, '1401-5-2', 1, 0),
( 1, 50, 400, '1401-6-1', 1, 0),
( 2, 50, 400, '1401-6-1', 1, 0),
( 1, 30, 500, '1401-6-1', 12, 5),
( 3, 30, 500, '1401-6-1', 12, 5),
( 3, 30, 600, '1401-6-1', 12, 5),
( 4, 40, 700, '1401-6-1', 1, 1),
( 4, 40, 800, '1401-6-1', 1, 1);
--
-- Test case errors out when we define aggregates without combine functions
-- and use it as an aggregate derived window function.
--
-- SETUP
-- start_ignore
drop table if exists toy;
NOTICE: table "toy" does not exist, skipping
drop aggregate if exists mysum1(int4);
NOTICE: aggregate mysum1(int4) does not exist, skipping
drop aggregate if exists mysum2(int4);
NOTICE: aggregate mysum2(int4) does not exist, skipping
-- end_ignore
create table toy(id,val) as select i,i from generate_series(1,5) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create aggregate mysum1(int4) (sfunc = int4_sum, combinefunc=int8pl, stype=bigint);
create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint);
-- TEST
select id, val, sum(val) over (w), mysum1(val) over (w), mysum2(val) over (w) from toy window w as (order by id rows 2 preceding);
id | val | sum | mysum1 | mysum2
----+-----+-----+--------+--------
1 | 1 | 1 | 1 | 1
2 | 2 | 3 | 3 | 3
3 | 3 | 6 | 6 | 6
4 | 4 | 9 | 9 | 9
5 | 5 | 12 | 12 | 12
(5 rows)
-- CLEANUP
-- start_ignore
drop aggregate if exists mysum1(int4);
drop aggregate if exists mysum2(int4);
drop table if exists toy;
-- end_ignore
--
-- Test case errors out when we define aggregates without preliminary functions and use it as an aggregate derived window function.
--
-- SETUP
-- start_ignore
drop type if exists ema_type cascade;
NOTICE: type "ema_type" does not exist, skipping
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
ERROR: type "ema_type" does not exist
drop function if exists ema_fin(t ema_type) cascade;
ERROR: type "ema_type" does not exist
drop aggregate if exists ema(float, float);
NOTICE: aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping
drop table if exists ema_test cascade;
NOTICE: table "ema_test" does not exist, skipping
-- end_ignore
create type ema_type as (x float, e float);
create function ema_adv(t ema_type, v float, x float)
returns ema_type
as $$
begin
if t.e is null then
t.e = v;
t.x = x;
else
if t.x != x then
raise exception 'ema smoothing x may not vary';
end if;
t.e = t.e + (v - t.e) * t.x;
end if;
return t;
end;
$$ language plpgsql;
create function ema_fin(t ema_type)
returns float
as $$
begin
return t.e;
end;
$$ language plpgsql;
create aggregate ema(float, float) (
sfunc = ema_adv,
stype = ema_type,
finalfunc = ema_fin,
initcond = '(,)');
create table ema_test (k int, v float ) distributed by (k);
insert into ema_test select i, 4*(i::float/20) + 10.0*(1+cos(radians(i*5))) from generate_series(0,19) i(i);
-- TEST
select k, v, ema(v, 0.9) over (order by k) from ema_test order by k;
k | v | ema
----+------------------+------------------
0 | 20 | 20
1 | 20.1619469809175 | 20.1457522828257
2 | 20.2480775301221 | 20.2378450053924
3 | 20.2592582628907 | 20.2571169371409
4 | 20.1969262078591 | 20.2029452807873
5 | 20.0630778703665 | 20.0770646114086
6 | 19.8602540378444 | 19.8819350952008
7 | 19.5915204428899 | 19.620561908121
8 | 19.2604444311898 | 19.2964561788829
9 | 18.8710678118655 | 18.9136066485672
10 | 18.4278760968654 | 18.4764491520356
11 | 17.9357643635105 | 17.989832842363
12 | 17.4 | 17.4589832842363
13 | 16.826182617407 | 16.8894626840899
14 | 16.2202014332567 | 16.28712755834
15 | 15.5881904510252 | 15.6580841617567
16 | 14.9364817766693 | 15.008642015178
17 | 14.2715574274766 | 14.3452658862467
18 | 13.6 | 13.6745265886247
19 | 12.9284425725234 | 13.0030509741335
(20 rows)
select k, v, ema(v, 0.9) over (order by k rows between unbounded preceding and current row) from ema_test order by k;
k | v | ema
----+------------------+------------------
0 | 20 | 20
1 | 20.1619469809175 | 20.1457522828257
2 | 20.2480775301221 | 20.2378450053924
3 | 20.2592582628907 | 20.2571169371409
4 | 20.1969262078591 | 20.2029452807873
5 | 20.0630778703665 | 20.0770646114086
6 | 19.8602540378444 | 19.8819350952008
7 | 19.5915204428899 | 19.620561908121
8 | 19.2604444311898 | 19.2964561788829
9 | 18.8710678118655 | 18.9136066485672
10 | 18.4278760968654 | 18.4764491520356
11 | 17.9357643635105 | 17.989832842363
12 | 17.4 | 17.4589832842363
13 | 16.826182617407 | 16.8894626840899
14 | 16.2202014332567 | 16.28712755834
15 | 15.5881904510252 | 15.6580841617567
16 | 14.9364817766693 | 15.008642015178
17 | 14.2715574274766 | 14.3452658862467
18 | 13.6 | 13.6745265886247
19 | 12.9284425725234 | 13.0030509741335
(20 rows)
-- CLEANUP
-- start_ignore
drop table if exists ema_test cascade;
drop aggregate if exists ema(float, float);
drop function if exists ema_fin(t ema_type) cascade;
drop function if exists ema_adv(t ema_type, v float, x float) cascade;
drop type if exists ema_type cascade;
-- end_ignore
--
-- Test with/without group by
--
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS r;
NOTICE: table "r" does not exist, skipping
-- end_ignore
CREATE TABLE r
(
a INT NOT NULL,
b INT,
c CHARACTER VARYING(200),
d NUMERIC(10,0),
e DATE
) DISTRIBUTED BY (a,b);
ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
NOTICE: updating distribution policy to match new PRIMARY KEY
--TEST
SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY a ORDER BY m;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b;
m
---
(0 rows)
-- ORDER BY clause includes some grouping column or not
SELECT MAX(a) AS m FROM R GROUP BY b ORDER BY m,b;
m
---
(0 rows)
SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m,b,e;
m
---
(0 rows)
SELECT MAX(a) AS m FROM R GROUP BY b,e ORDER BY m;
m
---
(0 rows)
-- ORDER BY 1 or more columns
SELECT MAX(a),d,e AS m FROM r GROUP BY b,d,e ORDER BY m,e,d;
max | d | m
-----+---+---
(0 rows)
SELECT MIN(a),d,e AS m FROM r GROUP BY b,e,d ORDER BY e,d;
min | d | m
-----+---+---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b,c,d,e ORDER BY e,d;
m
---
(0 rows)
SELECT MAX(a) AS m FROM r GROUP BY b,e ORDER BY e;
m
---
(0 rows)
SELECT MAX(e) AS m FROM r GROUP BY b ORDER BY m;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS r;
-- end_ignore
--
-- ORDER BY clause includes some grouping column or not
--
-- SETUP
-- start_ignore
DROP TABLE IF EXISTS dm_calendar;
NOTICE: table "dm_calendar" does not exist, skipping
-- end_ignore
CREATE TABLE dm_calendar (
calendar_id bigint NOT NULL,
date_name character varying(200),
date_name_cn character varying(200),
calendar_date date,
current_day numeric(10,0),
month_id numeric(10,0),
month_name character varying(200),
month_name_cn character varying(200),
month_name_short character varying(200),
month_name_short_cn character varying(200),
days_in_month numeric(10,0),
first_of_month numeric(10,0),
last_month_id numeric(10,0),
month_end numeric(10,0),
quarter_id numeric(10,0),
quarter_name character varying(200),
quarter_name_cn character varying(200),
quarter_name_short character varying(200),
quarter_name_short_cn character varying(200),
year_id numeric(10,0),
year_name character varying(200),
year_name_cn character varying(200),
description character varying(500),
create_date timestamp without time zone,
month_week_num character varying(100),
month_week_begin character varying(100),
month_week_end character varying(100),
half_year character varying(100),
weekend_flag character varying(100),
holidays_flag character varying(100),
workday_flag character varying(100),
month_number numeric(10,0)
) DISTRIBUTED BY (calendar_id);
ALTER TABLE ONLY dm_calendar ADD CONSTRAINT dm_calendar_pkey PRIMARY KEY (calendar_id);
--TEST
SELECT "year_id" as id , min("year_name") as a from (select "year_id" as "year_id" , min("year_name") as "year_name" from "dm_calendar" group by "year_id") "dm_calendar3" group by "year_id" order by a ASC ;
id | a
----+---
(0 rows)
-- CLEANUP
-- start_ignore
DROP TABLE IF EXISTS dm_calendar;
-- end_ignore
--
-- Test with/without group by with primary key as dist key
--
-- SETUP
-- start_ignore
drop table if exists t;
NOTICE: table "t" does not exist, skipping
-- end_ignore
create table t
(
a int NOT NULL,
b int,
c character varying(200),
d numeric(10,0),
e date
) distributed by (b);
alter table t ADD CONSTRAINT pkey primary key (b);
-- TEST
SELECT MAX(a) AS m FROM t GROUP BY b ORDER BY m;
m
---
(0 rows)
-- CLEANUP
-- start_ignore
drop table if exists t;
-- end_ignore
--
-- Passing through distribution matching type in default implementation
--
-- TEST
select cname,
rank() over (partition by sale.cn order by vn)
from sale, customer
where sale.cn = customer.cn
order by 1, 2;
cname | rank
--------------+------
Duncan | 1
Duncan | 2
Lady Macbeth | 1
Lady Macbeth | 1
Lady Macbeth | 3
Macbeth | 1
Macbeth | 2
Macbeth | 3
Macbeth | 3
Macbeth | 5
Witches, Inc | 1
Witches, Inc | 1
(12 rows)
--
-- Optimizer query crashing for logical window with no window functions
--
-- SETUP
create table mpp23240(a int, b int, c int, d int, e int, f int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- TEST
select a, b,
case 1
when 10 then
sum(c) over(partition by a)
when 20 then
sum(d) over(partition by a)
else
5
end as sum1
from (select * from mpp23240 where f > 10) x;
a | b | sum1
---+---+------
(0 rows)
-- CLEANUP
-- start_ignore
drop table mpp23240;
-- end_ignore
--
-- Test for the bug reported at https://github.com/greenplum-db/gpdb/issues/2236
--
create table test1 (x int, y int, z double precision);
insert into test1 select a, b, a*10 + b from generate_series(1, 5) a, generate_series(1, 5) b;
select sum(z) over (partition by x) as sumx, sum(z) over (partition by y) as sumy from test1;
sumx | sumy
------+------
65 | 155
65 | 160
65 | 165
65 | 170
65 | 175
115 | 155
115 | 160
115 | 165
115 | 170
115 | 175
165 | 155
165 | 160
165 | 165
165 | 170
165 | 175
215 | 155
215 | 160
215 | 165
215 | 170
215 | 175
265 | 155
265 | 160
265 | 165
265 | 170
265 | 175
(25 rows)
drop table test1;
--
-- This failed at one point because of an over-zealous syntax check, with
-- "window functions not allowed in WHERE clause" error.
--
select sum(g) from generate_series(1, 5) g
where g in (
select rank() over (order by x) from generate_series(1,5) x
);
sum
-----
15
(1 row)
--
-- This caused a crash in ROLLUP planning at one point.
--
SELECT sale.vn
FROM sale,vendor
WHERE sale.vn=vendor.vn
GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
vn
----
40
20
30
10
40
50
30
40
30
50
30
40
(34 rows)
SELECT DISTINCT sale.vn
FROM sale,vendor
WHERE sale.vn=vendor.vn
GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
vn
----
10
20
30
40
50
(6 rows)
--
-- Another ROLLUP query, that hit a bug in setting up the planner-generated
-- subquery's targetlist. (https://github.com/greenplum-db/gpdb/issues/6754)
--
SELECT sale.vn, rank() over (partition by sale.vn)
FROM vendor, sale
WHERE sale.vn=vendor.vn
GROUP BY ROLLUP( sale.vn);
vn | rank
----+------
| 1
10 | 1
20 | 1
30 | 1
40 | 1
50 | 1
(6 rows)
--
-- Test window function with constant PARTITION BY
--
CREATE TABLE testtab (a int4);
insert into testtab values (1), (2);
SELECT count(*) OVER (PARTITION BY 1) AS count FROM testtab;
count
-------
2
2
(2 rows)
-- Another variant, where the PARTITION BY is not a literal, but the
-- planner can deduce that it's a constant through equivalence classes.
SELECT 1
FROM (
SELECT a, count(*) OVER (PARTITION BY a) FROM (VALUES (1,1)) AS foo(a)
) AS sup(c, d)
WHERE c = 87 ;
?column?
----------
(0 rows)
--
-- This used to crash, and/or produce incorrect results. The culprit was that a Hash Agg
-- was used, but the planner put a Gather Merge at the top, without a Sort, even though
-- a Hash Agg doesn't preserve the sort order.
--
SELECT sale.qty
FROM sale
GROUP BY ROLLUP((qty)) order by 1;
qty
------
1
12
1100
(4 rows)
--
-- Test two-stage aggregate with grouping sets and a HAVING clause
--
-- persuade planner to choose a two-stage plan.
set gp_motion_cost_per_row TO 1000;
select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1;
cn | sum
----+-----
1 | 1
1 | 1
1 | 1
2 | 1
3 | 1
(5 rows)
-- same, but the HAVING clause matches a rolled up row.
select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1144;
cn | sum
----+------
| 1144
(1 row)
--
-- Test a query with window function over an aggregate, and a subquery.
--
-- Github Issue https://github.com/greenplum-db/gpdb/issues/10143
create table t1_github_issue_10143(
base_ym varchar(6),
code varchar(5),
name varchar(60)
);
create table t2_github_issue_10143(
base_ym varchar(6),
dong varchar(8),
code varchar(6),
salary numeric(18)
);
insert into t1_github_issue_10143 values ('a', 'acode', 'aname');
insert into t2_github_issue_10143 values ('a', 'adong', 'acode', 1000);
insert into t2_github_issue_10143 values ('b', 'bdong', 'bcode', 1100);
set optimizer_trace_fallback = on;
explain select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
WindowAgg (cost=0.00..1324055.31 rows=1 width=16)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324055.31 rows=1 width=16)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=14)
Group Key: t2_github_issue_10143.code
-> Sort (cost=0.00..431.00 rows=1 width=11)
Sort Key: t2_github_issue_10143.code
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=11)
Hash Key: t2_github_issue_10143.code
-> Seq Scan on t2_github_issue_10143 (cost=0.00..431.00 rows=1 width=11)
SubPlan 1
-> Limit (cost=0.00..431.00 rows=1 width=6)
-> Result (cost=0.00..431.00 rows=1 width=6)
Filter: ((t1_github_issue_10143.code)::text = (t2_github_issue_10143.code)::text)
-> Materialize (cost=0.00..431.00 rows=1 width=12)
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=12)
-> Seq Scan on t1_github_issue_10143 (cost=0.00..431.00 rows=1 width=12)
Optimizer: Pivotal Optimizer (GPORCA)
(17 rows)
select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
dongnm | sum
--------+------
aname | 2100
| 2100
(2 rows)
select * from (select sum(a.salary) over(), count(*)
from t2_github_issue_10143 a
group by a.salary) T;
sum | count
------+-------
2100 | 1
2100 | 1
(2 rows)
-- this query currently falls back, needs to be fixed
select (select rn from (select row_number() over () as rn, name
from t1_github_issue_10143
where code = a.code
group by name) T
) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: Query-to-DXL Translation: No variable entry found due to incorrect normalization of query
dongnm | sum
--------+------
1 | 2100
| 2100
(2 rows)
with cte as (select row_number() over (order by code) as rn1, code
from t2_github_issue_10143
group by code)
select row_number() over (order by name) as rn2, name
from t1_github_issue_10143
group by name
union all
select * from cte;
rn2 | name
-----+-------
1 | aname
1 | acode
2 | bcode
(3 rows)
reset optimizer_trace_fallback;
-- CLEANUP
-- start_ignore
drop schema bfv_olap cascade;
-- end_ignore
......@@ -7853,8 +7853,11 @@ from
group by
2*b.g,
lower(f);
ERROR: unresolved grouping key in window query
HINT: You might need to use explicit aliases and/or to refer to grouping keys in the same way throughout the query.
?column? | lower | sum
----------+-------+-----
2 | b | 2
(1 row)
-- End MPP-12082
-- MPP-12913
select count(*) over (partition by 1 order by cn rows between 1 preceding and 1 preceding) from sale;
......
......@@ -626,20 +626,19 @@ explain (costs off)
select first_value(max(x)) over (), y
from (select unique1 as x, ten+four as y from tenk1) ss
group by y;
QUERY PLAN
------------------------------------------------------------------------
QUERY PLAN
------------------------------------------------------------------
WindowAgg
-> Gather Motion 3:1 (slice1; segments: 3)
-> Subquery Scan on "Window"
-> Finalize HashAggregate
Group Key: ((tenk1.ten + tenk1.four))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ((tenk1.ten + tenk1.four))
-> Partial HashAggregate
Group Key: (tenk1.ten + tenk1.four)
-> Seq Scan on tenk1
-> Finalize HashAggregate
Group Key: ((tenk1.ten + tenk1.four))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ((tenk1.ten + tenk1.four))
-> Partial HashAggregate
Group Key: (tenk1.ten + tenk1.four)
-> Seq Scan on tenk1
Optimizer: Postgres query optimizer
(11 rows)
(10 rows)
-- test non-default frame specifications
SELECT four, ten,
......
......@@ -392,6 +392,64 @@ select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1;
select cn, sum(qty) from sale group by rollup(cn,vn) having sum(qty)=1144;
--
-- Test a query with window function over an aggregate, and a subquery.
--
-- Github Issue https://github.com/greenplum-db/gpdb/issues/10143
create table t1_github_issue_10143(
base_ym varchar(6),
code varchar(5),
name varchar(60)
);
create table t2_github_issue_10143(
base_ym varchar(6),
dong varchar(8),
code varchar(6),
salary numeric(18)
);
insert into t1_github_issue_10143 values ('a', 'acode', 'aname');
insert into t2_github_issue_10143 values ('a', 'adong', 'acode', 1000);
insert into t2_github_issue_10143 values ('b', 'bdong', 'bcode', 1100);
set optimizer_trace_fallback = on;
explain select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
select * from (select sum(a.salary) over(), count(*)
from t2_github_issue_10143 a
group by a.salary) T;
-- this query currently falls back, needs to be fixed
select (select rn from (select row_number() over () as rn, name
from t1_github_issue_10143
where code = a.code
group by name) T
) as dongnm
,sum(sum(a.salary)) over()
from t2_github_issue_10143 a
group by a.code;
with cte as (select row_number() over (order by code) as rn1, code
from t2_github_issue_10143
group by code)
select row_number() over (order by name) as rn2, name
from t1_github_issue_10143
group by name
union all
select * from cte;
reset optimizer_trace_fallback;
-- CLEANUP
-- start_ignore
drop schema bfv_olap cascade;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册