未验证 提交 ea3c7d48 编写于 作者: J Jinbao Chen 提交者: GitHub

Fix wrong result on aggregate with distinct (#9962)

The multi-phase aggregate code adds a SubqueryScan below the final
aggregate plan node, but it does not create a new root for it. So when
we created new path keys for the sort node, the wrong equivalence
classes were found, because they were looked up in the wrong list.
Now we make a new root (a copy of the current one) with an empty
equivalence class list while the path keys are created. It is a bit
of a hack.
上级 18f20cbe
......@@ -1649,9 +1649,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* change the previous root->parse Query node, which makes the
* current sort_pathkeys invalid.
*/
sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause,
PlannerInfo *scroot = NULL;
scroot = makeNode(PlannerInfo);
memcpy(scroot, root, sizeof(PlannerInfo));
scroot->eq_classes = NULL;
sort_pathkeys = make_pathkeys_for_sortclauses(scroot, parse->sortClause,
result_plan->targetlist, true);
sort_pathkeys = canonicalize_pathkeys(root, sort_pathkeys);
pfree(scroot);
}
}
else /* Not GP_ROLE_DISPATCH */
......
......@@ -1597,7 +1597,67 @@ select * from int2vectortab union select * from int2vectortab;
-- s/\(cdbmutate\.c:\d+\)//
-- end_matchsubs
select count(*) over (partition by t) from int2vectortab;
ERROR: cannot use expression as distribution key, because it is not hashable (cdbmutate.c:1329)
ERROR: cannot use expression as distribution key, because it is not hashable (cdbmutate.c:1383)
-- This is currently broken on 5X_STABLE, although it's been fixed in master.
CREATE TABLE distinct_agg1 (a int, b int, c int);
CREATE TABLE distinct_agg2 (d int, e int, f int);
INSERT INTO distinct_agg1 SELECT i%50, i%2, i%5 from generate_series(1, 100) i ;
INSERT INTO distinct_agg2 SELECT i%50, i, i%2 from generate_series(1, 100) i ;
SELECT DISTINCT c, e FROM distinct_agg1 AS t1, distinct_agg2 AS t2 WHERE t1.a = t2.d AND t1.b = 0 GROUP BY 1,2;
c | e
---+-----
0 | 10
0 | 20
0 | 30
0 | 40
0 | 50
0 | 60
0 | 70
0 | 80
0 | 90
0 | 100
1 | 6
1 | 16
1 | 26
1 | 36
1 | 46
1 | 56
1 | 66
1 | 76
1 | 86
1 | 96
2 | 2
2 | 12
2 | 22
2 | 32
2 | 42
2 | 52
2 | 62
2 | 72
2 | 82
2 | 92
3 | 8
3 | 18
3 | 28
3 | 38
3 | 48
3 | 58
3 | 68
3 | 78
3 | 88
3 | 98
4 | 4
4 | 14
4 | 24
4 | 34
4 | 44
4 | 54
4 | 64
4 | 74
4 | 84
4 | 94
(50 rows)
-- CLEANUP
set client_min_messages='warning';
drop schema bfv_aggregate cascade;
......@@ -1605,6 +1605,66 @@ select count(*) over (partition by t) from int2vectortab;
2
(5 rows)
-- This is currently broken on 5X_STABLE, although it's been fixed in master.
CREATE TABLE distinct_agg1 (a int, b int, c int);
CREATE TABLE distinct_agg2 (d int, e int, f int);
INSERT INTO distinct_agg1 SELECT i%50, i%2, i%5 from generate_series(1, 100) i ;
INSERT INTO distinct_agg2 SELECT i%50, i, i%2 from generate_series(1, 100) i ;
SELECT DISTINCT c, e FROM distinct_agg1 AS t1, distinct_agg2 AS t2 WHERE t1.a = t2.d AND t1.b = 0 GROUP BY 1,2;
c | e
---+-----
0 | 10
0 | 20
0 | 30
0 | 40
0 | 50
0 | 60
0 | 70
0 | 80
0 | 90
0 | 100
1 | 6
1 | 16
1 | 26
1 | 36
1 | 46
1 | 56
1 | 66
1 | 76
1 | 86
1 | 96
2 | 2
2 | 12
2 | 22
2 | 32
2 | 42
2 | 52
2 | 62
2 | 72
2 | 82
2 | 92
3 | 8
3 | 18
3 | 28
3 | 38
3 | 48
3 | 58
3 | 68
3 | 78
3 | 88
3 | 98
4 | 4
4 | 14
4 | 24
4 | 34
4 | 44
4 | 54
4 | 64
4 | 74
4 | 84
4 | 94
(50 rows)
-- CLEANUP
set client_min_messages='warning';
drop schema bfv_aggregate cascade;
......@@ -2140,111 +2140,116 @@ select * from dim1 inner join fact1 on (dim1.pid=fact1.pid) and fact1.code = 'OH
--
set gp_dynamic_partition_pruning=off;
explain select fact1.code, count(*) from dim1 inner join fact1 on (dim1.pid=fact1.pid) group by 1 order by 1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=7908.21..7924.11 rows=530 width=40)
Merge Key: dpe_multi.fact1.code
-> GroupAggregate (cost=7908.21..7924.11 rows=177 width=40)
Group By: dpe_multi.fact1.code
-> Sort (cost=7908.21..7909.53 rows=177 width=40)
Sort Key: dpe_multi.fact1.code
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=7863.02..7884.23 rows=177 width=40)
Hash Key: dpe_multi.fact1.code
-> GroupAggregate (cost=7863.02..7873.63 rows=177 width=40)
Group By: dpe_multi.fact1.code
-> Sort (cost=7863.02..7864.35 rows=177 width=32)
Sort Key: dpe_multi.fact1.code
-> Hash Join (cost=1.06..7839.02 rows=177 width=32)
Hash Cond: dpe_multi.fact1.pid = dim1.pid
-> Append (cost=0.00..6504.00 rows=176800 width=36)
-> Seq Scan on fact1_1_prt_1_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_1_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_1_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_2_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_2_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_2_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_3_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_3_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_3_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_4_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_4_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Seq Scan on fact1_1_prt_4_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Hash (cost=1.04..1.04 rows=1 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.04 rows=1 width=4)
-> Seq Scan on dim1 (cost=0.00..1.01 rows=1 width=4)
Settings: enable_bitmapscan=off; enable_hashjoin=on; enable_indexscan=off; enable_mergejoin=off; enable_nestloop=off; enable_seqscan=on; gp_dynamic_partition_pruning=off; gp_segments_for_planner=2; optimizer_segments=2
(31 rows)
select fact1.code, count(*) from dim1 inner join fact1 on (dim1.pid=fact1.pid) group by 1 order by 1;
code | count
------+-------
CA | 50
OH | 50
(2 rows)
set gp_dynamic_partition_pruning=on;
explain select fact1.code, count(*) from dim1 inner join fact1 on (dim1.pid=fact1.pid) group by 1 order by 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=7908.21..7924.11 rows=530 width=40)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=7948.09..7949.42 rows=530 width=40)
Merge Key: dpe_multi.fact1.code
-> GroupAggregate (cost=7908.21..7924.11 rows=177 width=40)
Group By: dpe_multi.fact1.code
-> Sort (cost=7908.21..7909.53 rows=177 width=40)
Sort Key: dpe_multi.fact1.code
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=7863.02..7884.23 rows=177 width=40)
Hash Key: dpe_multi.fact1.code
-> GroupAggregate (cost=7863.02..7873.63 rows=177 width=40)
Group By: dpe_multi.fact1.code
-> Sort (cost=7863.02..7864.35 rows=177 width=32)
Sort Key: dpe_multi.fact1.code
-> Hash Join (cost=1.06..7839.02 rows=177 width=32)
Hash Cond: dpe_multi.fact1.pid = dim1.pid
-> Append (cost=0.00..6504.00 rows=176800 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Sort (cost=7948.09..7949.42 rows=177 width=40)
Sort Key: dpe_multi.fact1.code
-> GroupAggregate (cost=7908.21..7924.11 rows=177 width=40)
Group By: dpe_multi.fact1.code
-> Sort (cost=7908.21..7909.53 rows=177 width=40)
Sort Key: dpe_multi.fact1.code
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=7863.02..7884.23 rows=177 width=40)
Hash Key: dpe_multi.fact1.code
-> GroupAggregate (cost=7863.02..7873.63 rows=177 width=40)
Group By: dpe_multi.fact1.code
-> Sort (cost=7863.02..7864.35 rows=177 width=32)
Sort Key: dpe_multi.fact1.code
-> Hash Join (cost=1.06..7839.02 rows=177 width=32)
Hash Cond: dpe_multi.fact1.pid = dim1.pid
-> Append (cost=0.00..6504.00 rows=176800 width=36)
-> Seq Scan on fact1_1_prt_1_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_1_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_1_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_2_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_2_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_2_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_3_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_3_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_3_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_4_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_4_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_4_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Hash (cost=1.04..1.04 rows=1 width=4)
-> Partition Selector for fact1 (dynamic scan id: 1) (cost=0.00..1.04 rows=1 width=4)
Filter: dim1.pid
-> Hash (cost=1.04..1.04 rows=1 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.04 rows=1 width=4)
-> Seq Scan on dim1 (cost=0.00..1.01 rows=1 width=4)
Settings: enable_bitmapscan=off; enable_hashjoin=on; enable_indexscan=off; enable_mergejoin=off; enable_nestloop=off; enable_seqscan=on; gp_dynamic_partition_pruning=on; gp_segments_for_planner=2; optimizer_segments=2
Settings: enable_bitmapscan=off; enable_hashjoin=on; enable_indexscan=off; enable_mergejoin=off; enable_nestloop=off; enable_seqscan=on; gp_dynamic_partition_pruning=off; gp_segments_for_planner=2; optimizer=off; optimizer_segments=2
Optimizer status: legacy query optimizer
(58 rows)
(34 rows)
select fact1.code, count(*) from dim1 inner join fact1 on (dim1.pid=fact1.pid) group by 1 order by 1;
code | count
------+-------
CA | 50
OH | 50
(2 rows)
set gp_dynamic_partition_pruning=on;
explain select fact1.code, count(*) from dim1 inner join fact1 on (dim1.pid=fact1.pid) group by 1 order by 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=7948.09..7949.42 rows=530 width=40)
Merge Key: dpe_multi.fact1.code
-> Sort (cost=7948.09..7949.42 rows=177 width=40)
Sort Key: dpe_multi.fact1.code
-> GroupAggregate (cost=7908.21..7924.11 rows=177 width=40)
Group By: dpe_multi.fact1.code
-> Sort (cost=7908.21..7909.53 rows=177 width=40)
Sort Key: dpe_multi.fact1.code
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=7863.02..7884.23 rows=177 width=40)
Hash Key: dpe_multi.fact1.code
-> GroupAggregate (cost=7863.02..7873.63 rows=177 width=40)
Group By: dpe_multi.fact1.code
-> Sort (cost=7863.02..7864.35 rows=177 width=32)
Sort Key: dpe_multi.fact1.code
-> Hash Join (cost=1.06..7839.02 rows=177 width=32)
Hash Cond: dpe_multi.fact1.pid = dim1.pid
-> Append (cost=0.00..6504.00 rows=176800 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_1_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_1_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_1_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_2_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_2_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_2_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_3_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_3_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_3_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_4_2_prt_ca fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_4_2_prt_oh fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Result (cost=0.00..542.00 rows=14734 width=36)
One-Time Filter: PartSelected
-> Seq Scan on fact1_1_prt_4_2_prt_wa fact1 (cost=0.00..542.00 rows=14734 width=36)
-> Hash (cost=1.04..1.04 rows=1 width=4)
-> Partition Selector for fact1 (dynamic scan id: 1) (cost=0.00..1.04 rows=1 width=4)
Filter: dim1.pid
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.04 rows=1 width=4)
-> Seq Scan on dim1 (cost=0.00..1.01 rows=1 width=4)
Settings: enable_bitmapscan=off; enable_hashjoin=on; enable_indexscan=off; enable_mergejoin=off; enable_nestloop=off; enable_seqscan=on; gp_dynamic_partition_pruning=on; gp_segments_for_planner=2; optimizer=off; optimizer_segments=2
Optimizer status: legacy query optimizer
(60 rows)
select fact1.code, count(*) from dim1 inner join fact1 on (dim1.pid=fact1.pid) group by 1 order by 1;
code | count
......
......@@ -1407,6 +1407,12 @@ select * from int2vectortab union select * from int2vectortab;
-- end_matchsubs
select count(*) over (partition by t) from int2vectortab;
-- This is currently broken on 5X_STABLE, although it's been fixed in master.
CREATE TABLE distinct_agg1 (a int, b int, c int);
CREATE TABLE distinct_agg2 (d int, e int, f int);
INSERT INTO distinct_agg1 SELECT i%50, i%2, i%5 from generate_series(1, 100) i ;
INSERT INTO distinct_agg2 SELECT i%50, i, i%2 from generate_series(1, 100) i ;
SELECT DISTINCT c, e FROM distinct_agg1 AS t1, distinct_agg2 AS t2 WHERE t1.a = t2.d AND t1.b = 0 GROUP BY 1,2;
-- CLEANUP
set client_min_messages='warning';
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册