提交 fab372cb 编写于 作者: B Bhuvnesh Chaudhary 提交者: Bhuvnesh

Add tests to verify that dummy joins are created

For semi-join queries, if the constraints can eliminate the scanned relations,
the resulting relation should be marked as a dummy relation, and the join using
it should be a dummy join.
上级 65e3a822
......@@ -1712,3 +1712,77 @@ SELECT 1 AS col1 WHERE NOT (SELECT 1 = 1);
------
(0 rows)
--
-- Test sane behavior in case of semi join semantics
--
-- start_ignore
DROP TABLE IF EXISTS dedup_test1;
NOTICE: table "dedup_test1" does not exist, skipping
DROP TABLE IF EXISTS dedup_test2;
NOTICE: table "dedup_test2" does not exist, skipping
DROP TABLE IF EXISTS dedup_test3;
NOTICE: table "dedup_test3" does not exist, skipping
-- end_ignore
CREATE TABLE dedup_test1 ( a int, b int ) DISTRIBUTED BY (a);
CREATE TABLE dedup_test2 ( e int, f int ) DISTRIBUTED BY (e);
CREATE TABLE dedup_test3 ( a int, b int, c int) DISTRIBUTED BY (a) PARTITION BY RANGE(c) (START(1) END(2) EVERY(1));
NOTICE: CREATE TABLE will create partition "dedup_test3_1_prt_1" for table "dedup_test3"
INSERT INTO dedup_test1 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test2 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test3 select 1, 1, 1 from generate_series(1,10);
ANALYZE dedup_test1;
ANALYZE dedup_test2;
ANALYZE dedup_test3;
EXPLAIN SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=3.31..5.46 rows=4 width=16)
-> Hash Semi Join (cost=3.31..5.46 rows=2 width=16)
Hash Cond: dedup_test1.a = subselect_gp.dedup_test3.a
-> Hash Join (cost=2.09..4.18 rows=2 width=16)
Hash Cond: dedup_test1.a = dedup_test2.e
-> Seq Scan on dedup_test1 (cost=0.00..2.04 rows=2 width=8)
-> Hash (cost=2.04..2.04 rows=2 width=8)
-> Seq Scan on dedup_test2 (cost=0.00..2.04 rows=2 width=8)
-> Hash (cost=1.10..1.10 rows=4 width=4)
-> Append (cost=0.00..1.10 rows=4 width=4)
-> Seq Scan on dedup_test3_1_prt_1 dedup_test3 (cost=0.00..1.10 rows=4 width=4)
Optimizer: legacy query optimizer
(12 rows)
SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
a | b | e | f
---+---+---+---
1 | 1 | 1 | 1
(1 row)
-- Test planner to check if it optimizes the join and marks it as a dummy join
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT b FROM dedup_test1);
QUERY PLAN
------------------------------------------
Result (cost=0.00..0.01 rows=1 width=0)
One-Time Filter: false
Optimizer: legacy query optimizer
(3 rows)
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT a FROM dedup_test1);
QUERY PLAN
------------------------------------------
Result (cost=0.00..0.01 rows=1 width=0)
One-Time Filter: false
Optimizer: legacy query optimizer
(3 rows)
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND EXISTS (SELECT b FROM dedup_test1) AND dedup_test3.b IN (SELECT b FROM dedup_test1);
QUERY PLAN
------------------------------------------
Result (cost=0.53..0.54 rows=1 width=0)
One-Time Filter: false
Optimizer: legacy query optimizer
(3 rows)
-- start_ignore
DROP TABLE IF EXISTS dedup_test1;
DROP TABLE IF EXISTS dedup_test2;
DROP TABLE IF EXISTS dedup_test3;
-- end_ignore
......@@ -1759,3 +1759,152 @@ SELECT 1 AS col1 WHERE NOT (SELECT 1 = 1);
------
(0 rows)
--
-- Test sane behavior in case of semi join semantics
--
-- start_ignore
DROP TABLE IF EXISTS dedup_test1;
NOTICE: table "dedup_test1" does not exist, skipping
DROP TABLE IF EXISTS dedup_test2;
NOTICE: table "dedup_test2" does not exist, skipping
DROP TABLE IF EXISTS dedup_test3;
NOTICE: table "dedup_test3" does not exist, skipping
-- end_ignore
CREATE TABLE dedup_test1 ( a int, b int ) DISTRIBUTED BY (a);
CREATE TABLE dedup_test2 ( e int, f int ) DISTRIBUTED BY (e);
CREATE TABLE dedup_test3 ( a int, b int, c int) DISTRIBUTED BY (a) PARTITION BY RANGE(c) (START(1) END(2) EVERY(1));
NOTICE: CREATE TABLE will create partition "dedup_test3_1_prt_1" for table "dedup_test3"
INSERT INTO dedup_test1 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test2 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test3 select 1, 1, 1 from generate_series(1,10);
ANALYZE dedup_test1;
ANALYZE dedup_test2;
ANALYZE dedup_test3;
EXPLAIN SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..1293.00 rows=2 width=16)
-> Hash Join (cost=0.00..1293.00 rows=1 width=16)
Hash Cond: dedup_test2.e = dedup_test1.a
-> Table Scan on dedup_test2 (cost=0.00..431.00 rows=2 width=8)
-> Hash (cost=862.00..862.00 rows=1 width=8)
-> Hash Join (cost=0.00..862.00 rows=1 width=8)
Hash Cond: dedup_test1.a = dedup_test3.a
-> Table Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=8)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=4)
Group Key: dedup_test3.a
-> Sort (cost=0.00..431.00 rows=1 width=4)
Sort Key: dedup_test3.a
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
Hash Key: dedup_test3.a
-> GroupAggregate (cost=0.00..431.00 rows=1 width=4)
Group Key: dedup_test3.a
-> Sort (cost=0.00..431.00 rows=4 width=4)
Sort Key: dedup_test3.a
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=4 width=4)
-> Sequence (cost=0.00..431.00 rows=4 width=4)
-> Partition Selector for dedup_test3 (dynamic scan id: 1) (cost=10.00..100.00 rows=34 width=4)
Partitions selected: 1 (out of 1)
-> Dynamic Table Scan on dedup_test3 (dynamic scan id: 1) (cost=0.00..431.00 rows=4 width=4)
Optimizer: PQO version 2.60.0
(25 rows)
SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
a | b | e | f
---+---+---+---
1 | 1 | 1 | 1
(1 row)
-- Test planner to check if it optimizes the join and marks it as a dummy join
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT b FROM dedup_test1);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice4; segments: 3) (cost=0.00..1324463.71 rows=4 width=20)
-> Hash Semi Join (cost=0.00..1324463.71 rows=2 width=20)
Hash Cond: dedup_test3.b = subselect_gp.dedup_test1.b
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1324032.71 rows=2 width=20)
Hash Key: dedup_test3.b
-> Nested Loop (cost=0.00..1324032.71 rows=2 width=20)
Join Filter: true
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12)
-> Sequence (cost=0.00..431.00 rows=1 width=12)
-> Partition Selector for dedup_test3 (dynamic scan id: 1) (cost=10.00..100.00 rows=34 width=4)
Partitions selected: 0 (out of 1)
-> Dynamic Table Scan on dedup_test3 (dynamic scan id: 1) (cost=0.00..431.00 rows=1 width=12)
Filter: c = 7
-> Table Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=8)
-> Hash (cost=431.00..431.00 rows=2 width=4)
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=2 width=4)
Hash Key: subselect_gp.dedup_test1.b
-> Table Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=4)
Optimizer: PQO version 2.60.0
(19 rows)
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT a FROM dedup_test1);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..1324463.71 rows=4 width=20)
-> Hash Semi Join (cost=0.00..1324463.71 rows=2 width=20)
Hash Cond: dedup_test3.b = subselect_gp.dedup_test1.a
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1324032.71 rows=2 width=20)
Hash Key: dedup_test3.b
-> Nested Loop (cost=0.00..1324032.71 rows=2 width=20)
Join Filter: true
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12)
-> Sequence (cost=0.00..431.00 rows=1 width=12)
-> Partition Selector for dedup_test3 (dynamic scan id: 1) (cost=10.00..100.00 rows=34 width=4)
Partitions selected: 0 (out of 1)
-> Dynamic Table Scan on dedup_test3 (dynamic scan id: 1) (cost=0.00..431.00 rows=1 width=12)
Filter: c = 7
-> Table Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=8)
-> Hash (cost=431.00..431.00 rows=2 width=4)
-> Table Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=4)
Optimizer: PQO version 2.60.0
(17 rows)
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND EXISTS (SELECT b FROM dedup_test1) AND dedup_test3.b IN (SELECT b FROM dedup_test1);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice6; segments: 3) (cost=0.00..2648064.92 rows=4 width=20)
-> Hash Join (cost=0.00..2648064.92 rows=2 width=20)
Hash Cond: dedup_test3.b = subselect_gp.dedup_test1.b
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1324032.71 rows=2 width=20)
Hash Key: dedup_test3.b
-> Nested Loop (cost=0.00..1324032.71 rows=2 width=20)
Join Filter: true
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12)
-> Sequence (cost=0.00..431.00 rows=1 width=12)
-> Partition Selector for dedup_test3 (dynamic scan id: 1) (cost=10.00..100.00 rows=34 width=4)
Partitions selected: 0 (out of 1)
-> Dynamic Table Scan on dedup_test3 (dynamic scan id: 1) (cost=0.00..431.00 rows=1 width=12)
Filter: c = 7
-> Table Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=8)
-> Hash (cost=1324032.22..1324032.22 rows=2 width=4)
-> Nested Loop Semi Join (cost=0.00..1324032.22 rows=2 width=4)
Join Filter: true
-> GroupAggregate (cost=0.00..431.00 rows=2 width=4)
Group Key: subselect_gp.dedup_test1.b
-> Sort (cost=0.00..431.00 rows=2 width=4)
Sort Key: subselect_gp.dedup_test1.b
-> Redistribute Motion 3:3 (slice5; segments: 3) (cost=0.00..431.00 rows=2 width=4)
Hash Key: subselect_gp.dedup_test1.b
-> GroupAggregate (cost=0.00..431.00 rows=2 width=4)
Group Key: subselect_gp.dedup_test1.b
-> Sort (cost=0.00..431.00 rows=2 width=4)
Sort Key: subselect_gp.dedup_test1.b
-> Table Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=4)
-> Materialize (cost=0.00..431.00 rows=1 width=1)
-> Broadcast Motion 1:3 (slice4) (cost=0.00..431.00 rows=3 width=1)
-> Limit (cost=0.00..431.00 rows=1 width=1)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=1)
-> Limit (cost=0.00..431.00 rows=1 width=1)
-> Table Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=1)
Optimizer: PQO version 2.60.0
(35 rows)
-- start_ignore
DROP TABLE IF EXISTS dedup_test1;
DROP TABLE IF EXISTS dedup_test2;
DROP TABLE IF EXISTS dedup_test3;
-- end_ignore
......@@ -732,3 +732,36 @@ SELECT EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk
-- Ensure that NOT is not lost during subquery pull-up
--
SELECT 1 AS col1 WHERE NOT (SELECT 1 = 1);
--
-- Test sane behavior in case of semi join semantics
--
-- NOTE(review): the statements and comments in this script appear verbatim in
-- the expected-output files, so those presumably need to be regenerated
-- whenever this section is edited -- confirm against the test harness.
--
-- start_ignore
DROP TABLE IF EXISTS dedup_test1;
DROP TABLE IF EXISTS dedup_test2;
DROP TABLE IF EXISTS dedup_test3;
-- end_ignore
-- Fixtures: two plain two-column tables, plus a third table that is
-- range-partitioned on c with the single partition START(1) END(2) EVERY(1).
CREATE TABLE dedup_test1 ( a int, b int ) DISTRIBUTED BY (a);
CREATE TABLE dedup_test2 ( e int, f int ) DISTRIBUTED BY (e);
CREATE TABLE dedup_test3 ( a int, b int, c int) DISTRIBUTED BY (a) PARTITION BY RANGE(c) (START(1) END(2) EVERY(1));
-- dedup_test1 and dedup_test2 each receive the rows (i, i) for i = 1..4.
INSERT INTO dedup_test1 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test2 select i, i from generate_series(1,4)i;
-- dedup_test3 receives ten identical rows (1, 1, 1), so the IN subquery below
-- sees the value a = 1 ten times.
INSERT INTO dedup_test3 select 1, 1, 1 from generate_series(1,10);
-- Collect statistics before the EXPLAIN output below is captured.
ANALYZE dedup_test1;
ANALYZE dedup_test2;
ANALYZE dedup_test3;
-- Inner join filtered by an IN subquery over the duplicated rows: the
-- duplicates in dedup_test3 must not multiply the joined row, so exactly one
-- row (a = 1) is expected from the SELECT.
EXPLAIN SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
-- Test planner to check if it optimizes the join and marks it as a dummy join
-- The filter c = 7 lies outside the only partition of dedup_test3, so no
-- partition can satisfy it. The three variants below differ only in the
-- subquery: IN on column b, IN on column a (the distribution key of
-- dedup_test1), and an additional uncorrelated EXISTS.
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT b FROM dedup_test1);
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT a FROM dedup_test1);
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND EXISTS (SELECT b FROM dedup_test1) AND dedup_test3.b IN (SELECT b FROM dedup_test1);
-- start_ignore
DROP TABLE IF EXISTS dedup_test1;
DROP TABLE IF EXISTS dedup_test2;
DROP TABLE IF EXISTS dedup_test3;
-- end_ignore
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册