-- -- NOTIN -- Test NOTIN clauses -- create schema notin; set search_path=notin; -- -- generate a bunch of tables -- create table t1 ( c1 integer ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2 ( c2 integer ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c2' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t3 ( c3 integer ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c3' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t4 ( c4 integer ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c4' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t1n ( c1n integer ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1n' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table g1 ( a integer, b integer, c integer ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table l1 ( w integer, x integer, y integer, z integer ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'w' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- -- stick in some values -- insert into t1 values (generate_series (1,10)); insert into t2 values (generate_series (1,5)); insert into t3 values (1), (2), (3); insert into t4 values (1), (2); insert into t1n values (1), (2), (3), (null), (5), (6), (7); insert into g1 values (1,1,1), (1,1,2), (1,2,2), (2,2,2), (2,2,3), (2,3,3), (3,3,3), (3,3,3), (3,3,4), (3,4,4), (4,4,4); insert into l1 values (generate_series (1,10), generate_series (1,10), generate_series (1,10), generate_series (1,10)); -- -- queries -- -- --q1 -- explain select c1 from t1 where c1 not in (select c2 from t2); QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.49..5.68 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.49..5.68 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=2.30..2.30 rows=5 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.30 rows=5 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.10 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (10 rows) select c1 from t1 where c1 not in (select c2 from t2); c1 ---- 6 8 10 7 9 (5 rows) -- --q2 -- explain select c1 from t1 where c1 not in (select c2 from t2 where c2 > 2 and c2 not in (select c3 from t3)); QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice3; segments: 3) (cost=4.71..7.89 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=4.71..7.89 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=4.58..4.58 rows=4 width=4) -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=2.29..4.58 rows=4 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=2.29..4.45 rows=2 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.29..4.41 rows=2 width=4) Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3 -> Seq Scan on t2 (cost=0.00..2.06 rows=2 width=4) Filter: c2 > 2 -> Hash (cost=2.18..2.18 rows=3 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (17 rows) select c1 from t1 where c1 not in (select c2 from t2 where c2 > 2 and c2 not in (select c3 from t3)); c1 ---- 1 3 7 9 2 6 8 10 (8 rows) -- --q3 -- explain select c1 from t1 where c1 not in (select c2 from t2 where c2 not in (select c3 from t3 where c3 not in (select c4 from t4))); QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice4; segments: 3) (cost=5.98..9.17 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=5.98..9.17 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=5.86..5.86 rows=4 width=4) -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=3.57..5.86 rows=4 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=3.57..5.73 rows=2 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=3.57..5.69 rows=2 width=4) Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3 -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) -> Hash (cost=3.45..3.45 rows=4 width=4) -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=1.20..3.45 rows=4 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=1.20..3.31 rows=2 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=1.20..3.28 rows=2 width=4) Hash Cond: t3.c3 = "NotIn_SUBQUERY".c4 -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) -> Hash (cost=1.12..1.12 rows=2 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.12 rows=2 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..1.04 rows=1 width=4) -> Seq Scan on t4 (cost=0.00..1.02 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (22 rows) select c1 from t1 where c1 not in (select c2 from t2 where c2 not in (select c3 from t3 where c3 not in (select c4 from t4))); c1 ---- 6 8 10 3 7 9 (6 rows) -- --q4 -- explain select c1 from t1, (select c2 from t2 where c2 not in (select c3 from t3)) foo where c1 = foo.c2; QUERY PLAN ------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=4.45..7.62 rows=4 width=4) -> Hash Join (cost=4.45..7.62 rows=2 width=4) Hash Cond: t1.c1 = t2.c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=4.41..4.41 rows=2 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.29..4.41 rows=2 width=4) Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3 -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) -> Hash (cost=2.18..2.18 rows=3 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (14 rows) select c1 from t1, (select c2 from t2 where c2 not in (select c3 from t3)) foo where c1 = foo.c2; c1 ---- 5 4 (2 rows) -- --q5 -- explain select c1 from t1, (select c2 from t2 where c2 not in (select c3 from t3) and c2 > 4) foo where c1 = foo.c2; QUERY PLAN ------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=4.44..7.62 rows=4 width=4) -> Hash Join (cost=4.44..7.62 rows=2 width=4) Hash Cond: t1.c1 = t2.c2 -> Seq Scan on t1 (cost=0.00..3.12 rows=3 width=4) Filter: c1 > 4 -> Hash (cost=4.40..4.40 rows=2 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.29..4.40 rows=2 width=4) Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3 -> Seq Scan on t2 (cost=0.00..2.06 rows=1 width=4) Filter: c2 > 4 -> Hash (cost=2.18..2.18 rows=3 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (16 rows) select c1 from t1, (select c2 from t2 where c2 not in (select c3 from t3) and c2 > 4) foo where c1 = foo.c2; c1 ---- 5 (1 row) -- --q6 -- explain select c1 from t1 where c1 not in (select c2 from t2) and c1 > 1; QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.49..5.71 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.49..5.71 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.12 rows=4 width=4) Filter: c1 > 1 -> Hash (cost=2.30..2.30 rows=5 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.30 rows=5 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.10 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (11 rows) select c1 from t1 where c1 not in (select c2 from t2) and c1 > 1; c1 ---- 7 9 6 8 10 (5 rows) -- --q7 -- explain select c1 from t1 where c1 > 6 and c1 not in (select c2 from t2) and c1 < 10; QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.49..5.70 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.49..5.70 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.15 rows=2 width=4) Filter: c1 > 6 AND c1 < 10 -> Hash (cost=2.30..2.30 rows=5 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.30 rows=5 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.10 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (11 rows) select c1 from t1 where c1 > 6 and c1 not in (select c2 from t2) and c1 < 10; c1 ---- 7 9 8 (3 rows) -- --q8 introduce join -- explain select c1 from t1,t2 where c1 not in (select c3 from t3) and c1 = c2; QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=4.40..7.65 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=4.40..7.65 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c3 -> Hash Join (cost=2.11..5.30 rows=2 width=4) Hash Cond: t1.c1 = t2.c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=2.05..2.05 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) -> Hash (cost=2.18..2.18 rows=3 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (14 rows) select c1 from t1,t2 where c1 not in (select c3 from t3) and c1 = c2; c1 ---- 4 5 (2 rows) -- --q9 -- select c1 from t1 where c1 not in (select c2 from t2 where c2 > 2 and c2 < 5); c1 ---- 1 5 7 9 2 6 8 10 (8 rows) -- --q10 -- select count(c1) from t1 where c1 not in (select sum(c2) from t2); count ------- 10 (1 row) -- --q11 -- select c1 from t1 where c1 not in (select count(*) from t1); c1 ---- 1 3 5 7 9 2 4 6 8 (9 rows) -- --q12 -- select a,b from g1 where (a,b) not in (select a,b from g1); a | b ---+--- (0 rows) -- --q13 -- explain select x,y from l1 where (x,y) not in (select distinct y, sum(x) from l1 group by y having y < 4 order by y) order by 1,2; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice4; segments: 3) (cost=7.44..7.46 rows=10 width=8) Merge Key: notin.l1.x, notin.l1.y -> Sort (cost=7.44..7.46 rows=4 width=8) Sort Key: notin.l1.x, notin.l1.y -> Nested Loop Left Anti Semi Join (Not-In) (cost=3.42..7.27 rows=4 width=8) Join Filter: notin.l1.x = "NotIn_SUBQUERY".y AND notin.l1.y = "NotIn_SUBQUERY".sum -> Seq Scan on l1 (cost=0.00..3.10 rows=4 width=8) -> Materialize (cost=3.42..3.45 rows=1 width=12) -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=3.36..3.42 rows=1 width=12) -> Subquery Scan "NotIn_SUBQUERY" (cost=3.36..3.38 rows=1 width=12) -> Unique (cost=3.36..3.37 rows=1 width=16) Group By: notin.l1.y, (pg_catalog.sum((sum(notin.l1.x)))) -> Sort (cost=3.36..3.37 rows=1 width=16) Sort Key (Distinct): notin.l1.y, (pg_catalog.sum((sum(notin.l1.x)))) -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=3.31..3.35 rows=1 width=16) Hash Key: notin.l1.y, (pg_catalog.sum((sum(notin.l1.x)))) -> Unique (cost=3.31..3.33 rows=1 width=16) Group By: notin.l1.y, (pg_catalog.sum((sum(notin.l1.x)))) -> Sort (cost=3.31..3.32 rows=1 width=16) Sort Key (Distinct): notin.l1.y, (pg_catalog.sum((sum(notin.l1.x)))) -> HashAggregate (cost=3.25..3.28 rows=1 width=16) Group By: notin.l1.y -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=3.14..3.20 rows=1 width=12) Hash Key: notin.l1.y -> HashAggregate (cost=3.14..3.14 rows=1 width=12) Group By: notin.l1.y -> Seq Scan on l1 (cost=0.00..3.12 rows=2 width=8) Filter: y < 4 Settings: optimizer=off Optimizer status: legacy query optimizer (30 rows) select x,y from l1 where (x,y) not in (select distinct y, sum(x) from l1 group by y having y < 4 order by y) order by 1,2; x | y ----+---- 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 (7 rows) -- --q14 -- explain select * from g1 where (a,b,c) not in (select x,y,z from l1); QUERY PLAN ------------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice2; segments: 3) (cost=3.63..14.81 rows=11 width=12) -> Nested Loop Left Anti Semi Join (Not-In) (cost=3.63..14.81 rows=4 width=12) Join Filter: g1.a = "NotIn_SUBQUERY".x AND g1.b = "NotIn_SUBQUERY".y AND g1.c = "NotIn_SUBQUERY".z -> Seq Scan on g1 (cost=0.00..2.11 rows=4 width=12) -> Materialize (cost=3.63..3.93 rows=10 width=12) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..3.60 rows=10 width=12) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..3.20 rows=4 width=12) -> Seq Scan on l1 (cost=0.00..3.10 rows=4 width=12) Settings: optimizer=off Optimizer status: legacy query optimizer (10 rows) select * from g1 where (a,b,c) not in (select x,y,z from l1); a | b | c ---+---+--- 1 | 1 | 2 1 | 2 | 2 3 | 3 | 4 3 | 4 | 4 2 | 2 | 3 2 | 3 | 3 (6 rows) -- --q15 -- explain select c1 from t1, t2 where c1 not in (select c3 from t3 where c3 = c1) and c1 = c2; QUERY PLAN --------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.11..15.49 rows=4 width=4) -> Hash Join (cost=2.11..15.49 rows=2 width=4) Hash Cond: t1.c1 = t2.c2 -> Seq Scan on t1 (cost=0.00..13.32 rows=2 width=4) Filter: (subplan) SubPlan 1 -> Result (cost=2.04..2.05 rows=1 width=4) Filter: t3.c3 = $0 -> Materialize (cost=2.04..2.05 rows=1 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.04 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.04 rows=1 width=4) -> Hash (cost=2.05..2.05 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (15 rows) select c1 from t1, t2 where c1 not in (select c3 from t3 where c3 = c1) and c1 = c2; c1 ---- 5 4 (2 rows) -- --q17 -- null test -- select c1 from t1 where c1 not in (select c1n from t1n); c1 ---- (0 rows) -- --q18 -- null test -- select c1 from t1 where c1 not in (select c2 from t2 where c2 not in (select c3 from t3 where c3 not in (select c1n from t1n))); c1 ---- 7 9 6 8 10 (5 rows) -- --q19 -- select c1 from t1 join t2 on c1 = c2 where c1 not in (select c3 from t3); c1 ---- 5 4 (2 rows) -- --q20 -- explain select c1 from t1 where c1 not in (select sum(c2) as s from t2 where c2 > 2 group by c2 having c2 > 3); QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.29..5.46 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.29..5.46 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".s -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=2.21..2.21 rows=2 width=8) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=2.09..2.21 rows=2 width=8) -> Subquery Scan "NotIn_SUBQUERY" (cost=2.09..2.13 rows=1 width=8) -> HashAggregate (cost=2.09..2.11 rows=1 width=16) Group By: t2.c2 -> Seq Scan on t2 (cost=0.00..2.08 rows=1 width=4) Filter: c2 > 2 AND c2 > 3 Settings: optimizer=off Optimizer status: legacy query optimizer (13 rows) select c1 from t1 where c1 not in (select sum(c2) as s from t2 where c2 > 2 group by c2 having c2 > 3); c1 ---- 1 3 7 9 2 6 8 10 (8 rows) -- --q21 -- multiple not in in where clause -- select c1 from t1 where c1 not in (select c2 from t2) and c1 not in (select c3 from t3); c1 ---- 7 9 6 8 10 (5 rows) -- --q22 -- coexist with joins -- select c1 from t1,t3,t2 where c1 not in (select c4 from t4) and c1 = c3 and c1 = c2; c1 ---- 3 (1 row) -- --q23 -- union in subselect -- select c1 from t1 where c1 not in (select c2 from t2 union select c3 from t3); c1 ---- 6 8 10 7 9 (5 rows) -- --q24 -- select c1 from t1 where c1 not in (select c2 from t2 union all select c3 from t3); c1 ---- 7 9 6 8 10 (5 rows) -- --q25 -- select c1 from t1 where c1 not in (select (case when c1n is null then 1 else c1n end) as c1n from t1n); c1 ---- 9 4 8 10 (4 rows) -- --q26 -- explain select (case when c1%2 = 0 then (select sum(c2) from t2 where c2 not in (select c3 from t3)) else (select sum(c3) from t3 where c3 not in (select c4 from t4)) end) as foo from t1; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice5; segments: 3) (cost=7.86..11.01 rows=10 width=4) -> Seq Scan on t1 (cost=7.86..11.01 rows=4 width=4) InitPlan (slice6) -> Aggregate (cost=4.49..4.50 rows=1 width=8) -> Gather Motion 3:1 (slice2; segments: 3) (cost=4.42..4.47 rows=1 width=8) -> Aggregate (cost=4.42..4.43 rows=1 width=8) -> Hash Left Anti Semi Join (Not-In) (cost=2.29..4.41 rows=2 width=4) Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3 -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) -> Hash (cost=2.18..2.18 rows=3 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) InitPlan (slice7) -> Aggregate (cost=3.36..3.37 rows=1 width=8) -> Gather Motion 3:1 (slice4; segments: 3) (cost=3.29..3.34 rows=1 width=8) -> Aggregate (cost=3.29..3.30 rows=1 width=8) -> Hash Left Anti Semi Join (Not-In) (cost=1.20..3.28 rows=2 width=4) Hash Cond: notin.t3.c3 = "NotIn_SUBQUERY".c4 -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) -> Hash (cost=1.12..1.12 rows=2 width=4) -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..1.12 rows=2 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..1.04 rows=1 width=4) -> Seq Scan on t4 (cost=0.00..1.02 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (26 rows) select (case when c1%2 = 0 then (select sum(c2) from t2 where c2 not in (select c3 from t3)) else (select sum(c3) from t3 where c3 not in (select c4 from t4)) end) as foo from t1; foo ----- 9 9 9 9 9 3 3 3 3 3 (10 rows) -- --q27 -- explain select c1 from t1 where not c1 >= some (select c2 from t2); QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.31..8.79 rows=4 width=4) -> Nested Loop Left Anti Semi Join (Not-In) (cost=2.31..8.79 rows=2 width=4) Join Filter: t1.c1 >= "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Materialize (cost=2.31..2.46 rows=5 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.30 rows=5 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.10 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (10 rows) select c1 from t1 where not c1 >= some (select c2 from t2); c1 ---- (0 rows) -- --q28 -- explain select c2 from t2 where not c2 < all (select c2 from t2); QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.27..6.00 rows=9 width=4) -> Nested Loop EXISTS Join (cost=2.27..6.00 rows=3 width=4) Join Filter: notin.t2.c2 >= notin.t2.c2 -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) -> Materialize (cost=2.27..2.42 rows=5 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.25 rows=5 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (9 rows) select c2 from t2 where not c2 < all (select c2 from t2); c2 ---- 2 4 1 3 5 (5 rows) -- --q29 -- explain select c3 from t3 where not c3 <> any (select c4 from t4); QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=1.13..3.56 rows=4 width=4) -> Nested Loop Left Anti Semi Join (Not-In) (cost=1.13..3.56 rows=2 width=4) Join Filter: t3.c3 <> "NotIn_SUBQUERY".c4 -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) -> Materialize (cost=1.13..1.19 rows=2 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.12 rows=2 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..1.04 rows=1 width=4) -> Seq Scan on t4 (cost=0.00..1.02 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (10 rows) select c3 from t3 where not c3 <> any (select c4 from t4); c3 ---- (0 rows) -- --q31 -- explain select c1 from t1 where c1 not in (select c2 from t2 order by c2 limit 3) order by c1; QUERY PLAN ---------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice3; segments: 3) (cost=5.65..5.66 rows=4 width=4) Merge Key: t1.c1 -> Sort (cost=5.65..5.66 rows=2 width=4) Sort Key: t1.c1 -> Hash Left Anti Semi Join (Not-In) (cost=2.44..5.62 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=2.33..2.33 rows=3 width=4) -> Broadcast Motion 1:3 (slice2; segments: 1) (cost=2.11..2.33 rows=9 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=2.11..2.21 rows=3 width=4) -> Limit (cost=2.11..2.18 rows=3 width=4) -> Gather Motion 3:1 (slice1; segments: 3) (cost=2.11..2.18 rows=3 width=4) Merge Key: t2.c2 -> Limit (cost=2.11..2.12 rows=1 width=4) -> Sort (cost=2.11..2.12 rows=2 width=4) Sort Key: t2.c2 -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (19 rows) select c1 from t1 where c1 not in (select c2 from t2 order by c2 limit 3) order by c1; c1 ---- 4 5 6 7 8 9 10 (7 rows) --quantified/correlated subqueries -- --q32 -- explain select c1 from t1 where c1 =all (select c2 from t2 where c2 > -1 and c2 <= 1); QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.13..5.90 rows=9 width=4) -> Nested Loop Left Anti Semi Join (Not-In) (cost=2.13..5.90 rows=3 width=4) Join Filter: t1.c1 <> "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Materialize (cost=2.13..2.16 rows=1 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.12 rows=1 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.08 rows=1 width=4) -> Seq Scan on t2 (cost=0.00..2.08 rows=1 width=4) Filter: c2 > (-1) AND c2 <= 1 Settings: optimizer=off Optimizer status: legacy query optimizer (11 rows) select c1 from t1 where c1 =all (select c2 from t2 where c2 > -1 and c2 <= 1); c1 ---- 1 (1 row) -- --q33 -- explain select c1 from t1 where c1 <>all (select c2 from t2); QUERY PLAN ------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=2.49..5.68 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.49..5.68 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=2.30..2.30 rows=5 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.30 rows=5 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.10 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (10 rows) select c1 from t1 where c1 <>all (select c2 from t2); c1 ---- 7 9 6 8 10 (5 rows) -- --q34 -- explain select c1 from t1 where c1 <=all (select c2 from t2 where c2 not in (select c1n from t1n)); QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice3; segments: 3) (cost=4.99..10.33 rows=4 width=4) -> Nested Loop Left Anti Semi Join (Not-In) (cost=4.99..10.33 rows=2 width=4) Join Filter: t1.c1 > "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Materialize (cost=4.99..5.09 rows=4 width=4) -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=2.68..4.98 rows=4 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=2.68..4.85 rows=2 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.68..4.82 rows=2 width=4) Hash Cond: t2.c2 = "NotIn_SUBQUERY".c1n -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) -> Hash (cost=2.42..2.42 rows=7 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.42 rows=7 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.14 rows=3 width=4) -> Seq Scan on t1n (cost=0.00..2.07 rows=3 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (16 rows) select c1 from t1 where c1 <=all (select c2 from t2 where c2 not in (select c1n from t1n)); c1 ---- 2 4 6 8 10 1 3 5 7 9 (10 rows) -- --q35 -- explain select c1 from t1 where not c1 =all (select c2 from t2 where not c2 >all (select c3 from t3)); QUERY PLAN ------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice3; segments: 3) (cost=5.43..11.91 rows=45 width=4) -> Nested Loop EXISTS Join (cost=5.43..11.91 rows=15 width=4) Join Filter: t1.c1 <> t2.c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Materialize (cost=5.43..5.58 rows=5 width=8) -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=2.05..5.42 rows=5 width=8) -> Nested Loop (cost=2.05..5.22 rows=2 width=8) Join Filter: t2.c2 <= t3.c3 -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.15 rows=3 width=4) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) -> Materialize (cost=2.05..2.10 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (14 rows) select c1 from t1 where not c1 =all (select c2 from t2 where not c2 >all (select c3 from t3)); c1 ---- 1 3 5 7 9 2 4 6 8 10 (10 rows) -- --q36 -- explain select c1 from t1 where not c1 <>all (select c1n from t1n where c1n Hash EXISTS Join (cost=9.27..12.44 rows=2 width=4) Hash Cond: t1.c1 = t1n.c1n -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=9.23..9.23 rows=2 width=4) -> Seq Scan on t1n (cost=0.00..9.23 rows=2 width=4) Filter: (subplan) SubPlan 1 -> Result (cost=2.04..2.05 rows=1 width=4) Filter: t3.c3 = $0 -> Materialize (cost=2.04..2.05 rows=1 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.04 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.04 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (15 rows) select c1 from t1 where not c1 <>all (select c1n from t1n where c1n =all (select c2 from t2 where c2 = c1); QUERY PLAN ---------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=2.11..5.30 rows=4 width=4) -> Hash EXISTS Join (cost=2.11..5.30 rows=2 width=4) Hash Cond: t1.c1 = t2.c2 Join Filter: t1.c1 < t2.c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=2.05..2.05 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (9 rows) select c1 from t1 where not c1 >=all (select c2 from t2 where c2 = c1); c1 ---- (0 rows) -- --q38 -- explain select c1 from t1 where not exists (select c2 from t2 where c2 = c1); QUERY PLAN ---------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=2.11..5.28 rows=4 width=4) -> Hash Left Anti Semi Join (cost=2.11..5.28 rows=2 width=4) Hash Cond: t1.c1 = t2.c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=2.05..2.05 rows=2 width=4) -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Filter: c2 IS NOT NULL Settings: optimizer=off Optimizer status: legacy query optimizer (9 rows) select c1 from t1 where not exists (select c2 from t2 where c2 = c1); c1 ---- 7 9 6 8 10 (5 rows) -- --q39 -- explain select c1 from t1 where not exists (select c2 from t2 where c2 not in (select c3 from t3) and c2 = c1); QUERY PLAN ------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=4.45..7.62 rows=4 width=4) -> Hash Left Anti Semi Join (cost=4.45..7.62 rows=2 width=4) Hash Cond: t1.c1 = t2.c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=4.41..4.41 rows=2 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.29..4.41 rows=2 width=4) Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3 -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Filter: c2 IS NOT NULL -> Hash (cost=2.18..2.18 rows=3 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (15 rows) select c1 from t1 where not exists (select c2 from t2 where c2 not in (select c3 from t3) and c2 = c1); c1 ---- 1 3 7 9 2 6 8 10 (8 rows) -- --q40 -- explain select c1 from t1 where not exists (select c2 from t2 where exists (select c3 from t3) and c2 <>all (select c3 from t3) and c2 = c1); QUERY PLAN ------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice3; segments: 3) (cost=5.16..8.33 rows=4 width=4) -> Hash Left Anti Semi Join (cost=5.16..8.33 rows=2 width=4) Hash Cond: t1.c1 = t2.c2 InitPlan (slice4) -> Limit (cost=0.00..0.70 rows=1 width=0) -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..0.70 rows=1 width=0) -> Limit (cost=0.00..0.68 rows=1 width=0) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=0) -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) -> Hash (cost=4.42..4.42 rows=2 width=4) -> Result (cost=2.29..4.42 rows=2 width=4) One-Time Filter: $0 -> Hash Left Anti Semi Join (Not-In) (cost=2.29..4.42 rows=2 width=4) Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3 -> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4) Filter: c2 IS NOT NULL -> Hash (cost=2.18..2.18 rows=3 width=4) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4) -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) Settings: optimizer=off Optimizer status: legacy query optimizer (22 rows) select c1 from t1 where not exists (select c2 from t2 where exists (select c3 from t3) and c2 <>all (select c3 from t3) and c2 = c1); c1 ---- 2 6 8 10 1 3 7 9 (8 rows) -- --q41 -- select c1 from t1 where c1 not in (select c2 from t2) or c1 = 49; c1 ---- 7 9 6 8 10 (5 rows) -- --q42 -- select c1 from t1 where not not not c1 in (select c2 from t2); c1 ---- 7 9 6 8 10 (5 rows) -- --q43 -- explain select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 is not null; QUERY PLAN ------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) (cost=2.09..5.25 rows=4 width=4) -> Hash Left Anti Semi Join (Not-In) (cost=2.09..5.25 rows=2 width=4) Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2 -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) Filter: c1 IS NOT NULL -> Hash (cost=2.08..2.08 rows=1 width=4) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.08 rows=1 width=4) -> Seq Scan on t2 (cost=0.00..2.06 rows=1 width=4) Filter: c2 > 4 Settings: optimizer=off Optimizer status: legacy query optimizer (11 rows) select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 is not null; c1 ---- 2 4 6 8 10 1 3 7 9 (9 rows) -- --q44 -- select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 > 2; c1 ---- 4 6 8 10 3 7 9 (7 rows) -- Test the null not in an empty set -- null not in an unempty set, always returns false -- null not in an empty set, always returns true -- -- q46 -- create table table_source (c1 varchar(100),c2 varchar(100),c3 varchar(100),c4 varchar(100)); insert into table_source (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158',null,'INC','0000000001') ; create table table_source2 as select * from table_source distributed by (c2); create table table_source4 (c1 varchar(100),c2 varchar(100) not null,c3 varchar(100),c4 varchar(100)); insert into table_source4 (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158','a','INC','0000000001') ; create table table_config (c1 varchar(10) ,c2 varchar(10) ,PRIMARY KEY (c1)); NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "table_config_pkey" for table "table_config" insert into table_config select i, 'test' from generate_series(1, 1000)i; delete from table_config where gp_segment_id = 0; explain select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test'); QUERY PLAN ------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice2; segments: 3) (cost=103.00..104.12 rows=10 width=258) -> Hash Left Anti Semi Join (Not-In) (cost=103.00..104.12 rows=4 width=258) Hash Cond: table_source.c2::text = "NotIn_SUBQUERY".c1::text -> Seq Scan on table_source (cost=0.00..1.01 rows=1 width=258) Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text -> Hash (cost=65.50..65.50 rows=1000 width=38) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..65.50 rows=1000 width=38) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..25.50 rows=334 width=38) -> Seq Scan on table_config (cost=0.00..15.50 rows=334 width=3) Filter: c2::text = 'test'::text Settings: optimizer=off Optimizer status: legacy query optimizer (12 rows) select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test'); c1 | c2 | c3 | c4 ----+----+----+---- (0 rows) explain select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test'); QUERY PLAN ------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice2; segments: 3) (cost=103.00..104.12 rows=10 width=258) -> Hash Left Anti Semi Join (Not-In) (cost=103.00..104.12 rows=4 width=258) Hash Cond: table_source2.c2::text = "NotIn_SUBQUERY".c1::text -> Seq Scan on table_source2 (cost=0.00..1.01 rows=1 width=258) Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text -> Hash (cost=65.50..65.50 rows=1000 width=38) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..65.50 rows=1000 width=38) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..25.50 rows=334 width=38) -> Seq Scan on table_config (cost=0.00..15.50 rows=334 width=3) Filter: c2::text = 'test'::text Settings: optimizer=off Optimizer status: legacy query optimizer (12 rows) select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test'); c1 | c2 | c3 | c4 ----+----+----+---- (0 rows) explain select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test'); QUERY PLAN ----------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=38.00..39.14 rows=10 width=42) -> Hash Left Anti Semi Join (Not-In) (cost=38.00..39.14 rows=4 width=42) Hash Cond: table_source4.c2::text = "NotIn_SUBQUERY".c1::text -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=42) Hash Key: table_source4.c2 -> Seq Scan on table_source4 (cost=0.00..1.01 rows=1 width=42) Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text -> Hash (cost=25.50..25.50 rows=334 width=38) -> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..25.50 rows=334 width=38) -> Seq Scan on table_config (cost=0.00..15.50 rows=334 width=3) Filter: c2::text = 'test'::text Settings: optimizer=off Optimizer status: legacy query optimizer (13 rows) select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test'); c1 | c2 | c3 | c4 --------------------------+----+-----+------------ 000181202006010000003158 | a | INC | 0000000001 (1 row) reset search_path; drop schema notin cascade; NOTICE: drop cascades to table notin.table_config NOTICE: drop cascades to table notin.table_source4 NOTICE: drop cascades to table notin.table_source2 NOTICE: drop cascades to table notin.table_source NOTICE: drop cascades to table notin.l1 NOTICE: drop cascades to table notin.g1 NOTICE: drop cascades to table notin.t1n NOTICE: drop cascades to table notin.t4 NOTICE: drop cascades to table notin.t3 NOTICE: drop cascades to table notin.t2 NOTICE: drop cascades to table notin.t1