-- Extra GPDB tests for joins. -- Ignore "workfile compresssion is not supported by this build" (see -- 'zlib' test): -- -- start_matchignore -- m/ERROR: workfile compresssion is not supported by this build/ -- end_matchignore -- -- test numeric hash join -- set enable_hashjoin to on; set enable_mergejoin to off; set enable_nestloop to off; create table nhtest (i numeric(10, 2)) distributed by (i); insert into nhtest values(100000.22); insert into nhtest values(300000.19); explain select * from nhtest a join nhtest b using (i); QUERY PLAN ----------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=9) -> Hash Join (cost=0.00..862.00 rows=1 width=9) Hash Cond: nhtest.i = nhtest_1.i -> Seq Scan on nhtest (cost=0.00..431.00 rows=1 width=9) -> Hash (cost=431.00..431.00 rows=1 width=9) -> Seq Scan on nhtest nhtest_1 (cost=0.00..431.00 rows=1 width=9) Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0 (7 rows) select * from nhtest a join nhtest b using (i); i ----------- 100000.22 300000.19 (2 rows) create temp table l(a int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into l values (1), (1), (2); select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3; a | a | a ---+---+--- 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 (5 rows) -- -- test hash join -- create table hjtest (i int, j int) distributed by (i,j); insert into hjtest values(3, 4); select count(*) from hjtest a1, hjtest a2 where a2.i = least (a1.i,4) and a2.j = 4; count ------- 1 (1 row) -- -- Test for correct behavior when there is a Merge Join on top of Materialize -- on top of a Motion : -- 1. Use FULL OUTER JOIN to induce a Merge Join -- 2. Use a large tuple size to induce a Materialize -- 3. Use gp_dist_random() to induce a Redistribute -- set enable_hashjoin to off; set enable_mergejoin to on; set enable_nestloop to off; DROP TABLE IF EXISTS alpha; NOTICE: table "alpha" does not exist, skipping DROP TABLE IF EXISTS theta; NOTICE: table "theta" does not exist, skipping CREATE TABLE alpha (i int, j int) distributed by (i); CREATE TABLE theta (i int, j char(10000000)) distributed by (i); INSERT INTO alpha values (1, 1), (2, 2); INSERT INTO theta values (1, 'f'), (2, 'g'); SELECT * FROM gp_dist_random('alpha') FULL OUTER JOIN gp_dist_random('theta') ON (alpha.i = theta.i) WHERE (alpha.j IS NULL or theta.j IS NULL); i | j | i | j ---+---+---+--- (0 rows) reset enable_hashjoin; reset enable_mergejoin; reset enable_nestloop; -- -- Predicate propagation over equality conditions -- drop schema if exists pred; NOTICE: schema "pred" does not exist, skipping create schema pred; set search_path=pred; create table t1 (x int, y int, z int) distributed by (y); create table t2 (x int, y int, z int) distributed by (x); insert into t1 select i, i, i from generate_series(1,100) i; insert into t2 select * from t1; analyze t1; analyze t2; -- -- infer over equalities -- explain select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x; QUERY PLAN ------------------------------------------------------------------------------------------------------------ Aggregate (cost=0.00..862.00 rows=1 width=8) -> Hash Join (cost=0.00..862.00 rows=1 width=1) Hash Cond: (t1.x = t2.x) -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4) -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) Filter: (x = 100) -> Hash (cost=431.00..431.00 rows=1 width=4) -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) -> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=4) Filter: (x = 100) Optimizer: Pivotal Optimizer (GPORCA) version 3.64.0 (11 rows) select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x; count ------- 1 (1 row) -- -- infer over >= -- explain select * from t1,t2 where t1.x = 100 and t2.x >= t1.x; QUERY PLAN ---------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1324039.56 rows=34 width=24) -> Nested Loop (cost=0.00..1324039.56 rows=12 width=24) Join Filter: t2.x >= t1.x -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12) -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) Filter: x = 100 -> Seq Scan on t2 (cost=0.00..431.00 rows=34 width=12) Settings: optimizer=on Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2 (9 rows) select * from t1,t2 where t1.x = 100 and t2.x >= t1.x; x | y | z | x | y | z -----+-----+-----+-----+-----+----- 100 | 100 | 100 | 100 | 100 | 100 (1 row) -- -- multiple inferences -- set optimizer_segments=2; explain select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x; QUERY PLAN ------------------------------------------------------------------------------------------- Hash Join (cost=0.00..862.01 rows=1 width=24) Hash Cond: (t1.x = t2.y) Join Filter: (t1.x <= t2.x) -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12) -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) Filter: (x = 100) -> Hash (cost=431.00..431.00 rows=1 width=12) -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12) -> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=12) Filter: (y = 100) Optimizer: Pivotal Optimizer (GPORCA) version 3.64.0 (11 rows) reset optimizer_segments; select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x; x | y | z | x | y | z -----+-----+-----+-----+-----+----- 100 | 100 | 100 | 100 | 100 | 100 (1 row) -- -- MPP-18537: hash clause references a constant in outer child target list -- create table hjn_test (i int, j int) distributed by (i,j); insert into hjn_test values(3, 4); create table int4_tbl (f1 int) distributed by (f1); insert into int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647); select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4; count ------- 1 (1 row) select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1]; count ------- 1 (1 row) select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1]; count ------- 1 (1 row) select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10); count ------- 1 (1 row) select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); f1 | f1 -------------+------------- 2147483647 | 2147483647 123456 | 123456 -2147483647 | -2147483647 0 | 0 -123456 | -123456 (5 rows) -- Same as the last query, but with a partitioned table (which requires a -- Result node to do projection of the hash expression, as Append is not -- projection-capable) create table part4_tbl (f1 int4) partition by range (f1) (start(-1000000) end (1000000) every (1000000)); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. NOTICE: CREATE TABLE will create partition "part4_tbl_1_prt_1" for table "part4_tbl" NOTICE: CREATE TABLE will create partition "part4_tbl_1_prt_2" for table "part4_tbl" insert into part4_tbl values (-123457), (-123456), (-123455), (-1), (0), (1), (123455), (123456), (123457); select * from part4_tbl a join part4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); f1 | f1 ---------+--------- 0 | 0 123456 | 123456 -123456 | -123456 (3 rows) -- -- Test case where a Motion hash key is only needed for the redistribution, -- and not returned in the final result set. There was a bug at one point where -- tjoin.c1 was used as the hash key in a Motion node, but it was not added -- to the sub-plans target list, causing a "variable not found in subplan -- target list" error. -- create table tjoin1(dk integer, id integer) distributed by (dk); create table tjoin2(dk integer, id integer, t text) distributed by (dk); create table tjoin3(dk integer, id integer, t text) distributed by (dk); insert into tjoin1 values (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3); insert into tjoin2 values (1, 1, '1-1'), (1, 2, '1-2'), (2, 1, '2-1'), (2, 2, '2-2'); insert into tjoin3 values (1, 1, '1-1'), (2, 1, '2-1'); select tjoin1.id, tjoin2.t, tjoin3.t from tjoin1 left outer join (tjoin2 left outer join tjoin3 on tjoin2.id=tjoin3.id) on tjoin1.id=tjoin3.id; id | t | t ----+-----+----- 3 | | 3 | | 1 | 2-1 | 1-1 1 | 2-1 | 2-1 1 | 1-1 | 1-1 1 | 1-1 | 2-1 2 | | 1 | 2-1 | 1-1 1 | 2-1 | 2-1 1 | 1-1 | 1-1 1 | 1-1 | 2-1 2 | | (12 rows) set enable_hashjoin to off; set optimizer_enable_hashjoin = off; select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4; count ------- 1 (1 row) select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1]; count ------- 1 (1 row) select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1]; count ------- 1 (1 row) select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10); count ------- 1 (1 row) select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); f1 | f1 -------------+------------- 2147483647 | 2147483647 123456 | 123456 -2147483647 | -2147483647 0 | 0 -123456 | -123456 (5 rows) reset enable_hashjoin; reset optimizer_enable_hashjoin; -- In case of Left Anti Semi Join, if the left rel is empty a dummy join -- should be created drop table if exists foo; NOTICE: table "foo" does not exist, skipping drop table if exists bar; NOTICE: table "bar" does not exist, skipping create table foo (a int, b int) distributed randomly; create table bar (c int, d int) distributed randomly; insert into foo select generate_series(1,10); insert into bar select generate_series(1,10); explain select a from foo where a<1 and a>1 and not exists (select c from bar where c=a); QUERY PLAN ------------------------------------------------ Result (cost=0.00..0.00 rows=0 width=4) -> Result (cost=0.00..0.00 rows=0 width=4) One-Time Filter: false Settings: optimizer=on Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2 (5 rows) select a from foo where a<1 and a>1 and not exists (select c from bar where c=a); a --- (0 rows) -- The merge join executor code doesn't support LASJ_NOTIN joins. Make sure -- the planner doesn't create an invalid plan with them. create index index_foo on foo (a); create index index_bar on bar (c); set enable_nestloop to off; set enable_hashjoin to off; set enable_mergejoin to on; select * from foo where a not in (select c from bar where c <= 5); a | b ----+--- 7 | 6 | 8 | 9 | 10 | (5 rows) set enable_nestloop to off; set enable_hashjoin to on; set enable_mergejoin to off; create table dept ( id int, pid int, name char(40) ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into dept values(3, 0, 'root'); insert into dept values(4, 3, '2<-1'); insert into dept values(5, 4, '3<-2<-1'); insert into dept values(6, 4, '4<-2<-1'); insert into dept values(7, 3, '5<-1'); insert into dept values(8, 7, '5<-1'); insert into dept select i, i % 6 + 3 from generate_series(9,50) as i; insert into dept select i, 99 from generate_series(100,15000) as i; ANALYZE dept; -- Test rescannable hashjoin with spilling hashtable set statement_mem='1000kB'; set gp_workfile_compression = off; WITH RECURSIVE subdept(id, parent_department, name) AS ( -- non recursive term SELECT * FROM dept WHERE name = 'root' UNION ALL -- recursive term SELECT d.* FROM dept AS d, subdept AS sd WHERE d.pid = sd.id ) SELECT count(*) FROM subdept; count ------- 48 (1 row) -- Test rescannable hashjoin with spilling hashtable, with compression set gp_workfile_compression = on; WITH RECURSIVE subdept(id, parent_department, name) AS ( -- non recursive term SELECT * FROM dept WHERE name = 'root' UNION ALL -- recursive term SELECT d.* FROM dept AS d, subdept AS sd WHERE d.pid = sd.id ) SELECT count(*) FROM subdept; count ------- 48 (1 row) -- Test rescannable hashjoin with in-memory hashtable reset statement_mem; WITH RECURSIVE subdept(id, parent_department, name) AS ( -- non recursive term SELECT * FROM dept WHERE name = 'root' UNION ALL -- recursive term SELECT d.* FROM dept AS d, subdept AS sd WHERE d.pid = sd.id ) SELECT count(*) FROM subdept; count ------- 48 (1 row) -- MPP-29458 -- When we join on a clause with two different types. If one table distribute by one type, the query plan -- will redistribute data on another type. But the has values of two types would not be equal. The data will -- redistribute to wrong segments. create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed by (id); create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms); insert into test_timestamp_t1 values(10 ,'2018-1-10'); insert into test_timestamp_t1 values(11 ,'2018-1-11'); insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp); insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp); -- Test nest loop redistribute keys set enable_nestloop to on; set enable_hashjoin to on; set enable_mergejoin to on; select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms; count ------- 2 (1 row) -- Test hash join redistribute keys set enable_nestloop to off; set enable_hashjoin to on; set enable_mergejoin to on; select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms; count ------- 2 (1 row) drop table test_timestamp_t1; drop table test_timestamp_t2; -- Test merge join redistribute keys create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed randomly; create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms); insert into test_timestamp_t1 values(10 ,'2018-1-10'); insert into test_timestamp_t1 values(11 ,'2018-1-11'); insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp); insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp); select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms; id | field_dt | id | field_tms ----+------------+----+-------------------------- 10 | 01-10-2018 | 10 | Wed Jan 10 00:00:00 2018 11 | 01-11-2018 | 11 | Thu Jan 11 00:00:00 2018 (2 rows) -- test float type set enable_nestloop to off; set enable_hashjoin to on; set enable_mergejoin to on; create table test_float1(id int, data float4) DISTRIBUTED BY (data); create table test_float2(id int, data float8) DISTRIBUTED BY (data); insert into test_float1 values(1, 10), (2, 20); insert into test_float2 values(3, 10), (4, 20); select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data; id | data | id | data ----+------+----+------ 2 | 20 | 4 | 20 1 | 10 | 3 | 10 (2 rows) -- test int type create table test_int1(id int, data int4) DISTRIBUTED BY (data); create table test_int2(id int, data int8) DISTRIBUTED BY (data); insert into test_int1 values(1, 10), (2, 20); insert into test_int2 values(3, 10), (4, 20); select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data; id | data | id | data ----+------+----+------ 1 | 10 | 3 | 10 2 | 20 | 4 | 20 (2 rows) -- Test to ensure that for full outer join on varchar columns, planner is successful in finding a sort operator in the catalog create table input_table(a varchar(30), b varchar(30)) distributed by (a); set enable_hashjoin = off; explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a; QUERY PLAN ----------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Merge Full Join Merge Cond: ((x.a)::text = (input_table.a)::text) -> Sort Sort Key: x.a -> Seq Scan on input_table x -> Sort Sort Key: input_table.a -> Seq Scan on input_table Optimizer: Postgres query optimizer (10 rows) -- Cleanup reset enable_hashjoin; set client_min_messages='warning'; -- silence drop-cascade NOTICEs drop schema pred cascade; reset search_path; -- github issue 5370 cases drop table if exists t5370; NOTICE: table "t5370" does not exist, skipping drop table if exists t5370_2; NOTICE: table "t5370_2" does not exist, skipping create table t5370(id int,name text) distributed by(id); insert into t5370 select i,i from generate_series(1,1000) i; create table t5370_2 as select * from t5370 distributed by (id); analyze t5370_2; analyze t5370; explain select * from t5370 a , t5370_2 b where a.name=b.name; QUERY PLAN -------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..862.25 rows=1000 width=14) -> Hash Join (cost=0.00..862.20 rows=334 width=14) Hash Cond: t5370.name = t5370_2.name -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.02 rows=334 width=7) Hash Key: t5370.name -> Seq Scan on t5370 (cost=0.00..431.01 rows=334 width=7) -> Hash (cost=431.02..431.02 rows=334 width=7) -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.02 rows=334 width=7) Hash Key: t5370_2.name -> Seq Scan on t5370_2 (cost=0.00..431.01 rows=334 width=7) Optimizer: Pivotal Optimizer (GPORCA) version 2.70.2 (11 rows) drop table t5370; drop table t5370_2; -- github issue 6215 cases -- When executing the following plan -- ``` -- Gather Motion 1:1 (slice1; segments: 1) -- -> Merge Full Join -- -> Seq Scan on int4_tbl a -- -> Seq Scan on int4_tbl b --``` -- Greenplum will raise an Assert Fail. -- We force adding a material node for -- merge full join on true. drop table if exists t6215; create table t6215(f1 int4) distributed replicated; insert into t6215(f1) values (1), (2), (3); set enable_material = off; -- The plan still have Material operator explain (costs off) select * from t6215 a full join t6215 b on true; QUERY PLAN ------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) -> Merge Full Join -> Seq Scan on t6215 a -> Materialize -> Seq Scan on t6215 b Optimizer: Postgres query optimizer (6 rows) select * from t6215 a full join t6215 b on true; f1 | f1 ----+---- 1 | 1 1 | 2 1 | 3 2 | 1 2 | 2 2 | 3 3 | 1 3 | 2 3 | 3 (9 rows) drop table t6215; -- -- This tripped an assertion while deciding the locus for the joins. -- The code was failing to handle join between SingleQE and Hash correctly, -- when there were join order restricitions. (see -- https://github.com/greenplum-db/gpdb/issues/6643 -- select a.f1, b.f1, t.thousand, t.tenthous from (select sum(f1) as f1 from int4_tbl i4b) b left outer join (select sum(f1)+1 as f1 from int4_tbl i4a) a ON a.f1 = b.f1 left outer join tenk1 t ON b.f1 = t.thousand and (a.f1+b.f1+999) = t.tenthous; f1 | f1 | thousand | tenthous ----+----+----------+---------- | 0 | | (1 row) -- tests to ensure that join reordering of LOJs and inner joins produces the -- correct join predicates & residual filters drop table if exists t1, t2, t3; CREATE TABLE t1 (a int, b int, c int); CREATE TABLE t2 (a int, b int, c int); CREATE TABLE t3 (a int, b int, c int); INSERT INTO t1 SELECT i, i, i FROM generate_series(1, 1000) i; INSERT INTO t2 SELECT i, i, i FROM generate_series(2, 1000) i; -- start from 2 so that one row from t1 doesn't match INSERT INTO t3 VALUES (1, 2, 3), (NULL, 2, 2); ANALYZE t1; ANALYZE t2; ANALYZE t3; -- ensure plan has a filter over left outer join explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c)); QUERY PLAN --------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) -> Result Filter: ((t2.a IS NULL) OR (t1.c = t3.c)) -> Hash Left Join Hash Cond: (t1.a = t2.a) -> Hash Join Hash Cond: (t1.b = t3.b) -> Seq Scan on t1 -> Hash -> Broadcast Motion 3:3 (slice1; segments: 3) -> Seq Scan on t3 -> Hash -> Seq Scan on t2 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0 (14 rows) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c)); a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- 2 | 2 | 2 | 2 | 2 | 2 | | 2 | 2 (1 row) -- ensure plan has two inner joins with the where clause & join predicates ANDed explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a); QUERY PLAN ------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: ((t1.a = t2.a) AND (t1.b = t3.b)) -> Seq Scan on t1 -> Hash -> Hash Join Hash Cond: (t2.a = t3.a) -> Seq Scan on t2 -> Hash -> Seq Scan on t3 Optimizer: Pivotal Optimizer (GPORCA) version 3.75.0 (11 rows) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a); a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- (0 rows) -- ensure plan has a filter over left outer join explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a); QUERY PLAN --------------------------------------------------------------------------- Gather Motion 3:1 (slice2; segments: 3) -> Result Filter: (t2.a IS DISTINCT FROM t3.a) -> Hash Left Join Hash Cond: (t1.a = t2.a) -> Hash Join Hash Cond: (t1.b = t3.b) -> Seq Scan on t1 -> Hash -> Broadcast Motion 3:3 (slice1; segments: 3) -> Seq Scan on t3 -> Hash -> Seq Scan on t2 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0 (14 rows) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a); a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3 2 | 2 | 2 | 2 | 2 | 2 | | 2 | 2 (2 rows) -- ensure plan has a filter over left outer join explain select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a)); QUERY PLAN ----------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.22 rows=4 width=36) -> Result (cost=0.00..1293.22 rows=2 width=36) Filter: ((t2.a IS NULL) OR (t1.c = t3.a)) -> Hash Left Join (cost=0.00..1293.22 rows=2 width=36) Hash Cond: (t1.a = t2.a) -> Hash Join (cost=0.00..862.08 rows=1 width=24) Hash Cond: (t1.a = t3.a) -> Seq Scan on t1 (cost=0.00..431.01 rows=334 width=12) -> Hash (cost=431.00..431.00 rows=1 width=12) -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=12) -> Hash (cost=431.01..431.01 rows=333 width=12) -> Seq Scan on t2 (cost=0.00..431.01 rows=333 width=12) Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0 (13 rows) select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a)); a | b | c | t1a | t1b | t1c | t2a | t2b | t2c ---+---+---+-----+-----+-----+-----+-----+----- 1 | 2 | 3 | 1 | 1 | 1 | | | (1 row) -- ensure plan has a filter over left outer join explain select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt join t3 on tt.t1b = t3.b join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b); QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice4; segments: 3) (cost=0.00..2586.40 rows=8 width=56) -> Hash Join (cost=0.00..2586.39 rows=3 width=56) Hash Cond: ((t1.b = t3_1.b) AND (t1_1.b = t3_1.b) AND (t3.b = t3_1.b)) -> Result (cost=0.00..2155.39 rows=2 width=44) Filter: ((t2_1.a IS NULL) OR (t1.b = t3.b)) -> Hash Left Join (cost=0.00..2155.39 rows=2 width=44) Hash Cond: (t1.a = t2_1.a) -> Hash Join (cost=0.00..1724.29 rows=1 width=36) Hash Cond: ((t1.b = t3.b) AND (t1.b = t1_1.b)) -> Seq Scan on t1 (cost=0.00..431.01 rows=334 width=8) -> Hash (cost=1293.18..1293.18 rows=3 width=28) -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1293.18 rows=3 width=28) -> Hash Left Join (cost=0.00..1293.18 rows=1 width=28) Hash Cond: (t1_1.a = t2.a) -> Hash Join (cost=0.00..862.07 rows=1 width=20) Hash Cond: (t1_1.b = t3.b) -> Seq Scan on t1 t1_1 (cost=0.00..431.01 rows=334 width=8) -> Hash (cost=431.00..431.00 rows=2 width=12) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=2 width=12) -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=12) -> Hash (cost=431.01..431.01 rows=333 width=8) -> Seq Scan on t2 (cost=0.00..431.01 rows=333 width=8) -> Hash (cost=431.01..431.01 rows=333 width=8) -> Seq Scan on t2 t2_1 (cost=0.00..431.01 rows=333 width=8) -> Hash (cost=431.00..431.00 rows=2 width=12) -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=2 width=12) -> Seq Scan on t3 t3_1 (cost=0.00..431.00 rows=1 width=12) Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0 (28 rows) select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt join t3 on tt.t1b = t3.b join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b); t1a | t1b | t2a | t2b | a | b | c | t1a | t1b | t2a | t2b | a | b | c -----+-----+-----+-----+---+---+---+-----+-----+-----+-----+---+---+--- 2 | 2 | 2 | 2 | | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3 2 | 2 | 2 | 2 | | 2 | 2 | 2 | 2 | 2 | 2 | | 2 | 2 2 | 2 | 2 | 2 | 1 | 2 | 3 | 2 | 2 | 2 | 2 | 1 | 2 | 3 2 | 2 | 2 | 2 | 1 | 2 | 3 | 2 | 2 | 2 | 2 | | 2 | 2 (4 rows) -- test different join order enumeration methods set optimizer_join_order = query; select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3 2 | 2 | 2 | 2 | 2 | 2 | | 2 | 2 (2 rows) set optimizer_join_order = greedy; select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3 2 | 2 | 2 | 2 | 2 | 2 | | 2 | 2 (2 rows) set optimizer_join_order = exhaustive; select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- 2 | 2 | 2 | 2 | 2 | 2 | | 2 | 2 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3 (2 rows) set optimizer_join_order = exhaustive2; select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3 2 | 2 | 2 | 2 | 2 | 2 | | 2 | 2 (2 rows) reset optimizer_join_order; select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3 2 | 2 | 2 | 2 | 2 | 2 | | 2 | 2 (2 rows) drop table t1, t2, t3; -- -- Test a bug that nestloop path previously can not generate motion above -- index path, which sometimes is wrong (this test case is an example). -- We now depend on parameterized path related variables to judge instead. -- We conservatively disallow motion when there is parameter requirement -- for either outer or inner at this moment though there could be room -- for further improvement (e.g. referring subplan code to do broadcast -- for base rel if needed, which needs much effort and does not seem to -- be deserved given we will probably refactor related code for the lateral -- support in the near future). For the query and guc settings below, Postgres -- planner can not generate a plan. set enable_nestloop = 1; set enable_material = 0; set enable_seqscan = 0; set enable_bitmapscan = 0; explain select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- Limit (cost=0.00..1885517.36 rows=1 width=1) -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1885517.36 rows=1 width=1) -> Limit (cost=0.00..1885517.36 rows=1 width=1) -> Result (cost=0.00..1885517.36 rows=33336667 width=1) -> Nested Loop Left Join (cost=0.00..1885484.02 rows=33336667 width=4) Join Filter: true -> Seq Scan on tenk1 (cost=0.00..431.51 rows=3334 width=4) -> Materialize (cost=0.00..431.70 rows=10000 width=1) -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.69 rows=10000 width=1) -> Seq Scan on tenk2 (cost=0.00..431.50 rows=3334 width=1) Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0 (11 rows) select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1; ?column? ---------- t (1 row) reset enable_nestloop; reset enable_material; reset enable_seqscan; reset enable_bitmapscan; -- test that flow->hashExpr variables can be resolved CREATE TABLE hexpr_t1 (c1 int, c2 character varying(16)) DISTRIBUTED BY (c1); CREATE TABLE hexpr_t2 (c3 character varying(16)) DISTRIBUTED BY (c3); INSERT INTO hexpr_t1 SELECT i, i::character varying FROM generate_series(1,10)i; INSERT INTO hexpr_t2 SELECT i::character varying FROM generate_series(1,10)i; EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2 ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text); QUERY PLAN ------------------------------------------------------------------------------------------------------ Result (cost=0.00..862.00 rows=5 width=8) -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..862.00 rows=14 width=2) -> Hash Left Join (cost=0.00..862.00 rows=5 width=2) Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text) -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=4 width=2) Hash Key: btrim((hexpr_t1.c2)::text) -> Seq Scan on hexpr_t1 (cost=0.00..431.00 rows=4 width=2) -> Hash (cost=431.00..431.00 rows=4 width=2) -> Seq Scan on hexpr_t2 (cost=0.00..431.00 rows=4 width=2) Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0 (10 rows) EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2 ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text); QUERY PLAN ------------------------------------------------------------------------------------------------------ Result (cost=0.00..862.00 rows=5 width=8) -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..862.00 rows=14 width=2) -> Hash Left Join (cost=0.00..862.00 rows=5 width=2) Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text) -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=4 width=2) Hash Key: btrim((hexpr_t1.c2)::text) -> Seq Scan on hexpr_t1 (cost=0.00..431.00 rows=4 width=2) -> Hash (cost=431.00..431.00 rows=4 width=2) -> Seq Scan on hexpr_t2 (cost=0.00..431.00 rows=4 width=2) Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0 (10 rows) EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2 ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text); QUERY PLAN ------------------------------------------------------------------------------------------------------ Result (cost=0.00..862.00 rows=5 width=8) -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..862.00 rows=14 width=2) -> Hash Left Join (cost=0.00..862.00 rows=5 width=2) Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text) -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=4 width=2) Hash Key: btrim((hexpr_t1.c2)::text) -> Seq Scan on hexpr_t1 (cost=0.00..431.00 rows=4 width=2) -> Hash (cost=431.00..431.00 rows=4 width=2) -> Seq Scan on hexpr_t2 (cost=0.00..431.00 rows=4 width=2) Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0 (10 rows) SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2 ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text); foo ----- 2 3 4 7 5 6 8 9 10 1 (10 rows) SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2 ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text); foo ----- 8 5 6 1 9 10 2 3 4 7 (10 rows) SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2 ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text); foo ----- 9 10 1 2 3 4 7 5 6 8 (10 rows)