join_gp_optimizer.out

-- Extra GPDB tests for joins.
-- Ignore "workfile compresssion is not supported by this build" (see
-- 'zlib' test):
--
-- start_matchignore
-- m/ERROR:  workfile compresssion is not supported by this build/
-- end_matchignore
--
-- test numeric hash join
--
set enable_hashjoin to on;
set enable_mergejoin to off;
set enable_nestloop to off;
create table nhtest (i numeric(10, 2)) distributed by (i);
insert into nhtest values(100000.22);
insert into nhtest values(300000.19);
explain select * from nhtest a join nhtest b using (i);
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..862.00 rows=1 width=9)
   ->  Hash Join  (cost=0.00..862.00 rows=1 width=9)
         Hash Cond: nhtest.i = nhtest_1.i
         ->  Seq Scan on nhtest  (cost=0.00..431.00 rows=1 width=9)
         ->  Hash  (cost=431.00..431.00 rows=1 width=9)
               ->  Seq Scan on nhtest nhtest_1  (cost=0.00..431.00 rows=1 width=9)
 Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
(7 rows)

select * from nhtest a join nhtest b using (i);
     i     
-----------
 100000.22
 300000.19
(2 rows)

create temp table l(a int);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into l values (1), (1), (2);
select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3;
 a | a | a 
---+---+---
 1 | 1 |  
 1 | 1 |  
 1 | 1 |  
 1 | 1 |  
 2 | 2 | 2
(5 rows)

--
-- test hash join
--
create table hjtest (i int, j int) distributed by (i,j);
insert into hjtest values(3, 4);
select count(*) from hjtest a1, hjtest a2 where a2.i = least (a1.i,4) and a2.j = 4;
 count 
-------
     1
(1 row)

--
-- Test for correct behavior when there is a Merge Join on top of Materialize
-- on top of a Motion :
-- 1. Use FULL OUTER JOIN to induce a Merge Join
-- 2. Use a large tuple size to induce a Materialize
-- 3. Use gp_dist_random() to induce a Redistribute
--
set enable_hashjoin to off;
set enable_mergejoin to on;
set enable_nestloop to off;
DROP TABLE IF EXISTS alpha;
NOTICE:  table "alpha" does not exist, skipping
DROP TABLE IF EXISTS theta;
NOTICE:  table "theta" does not exist, skipping
CREATE TABLE alpha (i int, j int) distributed by (i);
CREATE TABLE theta (i int, j char(10000000)) distributed by (i);
INSERT INTO alpha values (1, 1), (2, 2);
INSERT INTO theta values (1, 'f'), (2, 'g');
SELECT *
FROM gp_dist_random('alpha') FULL OUTER JOIN gp_dist_random('theta')
  ON (alpha.i = theta.i)
WHERE (alpha.j IS NULL or theta.j IS NULL);
 i | j | i | j 
---+---+---+---
(0 rows)

reset enable_hashjoin;
reset enable_mergejoin;
reset enable_nestloop;
--
-- Predicate propagation over equality conditions
--
drop schema if exists pred;
NOTICE:  schema "pred" does not exist, skipping
create schema pred;
set search_path=pred;
create table t1 (x int, y int, z int) distributed by (y);
create table t2 (x int, y int, z int) distributed by (x);
insert into t1 select i, i, i from generate_series(1,100) i;
insert into t2 select * from t1;
analyze t1;
analyze t2;
--
-- infer over equalities
--
explain select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
                                                 QUERY PLAN                                                 
------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=0.00..862.00 rows=1 width=8)
   ->  Hash Join  (cost=0.00..862.00 rows=1 width=1)
         Hash Cond: (t1.x = t2.x)
         ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=4)
               ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=4)
                     Filter: (x = 100)
         ->  Hash  (cost=431.00..431.00 rows=1 width=4)
               ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=4)
                     ->  Seq Scan on t2  (cost=0.00..431.00 rows=1 width=4)
                           Filter: (x = 100)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.64.0
(11 rows)

select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
 count 
-------
     1
(1 row)

--
-- infer over >=
--
explain select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
                                          QUERY PLAN                                          
----------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1324039.56 rows=34 width=24)
   ->  Nested Loop  (cost=0.00..1324039.56 rows=12 width=24)
         Join Filter: t2.x >= t1.x
         ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
               ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=12)
                     Filter: x = 100
         ->  Seq Scan on t2  (cost=0.00..431.00 rows=34 width=12)
 Settings:  optimizer=on
 Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2
(9 rows)

select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
  x  |  y  |  z  |  x  |  y  |  z  
-----+-----+-----+-----+-----+-----
 100 | 100 | 100 | 100 | 100 | 100
(1 row)

--
-- multiple inferences
--
set optimizer_segments=2;
explain select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
                                        QUERY PLAN                                         
-------------------------------------------------------------------------------------------
 Hash Join  (cost=0.00..862.01 rows=1 width=24)
   Hash Cond: (t1.x = t2.y)
   Join Filter: (t1.x <= t2.x)
   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
         ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=12)
               Filter: (x = 100)
   ->  Hash  (cost=431.00..431.00 rows=1 width=12)
         ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
               ->  Seq Scan on t2  (cost=0.00..431.00 rows=1 width=12)
                     Filter: (y = 100)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.64.0
(11 rows)

reset optimizer_segments;
select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
  x  |  y  |  z  |  x  |  y  |  z  
-----+-----+-----+-----+-----+-----
 100 | 100 | 100 | 100 | 100 | 100
(1 row)

--
-- MPP-18537: hash clause references a constant in outer child target list
--
create table hjn_test (i int, j int) distributed by (i,j);
insert into hjn_test values(3, 4);
create table int4_tbl (f1 int) distributed by (f1);
insert into int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647);
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
 count 
-------
     1
(1 row)

select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
     f1      |     f1      
-------------+-------------
  2147483647 |  2147483647
      123456 |      123456
 -2147483647 | -2147483647
           0 |           0
     -123456 |     -123456
(5 rows)

-- Same as the last query, but with a partitioned table (which requires a
-- Result node to do projection of the hash expression, as Append is not
-- projection-capable)
create table part4_tbl (f1 int4) partition by range (f1) (start(-1000000) end (1000000) every (1000000));
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
NOTICE:  CREATE TABLE will create partition "part4_tbl_1_prt_1" for table "part4_tbl"
NOTICE:  CREATE TABLE will create partition "part4_tbl_1_prt_2" for table "part4_tbl"
insert into part4_tbl values
       (-123457), (-123456), (-123455),
       (-1), (0), (1),
       (123455), (123456), (123457);
select * from part4_tbl a join part4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
   f1    |   f1    
---------+---------
       0 |       0
  123456 |  123456
 -123456 | -123456
(3 rows)

--
-- Test case where a Motion hash key is only needed for the redistribution,
-- and not returned in the final result set. There was a bug at one point where
-- tjoin.c1 was used as the hash key in a Motion node, but it was not added
-- to the sub-plans target list, causing a "variable not found in subplan
-- target list" error.
--
create table tjoin1(dk integer, id integer) distributed by (dk);
create table tjoin2(dk integer, id integer, t text) distributed by (dk);
create table tjoin3(dk integer, id integer, t text) distributed by (dk);
insert into tjoin1 values (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3);
insert into tjoin2 values (1, 1, '1-1'), (1, 2, '1-2'), (2, 1, '2-1'), (2, 2, '2-2');
insert into tjoin3 values (1, 1, '1-1'), (2, 1, '2-1');
select tjoin1.id, tjoin2.t, tjoin3.t
from tjoin1
left outer join (tjoin2 left outer join tjoin3 on tjoin2.id=tjoin3.id) on tjoin1.id=tjoin3.id;
 id |  t  |  t  
----+-----+-----
  3 |     | 
  3 |     | 
  1 | 2-1 | 1-1
  1 | 2-1 | 2-1
  1 | 1-1 | 1-1
  1 | 1-1 | 2-1
  2 |     | 
  1 | 2-1 | 1-1
  1 | 2-1 | 2-1
  1 | 1-1 | 1-1
  1 | 1-1 | 2-1
  2 |     | 
(12 rows)

set enable_hashjoin to off;
set optimizer_enable_hashjoin = off;
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
 count 
-------
     1
(1 row)

select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
     f1      |     f1      
-------------+-------------
  2147483647 |  2147483647
      123456 |      123456
 -2147483647 | -2147483647
           0 |           0
     -123456 |     -123456
(5 rows)

reset enable_hashjoin;
reset optimizer_enable_hashjoin;
-- In case of Left Anti Semi Join, if the left rel is empty a dummy join
-- should be created
drop table if exists foo;
NOTICE:  table "foo" does not exist, skipping
drop table if exists bar;
NOTICE:  table "bar" does not exist, skipping
create table foo (a int, b int) distributed randomly;
create table bar (c int, d int) distributed randomly;
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
explain select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
                   QUERY PLAN                   
------------------------------------------------
 Result  (cost=0.00..0.00 rows=0 width=4)
   ->  Result  (cost=0.00..0.00 rows=0 width=4)
         One-Time Filter: false
 Settings:  optimizer=on
 Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2
(5 rows)

select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
 a 
---
(0 rows)

-- The merge join executor code doesn't support LASJ_NOTIN joins. Make sure
-- the planner doesn't create an invalid plan with them.
create index index_foo on foo (a);
create index index_bar on bar (c);
set enable_nestloop to off;
set enable_hashjoin to off;
set enable_mergejoin to on;
select * from foo where a not in (select c from bar where c <= 5);
 a  | b 
----+---
  7 |  
  6 |  
  8 |  
  9 |  
 10 |  
(5 rows)

set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to off;
create table dept
(
	id int,
	pid int,
	name char(40)
);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into dept values(3, 0, 'root');
insert into dept values(4, 3, '2<-1');
insert into dept values(5, 4, '3<-2<-1');
insert into dept values(6, 4, '4<-2<-1');
insert into dept values(7, 3, '5<-1');
insert into dept values(8, 7, '5<-1');
insert into dept select i, i % 6 + 3 from generate_series(9,50) as i;
insert into dept select i, 99 from generate_series(100,15000) as i;
ANALYZE dept;
-- Test rescannable hashjoin with spilling hashtable
set statement_mem='1000kB';
set gp_workfile_compression = off;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

-- Test rescannable hashjoin with spilling hashtable, with compression
set gp_workfile_compression = on;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

-- Test rescannable hashjoin with in-memory hashtable
reset statement_mem;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

-- MPP-29458
-- When we join on a clause with two different types. If one table distribute by one type, the query plan
-- will redistribute data on another type. But the has values of two types would not be equal. The data will
-- redistribute to wrong segments.
create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed by (id);
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
-- Test nest loop redistribute keys
set enable_nestloop to on;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
 count 
-------
     2
(1 row)

-- Test hash join redistribute keys
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
 count 
-------
     2
(1 row)

drop table test_timestamp_t1;
drop table test_timestamp_t2;
-- Test merge join redistribute keys
create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed randomly;
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
 id |  field_dt  | id |        field_tms         
----+------------+----+--------------------------
 10 | 01-10-2018 | 10 | Wed Jan 10 00:00:00 2018
 11 | 01-11-2018 | 11 | Thu Jan 11 00:00:00 2018
(2 rows)

-- test float type
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
create table test_float1(id int, data float4)  DISTRIBUTED BY (data);
create table test_float2(id int, data float8)  DISTRIBUTED BY (data);
insert into test_float1 values(1, 10), (2, 20);
insert into test_float2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
 id | data | id | data 
----+------+----+------
  2 |   20 |  4 |   20
  1 |   10 |  3 |   10
(2 rows)

-- test int type
create table test_int1(id int, data int4)  DISTRIBUTED BY (data);
create table test_int2(id int, data int8)  DISTRIBUTED BY (data);
insert into test_int1 values(1, 10), (2, 20);
insert into test_int2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
 id | data | id | data 
----+------+----+------
  1 |   10 |  3 |   10
  2 |   20 |  4 |   20
(2 rows)

-- Test to ensure that for full outer join on varchar columns, planner is successful in finding a sort operator in the catalog
create table input_table(a varchar(30), b varchar(30)) distributed by (a);
set enable_hashjoin = off;
explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a;
                        QUERY PLAN                         
-----------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
   ->  Merge Full Join
         Merge Cond: ((x.a)::text = (input_table.a)::text)
         ->  Sort
               Sort Key: x.a
               ->  Seq Scan on input_table x
         ->  Sort
               Sort Key: input_table.a
               ->  Seq Scan on input_table
 Optimizer: Postgres query optimizer
(10 rows)

-- Cleanup
reset enable_hashjoin;
set client_min_messages='warning'; -- silence drop-cascade NOTICEs
drop schema pred cascade;
reset search_path;
-- github issue 5370 cases
drop table if exists t5370;
NOTICE:  table "t5370" does not exist, skipping
drop table if exists t5370_2;
NOTICE:  table "t5370_2" does not exist, skipping
create table t5370(id int,name text) distributed by(id);
insert into t5370 select i,i from  generate_series(1,1000) i;
create table t5370_2 as select * from t5370 distributed by (id);
analyze t5370_2;
analyze t5370;
explain select * from t5370 a , t5370_2 b where a.name=b.name;
                                               QUERY PLAN                                               
--------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice3; segments: 3)  (cost=0.00..862.25 rows=1000 width=14)
   ->  Hash Join  (cost=0.00..862.20 rows=334 width=14)
         Hash Cond: t5370.name = t5370_2.name
         ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.02 rows=334 width=7)
               Hash Key: t5370.name
               ->  Seq Scan on t5370  (cost=0.00..431.01 rows=334 width=7)
         ->  Hash  (cost=431.02..431.02 rows=334 width=7)
               ->  Redistribute Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.02 rows=334 width=7)
                     Hash Key: t5370_2.name
                     ->  Seq Scan on t5370_2  (cost=0.00..431.01 rows=334 width=7)
 Optimizer: Pivotal Optimizer (GPORCA) version 2.70.2
(11 rows)

drop table t5370;
drop table t5370_2;
-- github issue 6215 cases
-- When executing the following plan
-- ```
--  Gather Motion 1:1  (slice1; segments: 1)
--    ->  Merge Full Join
--         ->  Seq Scan on int4_tbl a
--         ->  Seq Scan on int4_tbl b
--```
-- Greenplum will raise an Assert Fail.
-- We force adding a material node for
-- merge full join on true.
drop table if exists t6215;
create table t6215(f1 int4) distributed replicated;
insert into t6215(f1) values (1), (2), (3);
set enable_material = off;
-- The plan still have Material operator
explain (costs off) select * from t6215 a full join t6215 b on true;
                QUERY PLAN                
------------------------------------------
 Gather Motion 1:1  (slice1; segments: 1)
   ->  Merge Full Join
         ->  Seq Scan on t6215 a
         ->  Materialize
               ->  Seq Scan on t6215 b
 Optimizer: Postgres query optimizer
(6 rows)

select * from t6215 a full join t6215 b on true;
 f1 | f1 
----+----
  1 |  1
  1 |  2
  1 |  3
  2 |  1
  2 |  2
  2 |  3
  3 |  1
  3 |  2
  3 |  3
(9 rows)

drop table t6215;
--
-- This tripped an assertion while deciding the locus for the joins.
-- The code was failing to handle join between SingleQE and Hash correctly,
-- when there were join order restricitions. (see
-- https://github.com/greenplum-db/gpdb/issues/6643
--
select a.f1, b.f1, t.thousand, t.tenthous from
  (select sum(f1) as f1 from int4_tbl i4b) b
   left outer join
     (select sum(f1)+1 as f1 from int4_tbl i4a) a ON a.f1 = b.f1
      left outer join
        tenk1 t ON b.f1 = t.thousand and (a.f1+b.f1+999) = t.tenthous;
 f1 | f1 | thousand | tenthous 
----+----+----------+----------
    |  0 |          |         
(1 row)

-- tests to ensure that join reordering of LOJs and inner joins produces the
-- correct join predicates & residual filters
drop table if exists t1, t2, t3;
CREATE TABLE t1 (a int, b int, c int);
CREATE TABLE t2 (a int, b int, c int);
CREATE TABLE t3 (a int, b int, c int);
INSERT INTO t1 SELECT i, i, i FROM generate_series(1, 1000) i;
INSERT INTO t2 SELECT i, i, i FROM generate_series(2, 1000) i; -- start from 2 so that one row from t1 doesn't match
INSERT INTO t3 VALUES (1, 2, 3), (NULL, 2, 2);
ANALYZE t1;
ANALYZE t2;
ANALYZE t3;
-- ensure plan has a filter over left outer join
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c));
                                QUERY PLAN                                 
---------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)
   ->  Result
         Filter: ((t2.a IS NULL) OR (t1.c = t3.c))
         ->  Hash Left Join
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join
                     Hash Cond: (t1.b = t3.b)
                     ->  Seq Scan on t1
                     ->  Hash
                           ->  Broadcast Motion 3:3  (slice1; segments: 3)
                                 ->  Seq Scan on t3
               ->  Hash
                     ->  Seq Scan on t2
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
(14 rows)

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c));
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(1 row)

-- ensure plan has two inner joins with the where clause & join predicates ANDed
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a);
                      QUERY PLAN                      
------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
   ->  Hash Join
         Hash Cond: ((t1.a = t2.a) AND (t1.b = t3.b))
         ->  Seq Scan on t1
         ->  Hash
               ->  Hash Join
                     Hash Cond: (t2.a = t3.a)
                     ->  Seq Scan on t2
                     ->  Hash
                           ->  Seq Scan on t3
 Optimizer: Pivotal Optimizer (GPORCA) version 3.75.0
(11 rows)

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a);
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
(0 rows)

-- ensure plan has a filter over left outer join
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a);
                                QUERY PLAN                                 
---------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)
   ->  Result
         Filter: (t2.a IS DISTINCT FROM t3.a)
         ->  Hash Left Join
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join
                     Hash Cond: (t1.b = t3.b)
                     ->  Seq Scan on t1
                     ->  Hash
                           ->  Broadcast Motion 3:3  (slice1; segments: 3)
                                 ->  Seq Scan on t3
               ->  Hash
                     ->  Seq Scan on t2
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
(14 rows)

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a);
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

-- ensure plan has a filter over left outer join
explain select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a));
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1293.22 rows=4 width=36)
   ->  Result  (cost=0.00..1293.22 rows=2 width=36)
         Filter: ((t2.a IS NULL) OR (t1.c = t3.a))
         ->  Hash Left Join  (cost=0.00..1293.22 rows=2 width=36)
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join  (cost=0.00..862.08 rows=1 width=24)
                     Hash Cond: (t1.a = t3.a)
                     ->  Seq Scan on t1  (cost=0.00..431.01 rows=334 width=12)
                     ->  Hash  (cost=431.00..431.00 rows=1 width=12)
                           ->  Seq Scan on t3  (cost=0.00..431.00 rows=1 width=12)
               ->  Hash  (cost=431.01..431.01 rows=333 width=12)
                     ->  Seq Scan on t2  (cost=0.00..431.01 rows=333 width=12)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
(13 rows)

select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a));
 a | b | c | t1a | t1b | t1c | t2a | t2b | t2c 
---+---+---+-----+-----+-----+-----+-----+-----
 1 | 2 | 3 |   1 |   1 |   1 |     |     |    
(1 row)

-- ensure plan has a filter over left outer join
explain select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt 
  join t3 on tt.t1b = t3.b 
  join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b 
  join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b);
                                                                  QUERY PLAN                                                                  
----------------------------------------------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice4; segments: 3)  (cost=0.00..2586.40 rows=8 width=56)
   ->  Hash Join  (cost=0.00..2586.39 rows=3 width=56)
         Hash Cond: ((t1.b = t3_1.b) AND (t1_1.b = t3_1.b) AND (t3.b = t3_1.b))
         ->  Result  (cost=0.00..2155.39 rows=2 width=44)
               Filter: ((t2_1.a IS NULL) OR (t1.b = t3.b))
               ->  Hash Left Join  (cost=0.00..2155.39 rows=2 width=44)
                     Hash Cond: (t1.a = t2_1.a)
                     ->  Hash Join  (cost=0.00..1724.29 rows=1 width=36)
                           Hash Cond: ((t1.b = t3.b) AND (t1.b = t1_1.b))
                           ->  Seq Scan on t1  (cost=0.00..431.01 rows=334 width=8)
                           ->  Hash  (cost=1293.18..1293.18 rows=3 width=28)
                                 ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1293.18 rows=3 width=28)
                                       ->  Hash Left Join  (cost=0.00..1293.18 rows=1 width=28)
                                             Hash Cond: (t1_1.a = t2.a)
                                             ->  Hash Join  (cost=0.00..862.07 rows=1 width=20)
                                                   Hash Cond: (t1_1.b = t3.b)
                                                   ->  Seq Scan on t1 t1_1  (cost=0.00..431.01 rows=334 width=8)
                                                   ->  Hash  (cost=431.00..431.00 rows=2 width=12)
                                                         ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=2 width=12)
                                                               ->  Seq Scan on t3  (cost=0.00..431.00 rows=1 width=12)
                                             ->  Hash  (cost=431.01..431.01 rows=333 width=8)
                                                   ->  Seq Scan on t2  (cost=0.00..431.01 rows=333 width=8)
                     ->  Hash  (cost=431.01..431.01 rows=333 width=8)
                           ->  Seq Scan on t2 t2_1  (cost=0.00..431.01 rows=333 width=8)
         ->  Hash  (cost=431.00..431.00 rows=2 width=12)
               ->  Broadcast Motion 3:3  (slice3; segments: 3)  (cost=0.00..431.00 rows=2 width=12)
                     ->  Seq Scan on t3 t3_1  (cost=0.00..431.00 rows=1 width=12)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
(28 rows)

select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt 
  join t3 on tt.t1b = t3.b 
  join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b 
  join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b);
 t1a | t1b | t2a | t2b | a | b | c | t1a | t1b | t2a | t2b | a | b | c 
-----+-----+-----+-----+---+---+---+-----+-----+-----+-----+---+---+---
   2 |   2 |   2 |   2 |   | 2 | 2 |   2 |   2 |   2 |   2 | 1 | 2 | 3
   2 |   2 |   2 |   2 |   | 2 | 2 |   2 |   2 |   2 |   2 |   | 2 | 2
   2 |   2 |   2 |   2 | 1 | 2 | 3 |   2 |   2 |   2 |   2 | 1 | 2 | 3
   2 |   2 |   2 |   2 | 1 | 2 | 3 |   2 |   2 |   2 |   2 |   | 2 | 2
(4 rows)

-- test different join order enumeration methods
set optimizer_join_order = query;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

set optimizer_join_order = greedy;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

set optimizer_join_order = exhaustive;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
(2 rows)

set optimizer_join_order = exhaustive2;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

reset optimizer_join_order;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

drop table t1, t2, t3;
--
-- Test a bug that nestloop path previously can not generate motion above
-- index path, which sometimes is wrong (this test case is an example).
-- We now depend on parameterized path related variables to judge instead.
-- We conservatively disallow motion when there is parameter requirement
-- for either outer or inner at this moment though there could be room
-- for further improvement (e.g. referring subplan code to do broadcast
-- for base rel if needed, which needs much effort and does not seem to
-- be deserved given we will probably refactor related code for the lateral
-- support in the near future). For the query and guc settings below, Postgres
-- planner can not generate a plan.
set enable_nestloop = 1;
set enable_material = 0;
set enable_seqscan = 0;
set enable_bitmapscan = 0;
explain select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1;
                                                       QUERY PLAN                                                        
-------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=0.00..1885517.36 rows=1 width=1)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1885517.36 rows=1 width=1)
         ->  Limit  (cost=0.00..1885517.36 rows=1 width=1)
               ->  Result  (cost=0.00..1885517.36 rows=33336667 width=1)
                     ->  Nested Loop Left Join  (cost=0.00..1885484.02 rows=33336667 width=4)
                           Join Filter: true
                           ->  Seq Scan on tenk1  (cost=0.00..431.51 rows=3334 width=4)
                           ->  Materialize  (cost=0.00..431.70 rows=10000 width=1)
                                 ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.69 rows=10000 width=1)
                                       ->  Seq Scan on tenk2  (cost=0.00..431.50 rows=3334 width=1)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
(11 rows)

select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1;
 ?column? 
----------
 t
(1 row)

reset enable_nestloop;
reset enable_material;
reset enable_seqscan;
reset enable_bitmapscan;
-- test that flow->hashExpr variables can be resolved
CREATE TABLE hexpr_t1 (c1 int, c2 character varying(16)) DISTRIBUTED BY (c1);
CREATE TABLE hexpr_t2 (c3 character varying(16)) DISTRIBUTED BY (c3);
INSERT INTO hexpr_t1 SELECT i, i::character varying FROM generate_series(1,10)i;
INSERT INTO hexpr_t2 SELECT i::character varying FROM generate_series(1,10)i;
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
                                              QUERY PLAN                                              
------------------------------------------------------------------------------------------------------
 Result  (cost=0.00..862.00 rows=5 width=8)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..862.00 rows=14 width=2)
         ->  Hash Left Join  (cost=0.00..862.00 rows=5 width=2)
               Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
               ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=4 width=2)
                     Hash Key: btrim((hexpr_t1.c2)::text)
                     ->  Seq Scan on hexpr_t1  (cost=0.00..431.00 rows=4 width=2)
               ->  Hash  (cost=431.00..431.00 rows=4 width=2)
                     ->  Seq Scan on hexpr_t2  (cost=0.00..431.00 rows=4 width=2)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)

EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
                                              QUERY PLAN                                              
------------------------------------------------------------------------------------------------------
 Result  (cost=0.00..862.00 rows=5 width=8)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..862.00 rows=14 width=2)
         ->  Hash Left Join  (cost=0.00..862.00 rows=5 width=2)
               Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
               ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=4 width=2)
                     Hash Key: btrim((hexpr_t1.c2)::text)
                     ->  Seq Scan on hexpr_t1  (cost=0.00..431.00 rows=4 width=2)
               ->  Hash  (cost=431.00..431.00 rows=4 width=2)
                     ->  Seq Scan on hexpr_t2  (cost=0.00..431.00 rows=4 width=2)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)

EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
                                              QUERY PLAN                                              
------------------------------------------------------------------------------------------------------
 Result  (cost=0.00..862.00 rows=5 width=8)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..862.00 rows=14 width=2)
         ->  Hash Left Join  (cost=0.00..862.00 rows=5 width=2)
               Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
               ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=4 width=2)
                     Hash Key: btrim((hexpr_t1.c2)::text)
                     ->  Seq Scan on hexpr_t1  (cost=0.00..431.00 rows=4 width=2)
               ->  Hash  (cost=431.00..431.00 rows=4 width=2)
                     ->  Seq Scan on hexpr_t2  (cost=0.00..431.00 rows=4 width=2)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)

SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
 foo 
-----
 2
 3
 4
 7
 5
 6
 8
 9
 10
 1
(10 rows)

SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
 foo 
-----
 8
 5
 6
 1
 9
 10
 2
 3
 4
 7
(10 rows)

SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
 foo 
-----
 9
 10
 1
 2
 3
 4
 7
 5
 6
 8
(10 rows)