join_gp_optimizer.out

-- test numeric hash join
set enable_hashjoin to on;
set enable_mergejoin to off;
set enable_nestloop to off;
create table nhtest (i numeric(10, 2)) distributed by (i);
insert into nhtest values(100000.22);
insert into nhtest values(300000.19);
explain select * from nhtest a join nhtest b using (i);
                                       QUERY PLAN                                       
----------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..862.00 rows=1 width=11)
   ->  Hash Join  (cost=0.00..862.00 rows=1 width=11)
         Hash Cond: public.nhtest.i = public.nhtest.i
         ->  Table Scan on nhtest  (cost=0.00..431.00 rows=1 width=11)
         ->  Hash  (cost=431.00..431.00 rows=1 width=11)
               ->  Table Scan on nhtest  (cost=0.00..431.00 rows=1 width=11)
 Settings:  enable_hashjoin=on; enable_mergejoin=off; enable_nestloop=off; optimizer=on
 Optimizer status: PQO version 2.39.2
(8 rows)

select * from nhtest a join nhtest b using (i);
     i     
-----------
 100000.22
 300000.19
(2 rows)

create temp table l(a int);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into l values (1), (1), (2);
select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3;
 a | a | a 
---+---+---
 1 | 1 |  
 1 | 1 |  
 1 | 1 |  
 1 | 1 |  
 2 | 2 | 2
(5 rows)

--
-- test hash join
--
create table hjtest (i int, j int) distributed by (i,j);
insert into hjtest values(3, 4);
select count(*) from hjtest a1, hjtest a2 where a2.i = least (a1.i,4) and a2.j = 4;
 count 
-------
     1
(1 row)

--
-- Test for correct behavior when there is a Merge Join on top of Materialize
-- on top of a Motion :
-- 1. Use FULL OUTER JOIN to induce a Merge Join
-- 2. Use a large tuple size to induce a Materialize
-- 3. Use gp_dist_random() to induce a Redistribute
--
set enable_hashjoin to off;
set enable_mergejoin to on;
set enable_nestloop to off;
DROP TABLE IF EXISTS alpha;
NOTICE:  table "alpha" does not exist, skipping
DROP TABLE IF EXISTS theta;
NOTICE:  table "theta" does not exist, skipping
CREATE TABLE alpha (i int, j int) distributed by (i);
CREATE TABLE theta (i int, j char(10000000)) distributed by (i);
INSERT INTO alpha values (1, 1), (2, 2);
INSERT INTO theta values (1, 'f'), (2, 'g');
SELECT *
FROM gp_dist_random('alpha') FULL OUTER JOIN gp_dist_random('theta')
  ON (alpha.i = theta.i)
WHERE (alpha.j IS NULL or theta.j IS NULL);
 i | j | i | j 
---+---+---+---
(0 rows)

reset enable_hashjoin;
reset enable_mergejoin;
reset enable_nestloop;
--
-- Predicate propagation over equality conditions
--
drop schema if exists pred;
NOTICE:  schema "pred" does not exist, skipping
create schema pred;
set search_path=pred;
create table t1 (x int, y int, z int) distributed by (y);
create table t2 (x int, y int, z int) distributed by (x);
insert into t1 select i, i, i from generate_series(1,100) i;
insert into t2 select * from t1;
analyze t1;
analyze t2;
--
-- infer over equalities
--
explain select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
                                                 QUERY PLAN                                                 
------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=0.00..862.00 rows=1 width=8)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..862.00 rows=1 width=8)
         ->  Aggregate  (cost=0.00..862.00 rows=1 width=8)
               ->  Hash Join  (cost=0.00..862.00 rows=1 width=1)
                     Hash Cond: t1.x = t2.x
                     ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=4)
                           Hash Key: t1.x
                           ->  Table Scan on t1  (cost=0.00..431.00 rows=1 width=4)
                                 Filter: x = 100
                     ->  Hash  (cost=431.00..431.00 rows=1 width=4)
                           ->  Table Scan on t2  (cost=0.00..431.00 rows=1 width=4)
                                 Filter: x = 100
 Settings:  optimizer=on
 Optimizer status: PQO version 2.39.2
(14 rows)

select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
 count 
-------
     1
(1 row)

--
-- infer over >=
--
explain select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
                                          QUERY PLAN                                          
----------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1324039.56 rows=34 width=24)
   ->  Nested Loop  (cost=0.00..1324039.56 rows=12 width=24)
         Join Filter: t2.x >= t1.x
         ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
               ->  Table Scan on t1  (cost=0.00..431.00 rows=1 width=12)
                     Filter: x = 100
         ->  Table Scan on t2  (cost=0.00..431.00 rows=34 width=12)
 Settings:  optimizer=on
 Optimizer status: PQO version 2.39.2
(9 rows)

select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
  x  |  y  |  z  |  x  |  y  |  z  
-----+-----+-----+-----+-----+-----
 100 | 100 | 100 | 100 | 100 | 100
(1 row)

--
-- multiple inferences
--
set optimizer_segments=2;
explain select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
                                              QUERY PLAN                                               
-------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice3; segments: 3)  (cost=0.00..862.01 rows=1 width=24)
   ->  Hash Join  (cost=0.00..862.01 rows=1 width=24)
         Hash Cond: t1.x = t2.y
         Join Filter: t1.x <= t2.x
         ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
               Hash Key: t1.x
               ->  Table Scan on t1  (cost=0.00..431.00 rows=1 width=12)
                     Filter: x = 100
         ->  Hash  (cost=431.00..431.00 rows=1 width=12)
               ->  Redistribute Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
                     Hash Key: t2.y
                     ->  Table Scan on t2  (cost=0.00..431.00 rows=1 width=12)
                           Filter: y = 100
 Settings:  optimizer=on; optimizer_segments=2
 Optimizer status: PQO version 2.39.2
(15 rows)

reset optimizer_segments;
select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
  x  |  y  |  z  |  x  |  y  |  z  
-----+-----+-----+-----+-----+-----
 100 | 100 | 100 | 100 | 100 | 100
(1 row)

--
-- MPP-18537: hash clause references a constant in outer child target list
--
create table hjn_test (i int, j int) distributed by (i,j);
insert into hjn_test values(3, 4);
create table int4_tbl (f1 int) distributed by (f1);
insert into int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647);
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
 count 
-------
     1
(1 row)

select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
     f1      |     f1      
-------------+-------------
  2147483647 |  2147483647
      123456 |      123456
 -2147483647 | -2147483647
           0 |           0
     -123456 |     -123456
(5 rows)

-- Same as the last query, but with a partitioned table (which requires a
-- Result node to do projection of the hash expression, as Append is not
-- projection-capable)
create table part4_tbl (f1 int4) partition by range (f1) (start(-1000000) end (1000000) every (1000000));
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
NOTICE:  CREATE TABLE will create partition "part4_tbl_1_prt_1" for table "part4_tbl"
NOTICE:  CREATE TABLE will create partition "part4_tbl_1_prt_2" for table "part4_tbl"
insert into part4_tbl values
       (-123457), (-123456), (-123455),
       (-1), (0), (1),
       (123455), (123456), (123457);
select * from part4_tbl a join part4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
   f1    |   f1    
---------+---------
       0 |       0
  123456 |  123456
 -123456 | -123456
(3 rows)

--
-- Test case where a Motion hash key is only needed for the redistribution,
-- and not returned in the final result set. There was a bug at one point where
-- tjoin.c1 was used as the hash key in a Motion node, but it was not added
-- to the sub-plans target list, causing a "variable not found in subplan
-- target list" error.
--
create table tjoin1(dk integer, id integer) distributed by (dk);
create table tjoin2(dk integer, id integer, t text) distributed by (dk);
create table tjoin3(dk integer, id integer, t text) distributed by (dk);
insert into tjoin1 values (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3);
insert into tjoin2 values (1, 1, '1-1'), (1, 2, '1-2'), (2, 1, '2-1'), (2, 2, '2-2');
insert into tjoin3 values (1, 1, '1-1'), (2, 1, '2-1');
select tjoin1.id, tjoin2.t, tjoin3.t
from tjoin1
left outer join (tjoin2 left outer join tjoin3 on tjoin2.id=tjoin3.id) on tjoin1.id=tjoin3.id;
 id |  t  |  t  
----+-----+-----
  3 |     | 
  3 |     | 
  1 | 2-1 | 1-1
  1 | 2-1 | 2-1
  1 | 1-1 | 1-1
  1 | 1-1 | 2-1
  2 |     | 
  1 | 2-1 | 1-1
  1 | 2-1 | 2-1
  1 | 1-1 | 1-1
  1 | 1-1 | 2-1
  2 |     | 
(12 rows)

set enable_hashjoin to off;
set optimizer_enable_hashjoin = off;
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
 count 
-------
     1
(1 row)

select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
     f1      |     f1      
-------------+-------------
  2147483647 |  2147483647
      123456 |      123456
 -2147483647 | -2147483647
           0 |           0
     -123456 |     -123456
(5 rows)

reset enable_hashjoin;
reset optimizer_enable_hashjoin;
-- In case of Left Anti Semi Join, if the left rel is empty a dummy join
-- should be created
drop table if exists foo;
NOTICE:  table "foo" does not exist, skipping
drop table if exists bar;
NOTICE:  table "bar" does not exist, skipping
create table foo (a int, b int) distributed randomly;
create table bar (c int, d int) distributed randomly;
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
explain select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
                   QUERY PLAN                   
------------------------------------------------
 Result  (cost=0.00..0.00 rows=0 width=4)
   ->  Result  (cost=0.00..0.00 rows=0 width=4)
         One-Time Filter: false
 Settings:  optimizer=on
 Optimizer status: PQO version 2.39.2
(5 rows)

select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
 a 
---
(0 rows)

-- The merge join executor code doesn't support LASJ_NOTIN joins. Make sure
-- the planner doesn't create an invalid plan with them.
create index index_foo on foo (a);
create index index_bar on bar (c);
set enable_nestloop to off;
set enable_hashjoin to off;
set enable_mergejoin to on;
select * from foo where a not in (select c from bar where c <= 5);
 a  | b 
----+---
  7 |  
  6 |  
  8 |  
  9 |  
 10 |  
(5 rows)

-- Cleanup
set client_min_messages='warning'; -- silence drop-cascade NOTICEs
drop schema pred cascade;