join_gp_optimizer.out 34.1 KB
Newer Older
1 2 3 4 5 6 7 8
-- Extra GPDB tests for joins.
-- Ignore "workfile compresssion is not supported by this build" (see
-- 'zlib' test):
--
-- start_matchignore
-- m/ERROR:  workfile compresssion is not supported by this build/
-- end_matchignore
--
9
-- test numeric hash join
10
--
11 12 13 14 15 16 17
set enable_hashjoin to on;
set enable_mergejoin to off;
set enable_nestloop to off;
create table nhtest (i numeric(10, 2)) distributed by (i);
insert into nhtest values(100000.22);
insert into nhtest values(300000.19);
explain select * from nhtest a join nhtest b using (i);
18 19
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
20 21 22
 Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..862.00 rows=1 width=9)
   ->  Hash Join  (cost=0.00..862.00 rows=1 width=9)
         Hash Cond: nhtest.i = nhtest_1.i
23
         ->  Seq Scan on nhtest  (cost=0.00..431.00 rows=1 width=9)
24
         ->  Hash  (cost=431.00..431.00 rows=1 width=9)
25
               ->  Seq Scan on nhtest nhtest_1  (cost=0.00..431.00 rows=1 width=9)
26
 Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
27
(7 rows)
28 29 30 31 32

select * from nhtest a join nhtest b using (i);
     i     
-----------
 100000.22
33
 300000.19
34 35 36
(2 rows)

create temp table l(a int);
37 38
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
insert into l values (1), (1), (2);
select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3;
 a | a | a 
---+---+---
 1 | 1 |  
 1 | 1 |  
 1 | 1 |  
 1 | 1 |  
 2 | 2 | 2
(5 rows)

--
-- test hash join
--
create table hjtest (i int, j int) distributed by (i,j);
insert into hjtest values(3, 4);
select count(*) from hjtest a1, hjtest a2 where a2.i = least (a1.i,4) and a2.j = 4;
 count 
-------
     1
(1 row)

61 62 63 64 65 66
--
-- Test for correct behavior when there is a Merge Join on top of Materialize
-- on top of a Motion :
-- 1. Use FULL OUTER JOIN to induce a Merge Join
-- 2. Use a large tuple size to induce a Materialize
-- 3. Use gp_dist_random() to induce a Redistribute
67 68 69 70
--
set enable_hashjoin to off;
set enable_mergejoin to on;
set enable_nestloop to off;
71
DROP TABLE IF EXISTS alpha;
72
NOTICE:  table "alpha" does not exist, skipping
73
DROP TABLE IF EXISTS theta;
74 75 76
NOTICE:  table "theta" does not exist, skipping
CREATE TABLE alpha (i int, j int) distributed by (i);
CREATE TABLE theta (i int, j char(10000000)) distributed by (i);
77 78 79
INSERT INTO alpha values (1, 1), (2, 2);
INSERT INTO theta values (1, 'f'), (2, 'g');
SELECT *
80 81
FROM gp_dist_random('alpha') FULL OUTER JOIN gp_dist_random('theta')
  ON (alpha.i = theta.i)
82
WHERE (alpha.j IS NULL or theta.j IS NULL);
83 84
 i | j | i | j 
---+---+---+---
85 86 87 88 89
(0 rows)

reset enable_hashjoin;
reset enable_mergejoin;
reset enable_nestloop;
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
--
-- Predicate propagation over equality conditions
--
drop schema if exists pred;
NOTICE:  schema "pred" does not exist, skipping
create schema pred;
set search_path=pred;
create table t1 (x int, y int, z int) distributed by (y);
create table t2 (x int, y int, z int) distributed by (x);
insert into t1 select i, i, i from generate_series(1,100) i;
insert into t2 select * from t1;
analyze t1;
analyze t2;
--
-- infer over equalities
--
explain select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
107 108 109 110 111 112
                                                 QUERY PLAN                                                 
------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=0.00..862.00 rows=1 width=8)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..862.00 rows=1 width=8)
         ->  Aggregate  (cost=0.00..862.00 rows=1 width=8)
               ->  Hash Join  (cost=0.00..862.00 rows=1 width=1)
113
                     Hash Cond: t1.x = t2.x
114
                     ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=4)
115
                           Hash Key: t1.x
116
                           ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=4)
117 118
                                 Filter: x = 100
                     ->  Hash  (cost=431.00..431.00 rows=1 width=4)
119
                           ->  Seq Scan on t2  (cost=0.00..431.00 rows=1 width=4)
120
                                 Filter: x = 100
121
 Settings:  optimizer=on
122
 Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2
123
(14 rows)
124 125 126 127 128 129 130 131 132 133 134

select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
 count 
-------
     1
(1 row)

--
-- infer over >=
--
explain select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
135 136 137 138
                                          QUERY PLAN                                          
----------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1324039.56 rows=34 width=24)
   ->  Nested Loop  (cost=0.00..1324039.56 rows=12 width=24)
139
         Join Filter: t2.x >= t1.x
140
         ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
141
               ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=12)
142
                     Filter: x = 100
143
         ->  Seq Scan on t2  (cost=0.00..431.00 rows=34 width=12)
144
 Settings:  optimizer=on
145
 Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2
146
(9 rows)
147 148 149 150 151 152 153 154 155 156 157 158

select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
  x  |  y  |  z  |  x  |  y  |  z  
-----+-----+-----+-----+-----+-----
 100 | 100 | 100 | 100 | 100 | 100
(1 row)

--
-- multiple inferences
--
set optimizer_segments=2;
explain select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
159 160 161
                                              QUERY PLAN                                               
-------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice3; segments: 3)  (cost=0.00..862.01 rows=1 width=24)
162 163 164
   ->  Hash Join  (cost=0.00..862.01 rows=1 width=24)
         Hash Cond: t1.x = t2.y
         Join Filter: t1.x <= t2.x
165
         ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
166
               Hash Key: t1.x
167
               ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=12)
168 169
                     Filter: x = 100
         ->  Hash  (cost=431.00..431.00 rows=1 width=12)
170
               ->  Redistribute Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
171
                     Hash Key: t2.y
172
                     ->  Seq Scan on t2  (cost=0.00..431.00 rows=1 width=12)
173
                           Filter: y = 100
174
 Settings:  optimizer=on; optimizer_segments=2
175
 Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2
176
(15 rows)
177 178 179 180 181 182 183 184 185 186 187 188 189

reset optimizer_segments;
select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
  x  |  y  |  z  |  x  |  y  |  z  
-----+-----+-----+-----+-----+-----
 100 | 100 | 100 | 100 | 100 | 100
(1 row)

--
-- MPP-18537: hash clause references a constant in outer child target list
--
create table hjn_test (i int, j int) distributed by (i,j);
insert into hjn_test values(3, 4);
190
create table int4_tbl (f1 int) distributed by (f1);
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
insert into int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647);
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
 count 
-------
     1
(1 row)

select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
     f1      |     f1      
-------------+-------------
219
  2147483647 |  2147483647
220 221 222 223 224 225
      123456 |      123456
 -2147483647 | -2147483647
           0 |           0
     -123456 |     -123456
(5 rows)

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
-- Same as the last query, but with a partitioned table (which requires a
-- Result node to do projection of the hash expression, as Append is not
-- projection-capable)
create table part4_tbl (f1 int4) partition by range (f1) (start(-1000000) end (1000000) every (1000000));
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
NOTICE:  CREATE TABLE will create partition "part4_tbl_1_prt_1" for table "part4_tbl"
NOTICE:  CREATE TABLE will create partition "part4_tbl_1_prt_2" for table "part4_tbl"
insert into part4_tbl values
       (-123457), (-123456), (-123455),
       (-1), (0), (1),
       (123455), (123456), (123457);
select * from part4_tbl a join part4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
   f1    |   f1    
---------+---------
241
       0 |       0
242 243 244 245
  123456 |  123456
 -123456 | -123456
(3 rows)

246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
--
-- Test case where a Motion hash key is only needed for the redistribution,
-- and not returned in the final result set. There was a bug at one point where
-- tjoin.c1 was used as the hash key in a Motion node, but it was not added
-- to the sub-plans target list, causing a "variable not found in subplan
-- target list" error.
--
create table tjoin1(dk integer, id integer) distributed by (dk);
create table tjoin2(dk integer, id integer, t text) distributed by (dk);
create table tjoin3(dk integer, id integer, t text) distributed by (dk);
insert into tjoin1 values (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3);
insert into tjoin2 values (1, 1, '1-1'), (1, 2, '1-2'), (2, 1, '2-1'), (2, 2, '2-2');
insert into tjoin3 values (1, 1, '1-1'), (2, 1, '2-1');
select tjoin1.id, tjoin2.t, tjoin3.t
from tjoin1
left outer join (tjoin2 left outer join tjoin3 on tjoin2.id=tjoin3.id) on tjoin1.id=tjoin3.id;
 id |  t  |  t  
----+-----+-----
  3 |     | 
265
  3 |     | 
266
  1 | 2-1 | 1-1
267
  1 | 2-1 | 2-1
268
  1 | 1-1 | 1-1
269
  1 | 1-1 | 2-1
270
  2 |     | 
271 272 273 274
  1 | 2-1 | 1-1
  1 | 2-1 | 2-1
  1 | 1-1 | 1-1
  1 | 1-1 | 2-1
275 276 277
  2 |     | 
(12 rows)

278
set enable_hashjoin to off;
279
set optimizer_enable_hashjoin = off;
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
 count 
-------
     1
(1 row)

select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
     f1      |     f1      
-------------+-------------
307
  2147483647 |  2147483647
308 309 310 311 312 313 314
      123456 |      123456
 -2147483647 | -2147483647
           0 |           0
     -123456 |     -123456
(5 rows)

reset enable_hashjoin;
315
reset optimizer_enable_hashjoin;
316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
-- In case of Left Anti Semi Join, if the left rel is empty a dummy join
-- should be created
drop table if exists foo;
NOTICE:  table "foo" does not exist, skipping
drop table if exists bar;
NOTICE:  table "bar" does not exist, skipping
create table foo (a int, b int) distributed randomly;
create table bar (c int, d int) distributed randomly;
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
explain select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
                   QUERY PLAN                   
------------------------------------------------
 Result  (cost=0.00..0.00 rows=0 width=4)
   ->  Result  (cost=0.00..0.00 rows=0 width=4)
         One-Time Filter: false
 Settings:  optimizer=on
333
 Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2
334 335 336 337 338 339 340
(5 rows)

select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
 a 
---
(0 rows)

341 342 343 344 345 346 347 348 349 350 351
-- The merge join executor code doesn't support LASJ_NOTIN joins. Make sure
-- the planner doesn't create an invalid plan with them.
create index index_foo on foo (a);
create index index_bar on bar (c);
set enable_nestloop to off;
set enable_hashjoin to off;
set enable_mergejoin to on;
select * from foo where a not in (select c from bar where c <= 5);
 a  | b 
----+---
  7 |  
352
  6 |  
353 354 355 356 357
  8 |  
  9 |  
 10 |  
(5 rows)

358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to off;
create table dept
(
	id int,
	pid int,
	name char(40)
);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into dept values(3, 0, 'root');
insert into dept values(4, 3, '2<-1');
insert into dept values(5, 4, '3<-2<-1');
insert into dept values(6, 4, '4<-2<-1');
insert into dept values(7, 3, '5<-1');
insert into dept values(8, 7, '5<-1');
insert into dept select i, i % 6 + 3 from generate_series(9,50) as i;
insert into dept select i, 99 from generate_series(100,15000) as i;
ANALYZE dept;
378
-- Test rescannable hashjoin with spilling hashtable
379
set statement_mem='1000kB';
380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
set gp_workfile_compression = off;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

-- Test rescannable hashjoin with spilling hashtable, with compression
set gp_workfile_compression = on;
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

-- Test rescannable hashjoin with in-memory hashtable
reset statement_mem;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
-- MPP-29458
-- When we join on a clause with two different types. If one table distribute by one type, the query plan
-- will redistribute data on another type. But the has values of two types would not be equal. The data will
-- redistribute to wrong segments.
create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed by (id);
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
-- Test nest loop redistribute keys
set enable_nestloop to on;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
 count 
-------
     2
(1 row)

-- Test hash join redistribute keys
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
 count 
-------
     2
(1 row)

drop table test_timestamp_t1;
drop table test_timestamp_t2;
-- Test merge join redistribute keys
create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed randomly;
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
 id |  field_dt  | id |        field_tms         
----+------------+----+--------------------------
 10 | 01-10-2018 | 10 | Wed Jan 10 00:00:00 2018
 11 | 01-11-2018 | 11 | Thu Jan 11 00:00:00 2018
(2 rows)

-- test float type
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
create table test_float1(id int, data float4)  DISTRIBUTED BY (data);
create table test_float2(id int, data float8)  DISTRIBUTED BY (data);
insert into test_float1 values(1, 10), (2, 20);
insert into test_float2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
 id | data | id | data 
----+------+----+------
  2 |   20 |  4 |   20
  1 |   10 |  3 |   10
(2 rows)

-- test int type
create table test_int1(id int, data int4)  DISTRIBUTED BY (data);
create table test_int2(id int, data int8)  DISTRIBUTED BY (data);
insert into test_int1 values(1, 10), (2, 20);
insert into test_int2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
 id | data | id | data 
----+------+----+------
  1 |   10 |  3 |   10
  2 |   20 |  4 |   20
(2 rows)

503
-- Test to ensure that for full outer join on varchar columns, planner is successful in finding a sort operator in the catalog
504
create table input_table(a varchar(30), b varchar(30)) distributed by (a);
505 506
set enable_hashjoin = off;
explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a;
507 508 509 510 511 512 513 514 515 516 517 518 519
                        QUERY PLAN                         
-----------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
   ->  Merge Full Join
         Merge Cond: ((x.a)::text = (input_table.a)::text)
         ->  Sort
               Sort Key: x.a
               ->  Seq Scan on input_table x
         ->  Sort
               Sort Key: input_table.a
               ->  Seq Scan on input_table
 Optimizer: Postgres query optimizer
(10 rows)
520

521
-- Cleanup
522
reset enable_hashjoin;
523
set client_min_messages='warning'; -- silence drop-cascade NOTICEs
524
drop schema pred cascade;
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
reset search_path;
-- github issue 5370 cases
drop table if exists t5370;
NOTICE:  table "t5370" does not exist, skipping
drop table if exists t5370_2;
NOTICE:  table "t5370_2" does not exist, skipping
create table t5370(id int,name text) distributed by(id);
insert into t5370 select i,i from  generate_series(1,1000) i;
create table t5370_2 as select * from t5370 distributed by (id);
analyze t5370_2;
analyze t5370;
explain select * from t5370 a , t5370_2 b where a.name=b.name;
                                               QUERY PLAN                                               
--------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice3; segments: 3)  (cost=0.00..862.25 rows=1000 width=14)
   ->  Hash Join  (cost=0.00..862.20 rows=334 width=14)
         Hash Cond: t5370.name = t5370_2.name
         ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.02 rows=334 width=7)
               Hash Key: t5370.name
544
               ->  Seq Scan on t5370  (cost=0.00..431.01 rows=334 width=7)
545 546 547
         ->  Hash  (cost=431.02..431.02 rows=334 width=7)
               ->  Redistribute Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.02 rows=334 width=7)
                     Hash Key: t5370_2.name
548
                     ->  Seq Scan on t5370_2  (cost=0.00..431.01 rows=334 width=7)
549
 Optimizer: Pivotal Optimizer (GPORCA) version 2.70.2
550 551 552 553
(11 rows)

drop table t5370;
drop table t5370_2;
554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
-- github issue 6215 cases
-- When executing the following plan
-- ```
--  Gather Motion 1:1  (slice1; segments: 1)
--    ->  Merge Full Join
--         ->  Seq Scan on int4_tbl a
--         ->  Seq Scan on int4_tbl b
--```
-- Greenplum will raise an Assert Fail.
-- We force adding a material node for
-- merge full join on true.
drop table if exists t6215;
create table t6215(f1 int4) distributed replicated;
insert into t6215(f1) values (1), (2), (3);
set enable_material = off;
-- The plan still have Material operator
explain (costs off) select * from t6215 a full join t6215 b on true;
571 572
                QUERY PLAN                
------------------------------------------
573
 Gather Motion 1:1  (slice1; segments: 1)
574 575 576 577 578 579
   ->  Merge Full Join
         ->  Seq Scan on t6215 a
         ->  Materialize
               ->  Seq Scan on t6215 b
 Optimizer: Postgres query optimizer
(6 rows)
580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595

select * from t6215 a full join t6215 b on true;
 f1 | f1 
----+----
  1 |  1
  1 |  2
  1 |  3
  2 |  1
  2 |  2
  2 |  3
  3 |  1
  3 |  2
  3 |  3
(9 rows)

drop table t6215;
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
--
-- This tripped an assertion while deciding the locus for the joins.
-- The code was failing to handle join between SingleQE and Hash correctly,
-- when there were join order restricitions. (see
-- https://github.com/greenplum-db/gpdb/issues/6643
--
select a.f1, b.f1, t.thousand, t.tenthous from
  (select sum(f1) as f1 from int4_tbl i4b) b
   left outer join
     (select sum(f1)+1 as f1 from int4_tbl i4a) a ON a.f1 = b.f1
      left outer join
        tenk1 t ON b.f1 = t.thousand and (a.f1+b.f1+999) = t.tenthous;
 f1 | f1 | thousand | tenthous 
----+----+----------+----------
    |  0 |          |         
(1 row)

613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641
-- tests to ensure that join reordering of LOJs and inner joins produces the
-- correct join predicates & residual filters
drop table if exists t1, t2, t3;
CREATE TABLE t1 (a int, b int, c int);
CREATE TABLE t2 (a int, b int, c int);
CREATE TABLE t3 (a int, b int, c int);
INSERT INTO t1 SELECT i, i, i FROM generate_series(1, 1000) i;
INSERT INTO t2 SELECT i, i, i FROM generate_series(2, 1000) i; -- start from 2 so that one row from t1 doesn't match
INSERT INTO t3 VALUES (1, 2, 3), (NULL, 2, 2);
ANALYZE t1;
ANALYZE t2;
ANALYZE t3;
-- ensure plan has a filter over left outer join
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c));
                                QUERY PLAN                                 
---------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)
   ->  Result
         Filter: ((t2.a IS NULL) OR (t1.c = t3.c))
         ->  Hash Left Join
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join
                     Hash Cond: (t1.b = t3.b)
                     ->  Seq Scan on t1
                     ->  Hash
                           ->  Broadcast Motion 3:3  (slice1; segments: 3)
                                 ->  Seq Scan on t3
               ->  Hash
                     ->  Seq Scan on t2
642
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
(14 rows)

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c));
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(1 row)

-- ensure plan has two inner joins with the where clause & join predicates ANDed
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a);
                                QUERY PLAN                                 
---------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)
   ->  Hash Join
         Hash Cond: ((t2.a = t3.a) AND (t2.a = t1.a))
         ->  Seq Scan on t2
         ->  Hash
               ->  Hash Join
                     Hash Cond: (t1.b = t3.b)
                     ->  Seq Scan on t1
                     ->  Hash
                           ->  Broadcast Motion 3:3  (slice1; segments: 3)
                                 ->  Seq Scan on t3
666
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
(12 rows)

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a);
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
(0 rows)

-- ensure plan has a filter over left outer join
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a);
                                QUERY PLAN                                 
---------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)
   ->  Result
         Filter: (t2.a IS DISTINCT FROM t3.a)
         ->  Hash Left Join
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join
                     Hash Cond: (t1.b = t3.b)
                     ->  Seq Scan on t1
                     ->  Hash
                           ->  Broadcast Motion 3:3  (slice1; segments: 3)
                                 ->  Seq Scan on t3
               ->  Hash
                     ->  Seq Scan on t2
691
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716
(14 rows)

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a);
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

-- ensure plan has a filter over left outer join
explain select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a));
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1293.22 rows=4 width=36)
   ->  Result  (cost=0.00..1293.22 rows=2 width=36)
         Filter: ((t2.a IS NULL) OR (t1.c = t3.a))
         ->  Hash Left Join  (cost=0.00..1293.22 rows=2 width=36)
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join  (cost=0.00..862.08 rows=1 width=24)
                     Hash Cond: (t1.a = t3.a)
                     ->  Seq Scan on t1  (cost=0.00..431.01 rows=334 width=12)
                     ->  Hash  (cost=431.00..431.00 rows=1 width=12)
                           ->  Seq Scan on t3  (cost=0.00..431.00 rows=1 width=12)
               ->  Hash  (cost=431.01..431.01 rows=333 width=12)
                     ->  Seq Scan on t2  (cost=0.00..431.01 rows=333 width=12)
717
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
(13 rows)

select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a));
 a | b | c | t1a | t1b | t1c | t2a | t2b | t2c 
---+---+---+-----+-----+-----+-----+-----+-----
 1 | 2 | 3 |   1 |   1 |   1 |     |     |    
(1 row)

-- ensure plan has a filter over left outer join
explain select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt 
  join t3 on tt.t1b = t3.b 
  join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b 
  join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b);
                                                                  QUERY PLAN                                                                  
----------------------------------------------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice4; segments: 3)  (cost=0.00..2586.40 rows=8 width=56)
   ->  Hash Join  (cost=0.00..2586.39 rows=3 width=56)
         Hash Cond: ((t1.b = t3_1.b) AND (t1_1.b = t3_1.b) AND (t3.b = t3_1.b))
         ->  Result  (cost=0.00..2155.39 rows=2 width=44)
               Filter: ((t2_1.a IS NULL) OR (t1.b = t3.b))
               ->  Hash Left Join  (cost=0.00..2155.39 rows=2 width=44)
                     Hash Cond: (t1.a = t2_1.a)
                     ->  Hash Join  (cost=0.00..1724.29 rows=1 width=36)
                           Hash Cond: ((t1.b = t3.b) AND (t1.b = t1_1.b))
                           ->  Seq Scan on t1  (cost=0.00..431.01 rows=334 width=8)
                           ->  Hash  (cost=1293.18..1293.18 rows=3 width=28)
                                 ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1293.18 rows=3 width=28)
                                       ->  Hash Left Join  (cost=0.00..1293.18 rows=1 width=28)
                                             Hash Cond: (t1_1.a = t2.a)
                                             ->  Hash Join  (cost=0.00..862.07 rows=1 width=20)
                                                   Hash Cond: (t1_1.b = t3.b)
                                                   ->  Seq Scan on t1 t1_1  (cost=0.00..431.01 rows=334 width=8)
                                                   ->  Hash  (cost=431.00..431.00 rows=2 width=12)
                                                         ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=2 width=12)
                                                               ->  Seq Scan on t3  (cost=0.00..431.00 rows=1 width=12)
                                             ->  Hash  (cost=431.01..431.01 rows=333 width=8)
                                                   ->  Seq Scan on t2  (cost=0.00..431.01 rows=333 width=8)
                     ->  Hash  (cost=431.01..431.01 rows=333 width=8)
                           ->  Seq Scan on t2 t2_1  (cost=0.00..431.01 rows=333 width=8)
         ->  Hash  (cost=431.00..431.00 rows=2 width=12)
               ->  Broadcast Motion 3:3  (slice3; segments: 3)  (cost=0.00..431.00 rows=2 width=12)
                     ->  Seq Scan on t3 t3_1  (cost=0.00..431.00 rows=1 width=12)
760
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
761 762 763 764 765 766 767 768 769 770 771 772 773 774
(28 rows)

select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt 
  join t3 on tt.t1b = t3.b 
  join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b 
  join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b);
 t1a | t1b | t2a | t2b | a | b | c | t1a | t1b | t2a | t2b | a | b | c 
-----+-----+-----+-----+---+---+---+-----+-----+-----+-----+---+---+---
   2 |   2 |   2 |   2 |   | 2 | 2 |   2 |   2 |   2 |   2 | 1 | 2 | 3
   2 |   2 |   2 |   2 |   | 2 | 2 |   2 |   2 |   2 |   2 |   | 2 | 2
   2 |   2 |   2 |   2 | 1 | 2 | 3 |   2 |   2 |   2 |   2 | 1 | 2 | 3
   2 |   2 |   2 |   2 | 1 | 2 | 3 |   2 |   2 |   2 |   2 |   | 2 | 2
(4 rows)

775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815
-- test different join order enumeration methods
set optimizer_join_order = query;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

set optimizer_join_order = greedy;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

set optimizer_join_order = exhaustive;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
(2 rows)

set optimizer_join_order = exhaustive2;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

reset optimizer_join_order;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

816
drop table t1, t2, t3;
817 818 819 820 821 822 823 824 825
--
-- Test a bug that nestloop path previously can not generate motion above
-- index path, which sometimes is wrong (this test case is an example).
-- We now depend on parameterized path related variables to judge instead.
-- We conservatively disallow motion when there is parameter requirement
-- for either outer or inner at this moment though there could be room
-- for further improvement (e.g. referring subplan code to do broadcast
-- for base rel if needed, which needs much effort and does not seem to
-- be deserved given we will probably refactor related code for the lateral
826
-- support in the near future). For the query and guc settings below, Postgres
827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
-- planner can not generate a plan.
set enable_nestloop = 1;
set enable_material = 0;
set enable_seqscan = 0;
set enable_bitmapscan = 0;
explain select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1;
                                                       QUERY PLAN                                                        
-------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=0.00..1885517.36 rows=1 width=1)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1885517.36 rows=1 width=1)
         ->  Limit  (cost=0.00..1885517.36 rows=1 width=1)
               ->  Result  (cost=0.00..1885517.36 rows=33336667 width=1)
                     ->  Nested Loop Left Join  (cost=0.00..1885484.02 rows=33336667 width=4)
                           Join Filter: true
                           ->  Seq Scan on tenk1  (cost=0.00..431.51 rows=3334 width=4)
                           ->  Materialize  (cost=0.00..431.70 rows=10000 width=1)
                                 ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.69 rows=10000 width=1)
                                       ->  Seq Scan on tenk2  (cost=0.00..431.50 rows=3334 width=1)
845
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
846 847 848 849 850 851 852 853 854 855 856 857
(11 rows)

select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1;
 ?column? 
----------
 t
(1 row)

reset enable_nestloop;
reset enable_material;
reset enable_seqscan;
reset enable_bitmapscan;