join_gp_optimizer.out 37.5 KB
Newer Older
1 2 3 4 5 6 7 8
-- Extra GPDB tests for joins.
-- Ignore "workfile compresssion is not supported by this build" (see
-- 'zlib' test):
--
-- start_matchignore
-- m/ERROR:  workfile compresssion is not supported by this build/
-- end_matchignore
--
9
-- test numeric hash join
10
--
11 12 13 14 15 16 17
set enable_hashjoin to on;
set enable_mergejoin to off;
set enable_nestloop to off;
create table nhtest (i numeric(10, 2)) distributed by (i);
insert into nhtest values(100000.22);
insert into nhtest values(300000.19);
explain select * from nhtest a join nhtest b using (i);
18 19
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
20 21 22
 Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..862.00 rows=1 width=9)
   ->  Hash Join  (cost=0.00..862.00 rows=1 width=9)
         Hash Cond: nhtest.i = nhtest_1.i
23
         ->  Seq Scan on nhtest  (cost=0.00..431.00 rows=1 width=9)
24
         ->  Hash  (cost=431.00..431.00 rows=1 width=9)
25
               ->  Seq Scan on nhtest nhtest_1  (cost=0.00..431.00 rows=1 width=9)
26
 Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
27
(7 rows)
28 29 30 31 32

select * from nhtest a join nhtest b using (i);
     i     
-----------
 100000.22
33
 300000.19
34 35 36
(2 rows)

create temp table l(a int);
37 38
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
insert into l values (1), (1), (2);
select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3;
 a | a | a 
---+---+---
 1 | 1 |  
 1 | 1 |  
 1 | 1 |  
 1 | 1 |  
 2 | 2 | 2
(5 rows)

--
-- test hash join
--
create table hjtest (i int, j int) distributed by (i,j);
insert into hjtest values(3, 4);
select count(*) from hjtest a1, hjtest a2 where a2.i = least (a1.i,4) and a2.j = 4;
 count 
-------
     1
(1 row)

61 62 63 64 65 66
--
-- Test for correct behavior when there is a Merge Join on top of Materialize
-- on top of a Motion :
-- 1. Use FULL OUTER JOIN to induce a Merge Join
-- 2. Use a large tuple size to induce a Materialize
-- 3. Use gp_dist_random() to induce a Redistribute
67 68 69 70
--
set enable_hashjoin to off;
set enable_mergejoin to on;
set enable_nestloop to off;
71
DROP TABLE IF EXISTS alpha;
72
NOTICE:  table "alpha" does not exist, skipping
73
DROP TABLE IF EXISTS theta;
74 75 76
NOTICE:  table "theta" does not exist, skipping
CREATE TABLE alpha (i int, j int) distributed by (i);
CREATE TABLE theta (i int, j char(10000000)) distributed by (i);
77 78 79
INSERT INTO alpha values (1, 1), (2, 2);
INSERT INTO theta values (1, 'f'), (2, 'g');
SELECT *
80 81
FROM gp_dist_random('alpha') FULL OUTER JOIN gp_dist_random('theta')
  ON (alpha.i = theta.i)
82
WHERE (alpha.j IS NULL or theta.j IS NULL);
83 84
 i | j | i | j 
---+---+---+---
85 86 87 88 89
(0 rows)

reset enable_hashjoin;
reset enable_mergejoin;
reset enable_nestloop;
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
--
-- Predicate propagation over equality conditions
--
drop schema if exists pred;
NOTICE:  schema "pred" does not exist, skipping
create schema pred;
set search_path=pred;
create table t1 (x int, y int, z int) distributed by (y);
create table t2 (x int, y int, z int) distributed by (x);
insert into t1 select i, i, i from generate_series(1,100) i;
insert into t2 select * from t1;
analyze t1;
analyze t2;
--
-- infer over equalities
--
explain select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
107 108 109
                                                 QUERY PLAN                                                 
------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=0.00..862.00 rows=1 width=8)
S
Sambitesh Dash 已提交
110 111 112 113 114 115 116 117 118 119 120
   ->  Hash Join  (cost=0.00..862.00 rows=1 width=1)
         Hash Cond: (t1.x = t2.x)
         ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=4)
               ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=4)
                     Filter: (x = 100)
         ->  Hash  (cost=431.00..431.00 rows=1 width=4)
               ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=4)
                     ->  Seq Scan on t2  (cost=0.00..431.00 rows=1 width=4)
                           Filter: (x = 100)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.64.0
(11 rows)
121 122 123 124 125 126 127 128 129 130 131

select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
 count 
-------
     1
(1 row)

--
-- infer over >=
--
explain select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
132 133 134 135
                                          QUERY PLAN                                          
----------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1324039.56 rows=34 width=24)
   ->  Nested Loop  (cost=0.00..1324039.56 rows=12 width=24)
136
         Join Filter: t2.x >= t1.x
137
         ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
138
               ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=12)
139
                     Filter: x = 100
140
         ->  Seq Scan on t2  (cost=0.00..431.00 rows=34 width=12)
141
 Settings:  optimizer=on
142
 Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2
143
(9 rows)
144 145 146 147 148 149 150 151 152 153 154 155

select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
  x  |  y  |  z  |  x  |  y  |  z  
-----+-----+-----+-----+-----+-----
 100 | 100 | 100 | 100 | 100 | 100
(1 row)

--
-- multiple inferences
--
set optimizer_segments=2;
explain select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
S
Sambitesh Dash 已提交
156 157 158 159 160 161 162 163 164 165 166 167 168 169
                                        QUERY PLAN                                         
-------------------------------------------------------------------------------------------
 Hash Join  (cost=0.00..862.01 rows=1 width=24)
   Hash Cond: (t1.x = t2.y)
   Join Filter: (t1.x <= t2.x)
   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
         ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=12)
               Filter: (x = 100)
   ->  Hash  (cost=431.00..431.00 rows=1 width=12)
         ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=12)
               ->  Seq Scan on t2  (cost=0.00..431.00 rows=1 width=12)
                     Filter: (y = 100)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.64.0
(11 rows)
170 171 172 173 174 175 176 177 178 179 180 181 182

reset optimizer_segments;
select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
  x  |  y  |  z  |  x  |  y  |  z  
-----+-----+-----+-----+-----+-----
 100 | 100 | 100 | 100 | 100 | 100
(1 row)

--
-- MPP-18537: hash clause references a constant in outer child target list
--
create table hjn_test (i int, j int) distributed by (i,j);
insert into hjn_test values(3, 4);
183
create table int4_tbl (f1 int) distributed by (f1);
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
insert into int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647);
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
 count 
-------
     1
(1 row)

select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
     f1      |     f1      
-------------+-------------
212
  2147483647 |  2147483647
213 214 215 216 217 218
      123456 |      123456
 -2147483647 | -2147483647
           0 |           0
     -123456 |     -123456
(5 rows)

219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
-- Same as the last query, but with a partitioned table (which requires a
-- Result node to do projection of the hash expression, as Append is not
-- projection-capable)
create table part4_tbl (f1 int4) partition by range (f1) (start(-1000000) end (1000000) every (1000000));
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
NOTICE:  CREATE TABLE will create partition "part4_tbl_1_prt_1" for table "part4_tbl"
NOTICE:  CREATE TABLE will create partition "part4_tbl_1_prt_2" for table "part4_tbl"
insert into part4_tbl values
       (-123457), (-123456), (-123455),
       (-1), (0), (1),
       (123455), (123456), (123457);
select * from part4_tbl a join part4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
   f1    |   f1    
---------+---------
234
       0 |       0
235 236 237 238
  123456 |  123456
 -123456 | -123456
(3 rows)

239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
--
-- Test case where a Motion hash key is only needed for the redistribution,
-- and not returned in the final result set. There was a bug at one point where
-- tjoin.c1 was used as the hash key in a Motion node, but it was not added
-- to the sub-plans target list, causing a "variable not found in subplan
-- target list" error.
--
create table tjoin1(dk integer, id integer) distributed by (dk);
create table tjoin2(dk integer, id integer, t text) distributed by (dk);
create table tjoin3(dk integer, id integer, t text) distributed by (dk);
insert into tjoin1 values (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3);
insert into tjoin2 values (1, 1, '1-1'), (1, 2, '1-2'), (2, 1, '2-1'), (2, 2, '2-2');
insert into tjoin3 values (1, 1, '1-1'), (2, 1, '2-1');
select tjoin1.id, tjoin2.t, tjoin3.t
from tjoin1
left outer join (tjoin2 left outer join tjoin3 on tjoin2.id=tjoin3.id) on tjoin1.id=tjoin3.id;
 id |  t  |  t  
----+-----+-----
  3 |     | 
258
  3 |     | 
259
  1 | 2-1 | 1-1
260
  1 | 2-1 | 2-1
261
  1 | 1-1 | 1-1
262
  1 | 1-1 | 2-1
263
  2 |     | 
264 265 266 267
  1 | 2-1 | 1-1
  1 | 2-1 | 2-1
  1 | 1-1 | 1-1
  1 | 1-1 | 2-1
268 269 270
  2 |     | 
(12 rows)

271
set enable_hashjoin to off;
272
set optimizer_enable_hashjoin = off;
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
 count 
-------
     1
(1 row)

select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
 count 
-------
     1
(1 row)

select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
     f1      |     f1      
-------------+-------------
300
  2147483647 |  2147483647
301 302 303 304 305 306 307
      123456 |      123456
 -2147483647 | -2147483647
           0 |           0
     -123456 |     -123456
(5 rows)

reset enable_hashjoin;
308
reset optimizer_enable_hashjoin;
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
-- In case of Left Anti Semi Join, if the left rel is empty a dummy join
-- should be created
drop table if exists foo;
NOTICE:  table "foo" does not exist, skipping
drop table if exists bar;
NOTICE:  table "bar" does not exist, skipping
create table foo (a int, b int) distributed randomly;
create table bar (c int, d int) distributed randomly;
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
explain select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
                   QUERY PLAN                   
------------------------------------------------
 Result  (cost=0.00..0.00 rows=0 width=4)
   ->  Result  (cost=0.00..0.00 rows=0 width=4)
         One-Time Filter: false
 Settings:  optimizer=on
326
 Optimizer status: Pivotal Optimizer (GPORCA) version 2.39.2
327 328 329 330 331 332 333
(5 rows)

select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
 a 
---
(0 rows)

334 335 336 337 338 339 340 341 342 343 344
-- The merge join executor code doesn't support LASJ_NOTIN joins. Make sure
-- the planner doesn't create an invalid plan with them.
create index index_foo on foo (a);
create index index_bar on bar (c);
set enable_nestloop to off;
set enable_hashjoin to off;
set enable_mergejoin to on;
select * from foo where a not in (select c from bar where c <= 5);
 a  | b 
----+---
  7 |  
345
  6 |  
346 347 348 349 350
  8 |  
  9 |  
 10 |  
(5 rows)

351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to off;
create table dept
(
	id int,
	pid int,
	name char(40)
);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into dept values(3, 0, 'root');
insert into dept values(4, 3, '2<-1');
insert into dept values(5, 4, '3<-2<-1');
insert into dept values(6, 4, '4<-2<-1');
insert into dept values(7, 3, '5<-1');
insert into dept values(8, 7, '5<-1');
insert into dept select i, i % 6 + 3 from generate_series(9,50) as i;
insert into dept select i, 99 from generate_series(100,15000) as i;
ANALYZE dept;
371
-- Test rescannable hashjoin with spilling hashtable
372
set statement_mem='1000kB';
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
set gp_workfile_compression = off;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

-- Test rescannable hashjoin with spilling hashtable, with compression
set gp_workfile_compression = on;
391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

-- Test rescannable hashjoin with in-memory hashtable
reset statement_mem;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
	-- non recursive term
	SELECT * FROM dept WHERE name = 'root'
	UNION ALL
	-- recursive term
	SELECT d.* FROM dept AS d, subdept AS sd
		WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
 count 
-------
    48
(1 row)

423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
-- MPP-29458
-- When we join on a clause with two different types. If one table distribute by one type, the query plan
-- will redistribute data on another type. But the has values of two types would not be equal. The data will
-- redistribute to wrong segments.
create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed by (id);
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
-- Test nest loop redistribute keys
set enable_nestloop to on;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
 count 
-------
     2
(1 row)

-- Test hash join redistribute keys
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
 count 
-------
     2
(1 row)

drop table test_timestamp_t1;
drop table test_timestamp_t2;
-- Test merge join redistribute keys
create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed randomly;
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
 id |  field_dt  | id |        field_tms         
----+------------+----+--------------------------
 10 | 01-10-2018 | 10 | Wed Jan 10 00:00:00 2018
 11 | 01-11-2018 | 11 | Thu Jan 11 00:00:00 2018
(2 rows)

-- test float type
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
create table test_float1(id int, data float4)  DISTRIBUTED BY (data);
create table test_float2(id int, data float8)  DISTRIBUTED BY (data);
insert into test_float1 values(1, 10), (2, 20);
insert into test_float2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
 id | data | id | data 
----+------+----+------
  2 |   20 |  4 |   20
  1 |   10 |  3 |   10
(2 rows)

-- test int type
create table test_int1(id int, data int4)  DISTRIBUTED BY (data);
create table test_int2(id int, data int8)  DISTRIBUTED BY (data);
insert into test_int1 values(1, 10), (2, 20);
insert into test_int2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
 id | data | id | data 
----+------+----+------
  1 |   10 |  3 |   10
  2 |   20 |  4 |   20
(2 rows)

496
-- Test to ensure that for full outer join on varchar columns, planner is successful in finding a sort operator in the catalog
497
create table input_table(a varchar(30), b varchar(30)) distributed by (a);
498 499
set enable_hashjoin = off;
explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a;
500 501 502 503 504 505 506 507 508 509 510 511 512
                        QUERY PLAN                         
-----------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
   ->  Merge Full Join
         Merge Cond: ((x.a)::text = (input_table.a)::text)
         ->  Sort
               Sort Key: x.a
               ->  Seq Scan on input_table x
         ->  Sort
               Sort Key: input_table.a
               ->  Seq Scan on input_table
 Optimizer: Postgres query optimizer
(10 rows)
513

514
-- Cleanup
515
reset enable_hashjoin;
516
set client_min_messages='warning'; -- silence drop-cascade NOTICEs
517
drop schema pred cascade;
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
reset search_path;
-- github issue 5370 cases
drop table if exists t5370;
NOTICE:  table "t5370" does not exist, skipping
drop table if exists t5370_2;
NOTICE:  table "t5370_2" does not exist, skipping
create table t5370(id int,name text) distributed by(id);
insert into t5370 select i,i from  generate_series(1,1000) i;
create table t5370_2 as select * from t5370 distributed by (id);
analyze t5370_2;
analyze t5370;
explain select * from t5370 a , t5370_2 b where a.name=b.name;
                                               QUERY PLAN                                               
--------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice3; segments: 3)  (cost=0.00..862.25 rows=1000 width=14)
   ->  Hash Join  (cost=0.00..862.20 rows=334 width=14)
         Hash Cond: t5370.name = t5370_2.name
         ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.02 rows=334 width=7)
               Hash Key: t5370.name
537
               ->  Seq Scan on t5370  (cost=0.00..431.01 rows=334 width=7)
538 539 540
         ->  Hash  (cost=431.02..431.02 rows=334 width=7)
               ->  Redistribute Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.02 rows=334 width=7)
                     Hash Key: t5370_2.name
541
                     ->  Seq Scan on t5370_2  (cost=0.00..431.01 rows=334 width=7)
542
 Optimizer: Pivotal Optimizer (GPORCA) version 2.70.2
543 544 545 546
(11 rows)

drop table t5370;
drop table t5370_2;
547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563
-- github issue 6215 cases
-- When executing the following plan
-- ```
--  Gather Motion 1:1  (slice1; segments: 1)
--    ->  Merge Full Join
--         ->  Seq Scan on int4_tbl a
--         ->  Seq Scan on int4_tbl b
--```
-- Greenplum will raise an Assert Fail.
-- We force adding a material node for
-- merge full join on true.
drop table if exists t6215;
create table t6215(f1 int4) distributed replicated;
insert into t6215(f1) values (1), (2), (3);
set enable_material = off;
-- The plan still have Material operator
explain (costs off) select * from t6215 a full join t6215 b on true;
564 565
                QUERY PLAN                
------------------------------------------
566
 Gather Motion 1:1  (slice1; segments: 1)
567 568 569 570 571 572
   ->  Merge Full Join
         ->  Seq Scan on t6215 a
         ->  Materialize
               ->  Seq Scan on t6215 b
 Optimizer: Postgres query optimizer
(6 rows)
573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588

select * from t6215 a full join t6215 b on true;
 f1 | f1 
----+----
  1 |  1
  1 |  2
  1 |  3
  2 |  1
  2 |  2
  2 |  3
  3 |  1
  3 |  2
  3 |  3
(9 rows)

drop table t6215;
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
--
-- This tripped an assertion while deciding the locus for the joins.
-- The code was failing to handle join between SingleQE and Hash correctly,
-- when there were join order restricitions. (see
-- https://github.com/greenplum-db/gpdb/issues/6643
--
select a.f1, b.f1, t.thousand, t.tenthous from
  (select sum(f1) as f1 from int4_tbl i4b) b
   left outer join
     (select sum(f1)+1 as f1 from int4_tbl i4a) a ON a.f1 = b.f1
      left outer join
        tenk1 t ON b.f1 = t.thousand and (a.f1+b.f1+999) = t.tenthous;
 f1 | f1 | thousand | tenthous 
----+----+----------+----------
    |  0 |          |         
(1 row)

606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634
-- tests to ensure that join reordering of LOJs and inner joins produces the
-- correct join predicates & residual filters
drop table if exists t1, t2, t3;
CREATE TABLE t1 (a int, b int, c int);
CREATE TABLE t2 (a int, b int, c int);
CREATE TABLE t3 (a int, b int, c int);
INSERT INTO t1 SELECT i, i, i FROM generate_series(1, 1000) i;
INSERT INTO t2 SELECT i, i, i FROM generate_series(2, 1000) i; -- start from 2 so that one row from t1 doesn't match
INSERT INTO t3 VALUES (1, 2, 3), (NULL, 2, 2);
ANALYZE t1;
ANALYZE t2;
ANALYZE t3;
-- ensure plan has a filter over left outer join
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c));
                                QUERY PLAN                                 
---------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)
   ->  Result
         Filter: ((t2.a IS NULL) OR (t1.c = t3.c))
         ->  Hash Left Join
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join
                     Hash Cond: (t1.b = t3.b)
                     ->  Seq Scan on t1
                     ->  Hash
                           ->  Broadcast Motion 3:3  (slice1; segments: 3)
                                 ->  Seq Scan on t3
               ->  Hash
                     ->  Seq Scan on t2
635
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
636 637 638 639 640 641 642 643 644 645
(14 rows)

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c));
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(1 row)

-- ensure plan has two inner joins with the where clause & join predicates ANDed
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a);
646 647 648
                      QUERY PLAN                      
------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
649
   ->  Hash Join
650 651
         Hash Cond: ((t1.a = t2.a) AND (t1.b = t3.b))
         ->  Seq Scan on t1
652 653
         ->  Hash
               ->  Hash Join
654 655
                     Hash Cond: (t2.a = t3.a)
                     ->  Seq Scan on t2
656
                     ->  Hash
657 658 659
                           ->  Seq Scan on t3
 Optimizer: Pivotal Optimizer (GPORCA) version 3.75.0
(11 rows)
660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a);
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
(0 rows)

-- ensure plan has a filter over left outer join
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a);
                                QUERY PLAN                                 
---------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)
   ->  Result
         Filter: (t2.a IS DISTINCT FROM t3.a)
         ->  Hash Left Join
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join
                     Hash Cond: (t1.b = t3.b)
                     ->  Seq Scan on t1
                     ->  Hash
                           ->  Broadcast Motion 3:3  (slice1; segments: 3)
                                 ->  Seq Scan on t3
               ->  Hash
                     ->  Seq Scan on t2
683
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708
(14 rows)

select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a);
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

-- ensure plan has a filter over left outer join
explain select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a));
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1293.22 rows=4 width=36)
   ->  Result  (cost=0.00..1293.22 rows=2 width=36)
         Filter: ((t2.a IS NULL) OR (t1.c = t3.a))
         ->  Hash Left Join  (cost=0.00..1293.22 rows=2 width=36)
               Hash Cond: (t1.a = t2.a)
               ->  Hash Join  (cost=0.00..862.08 rows=1 width=24)
                     Hash Cond: (t1.a = t3.a)
                     ->  Seq Scan on t1  (cost=0.00..431.01 rows=334 width=12)
                     ->  Hash  (cost=431.00..431.00 rows=1 width=12)
                           ->  Seq Scan on t3  (cost=0.00..431.00 rows=1 width=12)
               ->  Hash  (cost=431.01..431.01 rows=333 width=12)
                     ->  Seq Scan on t2  (cost=0.00..431.01 rows=333 width=12)
709
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751
(13 rows)

select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a));
 a | b | c | t1a | t1b | t1c | t2a | t2b | t2c 
---+---+---+-----+-----+-----+-----+-----+-----
 1 | 2 | 3 |   1 |   1 |   1 |     |     |    
(1 row)

-- ensure plan has a filter over left outer join
explain select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt 
  join t3 on tt.t1b = t3.b 
  join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b 
  join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b);
                                                                  QUERY PLAN                                                                  
----------------------------------------------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice4; segments: 3)  (cost=0.00..2586.40 rows=8 width=56)
   ->  Hash Join  (cost=0.00..2586.39 rows=3 width=56)
         Hash Cond: ((t1.b = t3_1.b) AND (t1_1.b = t3_1.b) AND (t3.b = t3_1.b))
         ->  Result  (cost=0.00..2155.39 rows=2 width=44)
               Filter: ((t2_1.a IS NULL) OR (t1.b = t3.b))
               ->  Hash Left Join  (cost=0.00..2155.39 rows=2 width=44)
                     Hash Cond: (t1.a = t2_1.a)
                     ->  Hash Join  (cost=0.00..1724.29 rows=1 width=36)
                           Hash Cond: ((t1.b = t3.b) AND (t1.b = t1_1.b))
                           ->  Seq Scan on t1  (cost=0.00..431.01 rows=334 width=8)
                           ->  Hash  (cost=1293.18..1293.18 rows=3 width=28)
                                 ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1293.18 rows=3 width=28)
                                       ->  Hash Left Join  (cost=0.00..1293.18 rows=1 width=28)
                                             Hash Cond: (t1_1.a = t2.a)
                                             ->  Hash Join  (cost=0.00..862.07 rows=1 width=20)
                                                   Hash Cond: (t1_1.b = t3.b)
                                                   ->  Seq Scan on t1 t1_1  (cost=0.00..431.01 rows=334 width=8)
                                                   ->  Hash  (cost=431.00..431.00 rows=2 width=12)
                                                         ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=2 width=12)
                                                               ->  Seq Scan on t3  (cost=0.00..431.00 rows=1 width=12)
                                             ->  Hash  (cost=431.01..431.01 rows=333 width=8)
                                                   ->  Seq Scan on t2  (cost=0.00..431.01 rows=333 width=8)
                     ->  Hash  (cost=431.01..431.01 rows=333 width=8)
                           ->  Seq Scan on t2 t2_1  (cost=0.00..431.01 rows=333 width=8)
         ->  Hash  (cost=431.00..431.00 rows=2 width=12)
               ->  Broadcast Motion 3:3  (slice3; segments: 3)  (cost=0.00..431.00 rows=2 width=12)
                     ->  Seq Scan on t3 t3_1  (cost=0.00..431.00 rows=1 width=12)
752
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
753 754 755 756 757 758 759 760 761 762 763 764 765 766
(28 rows)

select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt 
  join t3 on tt.t1b = t3.b 
  join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b 
  join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b);
 t1a | t1b | t2a | t2b | a | b | c | t1a | t1b | t2a | t2b | a | b | c 
-----+-----+-----+-----+---+---+---+-----+-----+-----+-----+---+---+---
   2 |   2 |   2 |   2 |   | 2 | 2 |   2 |   2 |   2 |   2 | 1 | 2 | 3
   2 |   2 |   2 |   2 |   | 2 | 2 |   2 |   2 |   2 |   2 |   | 2 | 2
   2 |   2 |   2 |   2 | 1 | 2 | 3 |   2 |   2 |   2 |   2 | 1 | 2 | 3
   2 |   2 |   2 |   2 | 1 | 2 | 3 |   2 |   2 |   2 |   2 |   | 2 | 2
(4 rows)

767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807
-- test different join order enumeration methods
set optimizer_join_order = query;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

set optimizer_join_order = greedy;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

set optimizer_join_order = exhaustive;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
(2 rows)

set optimizer_join_order = exhaustive2;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

reset optimizer_join_order;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
 a | b | c | a | b | c | a | b | c 
---+---+---+---+---+---+---+---+---
 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 3
 2 | 2 | 2 | 2 | 2 | 2 |   | 2 | 2
(2 rows)

808
drop table t1, t2, t3;
809 810 811 812 813 814 815 816 817
--
-- Test a bug that nestloop path previously can not generate motion above
-- index path, which sometimes is wrong (this test case is an example).
-- We now depend on parameterized path related variables to judge instead.
-- We conservatively disallow motion when there is parameter requirement
-- for either outer or inner at this moment though there could be room
-- for further improvement (e.g. referring subplan code to do broadcast
-- for base rel if needed, which needs much effort and does not seem to
-- be deserved given we will probably refactor related code for the lateral
818
-- support in the near future). For the query and guc settings below, Postgres
819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836
-- planner can not generate a plan.
set enable_nestloop = 1;
set enable_material = 0;
set enable_seqscan = 0;
set enable_bitmapscan = 0;
explain select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1;
                                                       QUERY PLAN                                                        
-------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=0.00..1885517.36 rows=1 width=1)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1885517.36 rows=1 width=1)
         ->  Limit  (cost=0.00..1885517.36 rows=1 width=1)
               ->  Result  (cost=0.00..1885517.36 rows=33336667 width=1)
                     ->  Nested Loop Left Join  (cost=0.00..1885484.02 rows=33336667 width=4)
                           Join Filter: true
                           ->  Seq Scan on tenk1  (cost=0.00..431.51 rows=3334 width=4)
                           ->  Materialize  (cost=0.00..431.70 rows=10000 width=1)
                                 ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.69 rows=10000 width=1)
                                       ->  Seq Scan on tenk2  (cost=0.00..431.50 rows=3334 width=1)
837
 Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0
838 839 840 841 842 843 844 845 846 847 848 849
(11 rows)

select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1;
 ?column? 
----------
 t
(1 row)

reset enable_nestloop;
reset enable_material;
reset enable_seqscan;
reset enable_bitmapscan;
850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950
-- test that flow->hashExpr variables can be resolved
CREATE TABLE hexpr_t1 (c1 int, c2 character varying(16)) DISTRIBUTED BY (c1);
CREATE TABLE hexpr_t2 (c3 character varying(16)) DISTRIBUTED BY (c3);
INSERT INTO hexpr_t1 SELECT i, i::character varying FROM generate_series(1,10)i;
INSERT INTO hexpr_t2 SELECT i::character varying FROM generate_series(1,10)i;
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
                                              QUERY PLAN                                              
------------------------------------------------------------------------------------------------------
 Result  (cost=0.00..862.00 rows=5 width=8)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..862.00 rows=14 width=2)
         ->  Hash Left Join  (cost=0.00..862.00 rows=5 width=2)
               Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
               ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=4 width=2)
                     Hash Key: btrim((hexpr_t1.c2)::text)
                     ->  Seq Scan on hexpr_t1  (cost=0.00..431.00 rows=4 width=2)
               ->  Hash  (cost=431.00..431.00 rows=4 width=2)
                     ->  Seq Scan on hexpr_t2  (cost=0.00..431.00 rows=4 width=2)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)

EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
                                              QUERY PLAN                                              
------------------------------------------------------------------------------------------------------
 Result  (cost=0.00..862.00 rows=5 width=8)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..862.00 rows=14 width=2)
         ->  Hash Left Join  (cost=0.00..862.00 rows=5 width=2)
               Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
               ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=4 width=2)
                     Hash Key: btrim((hexpr_t1.c2)::text)
                     ->  Seq Scan on hexpr_t1  (cost=0.00..431.00 rows=4 width=2)
               ->  Hash  (cost=431.00..431.00 rows=4 width=2)
                     ->  Seq Scan on hexpr_t2  (cost=0.00..431.00 rows=4 width=2)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)

EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
                                              QUERY PLAN                                              
------------------------------------------------------------------------------------------------------
 Result  (cost=0.00..862.00 rows=5 width=8)
   ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..862.00 rows=14 width=2)
         ->  Hash Left Join  (cost=0.00..862.00 rows=5 width=2)
               Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
               ->  Redistribute Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=4 width=2)
                     Hash Key: btrim((hexpr_t1.c2)::text)
                     ->  Seq Scan on hexpr_t1  (cost=0.00..431.00 rows=4 width=2)
               ->  Hash  (cost=431.00..431.00 rows=4 width=2)
                     ->  Seq Scan on hexpr_t2  (cost=0.00..431.00 rows=4 width=2)
 Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)

SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
 foo 
-----
 2
 3
 4
 7
 5
 6
 8
 9
 10
 1
(10 rows)

SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
 foo 
-----
 8
 5
 6
 1
 9
 10
 2
 3
 4
 7
(10 rows)

SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
 foo 
-----
 9
 10
 1
 2
 3
 4
 7
 5
 6
 8
(10 rows)