update_gp_optimizer.out 20.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
-- Test DELETE and UPDATE on an inherited table.
-- The special aspect of this table is that the inherited table has
-- a different distribution key. 'p' table's distribution key matches
-- that of 'r', but 'p2's doesn't. Test that the planner adds a Motion
-- node correctly for p2.
create table todelete (a int) distributed by (a);
create table parent (a int, b int, c int) distributed by (a);
create table child (a int, b int, c int) inherits (parent) distributed by (b);
NOTICE:  merging column "a" with inherited definition
NOTICE:  merging column "b" with inherited definition
NOTICE:  merging column "c" with inherited definition
insert into parent select g, g, g from generate_series(1,5) g;
insert into child select g, g, g from generate_series(6,10) g;
insert into todelete select generate_series(3,4);
delete from parent using todelete where parent.a = todelete.a;
insert into todelete select generate_series(5,7);
update parent set c=c+100 from todelete where parent.a = todelete.a;
select * from parent;
 a  | b  |  c  
----+----+-----
  1 |  1 |   1
  2 |  2 |   2
  8 |  8 |   8
  9 |  9 |   9
 10 | 10 |  10
  5 |  5 | 105
  6 |  6 | 106
  7 |  7 | 107
(8 rows)

drop table todelete;
drop table child;
drop table parent;
-- This is similar to the above, but with a partitioned table (which is
-- implemented by inheritance) rather than an explicitly inherited table.
-- The scans on some of the partitions degenerate into Result nodes with
-- False one-time filter, which don't need a Motion node.
create table todelete (a int, b int) distributed by (a);
create table target (a int, b int, c int)
        distributed by (a)
        partition by range (c) (start(1) end(5) every(1), default partition extra);
NOTICE:  CREATE TABLE will create partition "target_1_prt_extra" for table "target"
NOTICE:  CREATE TABLE will create partition "target_1_prt_2" for table "target"
NOTICE:  CREATE TABLE will create partition "target_1_prt_3" for table "target"
NOTICE:  CREATE TABLE will create partition "target_1_prt_4" for table "target"
NOTICE:  CREATE TABLE will create partition "target_1_prt_5" for table "target"
insert into todelete select g, g % 4 from generate_series(1, 10) g;
insert into target select g, 0, 3 from generate_series(1, 5) g;
insert into target select g, 0, 1 from generate_series(1, 5) g;
delete from target where c = 3 and a in (select b from todelete);
insert into todelete values (1, 5);
update target set b=target.b+100 where c = 3 and a in (select b from todelete);
select * from target;
 a |  b  | c 
---+-----+---
 1 |   0 | 1
 2 |   0 | 1
 4 |   0 | 3
 5 | 100 | 3
 3 |   0 | 1
 4 |   0 | 1
 5 |   0 | 1
(7 rows)

-- Also test an update with a qual that doesn't match any partition. The
-- Append degenerates into a dummy Result with false One-Time Filter.
alter table target drop default partition;
NOTICE:  dropped partition "extra" for relation "target"
update target set b = 10 where c = 10;
drop table todelete;
drop table target;
--
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
-- Test updated on inheritance parent table, where some child tables need a
-- Split Update, but not all.
--
create table base_tbl (a int4, b int4) distributed by (a);
create table child_a (a int4, b int4) inherits (base_tbl) distributed by (a);
NOTICE:  merging column "a" with inherited definition
NOTICE:  merging column "b" with inherited definition
create table child_b (a int4, b int4) inherits (base_tbl) distributed by (b);
NOTICE:  merging column "a" with inherited definition
NOTICE:  merging column "b" with inherited definition
insert into base_tbl select g, g from generate_series(1, 5) g;
explain (costs off) update base_tbl set a=a+1;
                          QUERY PLAN                           
---------------------------------------------------------------
 Update on base_tbl
   ->  Redistribute Motion 3:3  (slice1; segments: 3)
         Hash Key: ((base_tbl.a + 1))
         ->  Split
               ->  Seq Scan on base_tbl
   ->  Redistribute Motion 3:3  (slice2; segments: 3)
         Hash Key: ((child_a.a + 1))
         ->  Split
               ->  Seq Scan on child_a
   ->  Explicit Redistribute Motion 3:3  (slice3; segments: 3)
         ->  Seq Scan on child_b
98
 Optimizer: Postgres query optimizer
99 100 101 102
(12 rows)

update base_tbl set a = 5;
--
103 104 105 106 107 108 109 110 111 112 113 114 115 116
-- Explicit Distribution motion must be added if any of the child nodes
-- contains any motion excluding the motions in initplans.
-- These test cases and expectation are applicable for GPDB planner not for ORCA.
--
SET gp_autostats_mode = NONE;
CREATE TABLE keo1 ( user_vie_project_code_pk character varying(24), user_vie_fiscal_year_period_sk character varying(24), user_vie_act_cntr_marg_cum character varying(24)) DISTRIBUTED RANDOMLY;
INSERT INTO keo1 VALUES ('1', '1', '1');
CREATE TABLE keo2 ( projects_pk character varying(24)) DISTRIBUTED RANDOMLY;
INSERT INTO keo2 VALUES ('1');
CREATE TABLE keo3 ( sky_per character varying(24), bky_per character varying(24)) DISTRIBUTED BY (sky_per);
INSERT INTO keo3 VALUES ('1', '1');
CREATE TABLE keo4 ( keo_para_required_period character varying(6), keo_para_budget_date character varying(24)) DISTRIBUTED RANDOMLY;
INSERT INTO keo4 VALUES ('1', '1');
-- Explicit Redistribution motion should be added in case of GPDB Planner (test case not applicable for ORCA)
R
Richard Guo 已提交
117
EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
118 119 120 121 122 123 124 125
    ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b 
        ON b.projects_pk=a.user_vie_project_code_pk
        WHERE a.user_vie_fiscal_year_period_sk =
          (SELECT MAX (sky_per) FROM keo3 WHERE bky_per =
             (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date =
                (SELECT min (keo4.keo_para_budget_date) FROM keo4)))
    ) t1
WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
126
                                                                                                    QUERY PLAN
127
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
R
Richard Guo 已提交
128 129 130 131 132 133 134
 Update
   ->  Result
         ->  Explicit Redistribute Motion 3:3  (slice8; segments: 3)
               ->  Result
                     ->  Split
                           ->  Result
                                 ->  Hash Join
135
                                       Hash Cond: ((keo1.user_vie_project_code_pk)::text = (keo1_1.user_vie_project_code_pk)::text)
136
                                       ->  Seq Scan on keo1
R
Richard Guo 已提交
137 138 139
                                       ->  Hash
                                             ->  Broadcast Motion 3:3  (slice7; segments: 3)
                                                   ->  Hash Join
140
                                                         Hash Cond: ((keo1_1.user_vie_project_code_pk)::text = (keo2.projects_pk)::text)
141
                                                         ->  Redistribute Motion 1:3  (slice5; segments: 1)
R
Richard Guo 已提交
142
                                                               ->  Hash Join
143
                                                                     Hash Cond: ((keo1_1.user_vie_fiscal_year_period_sk)::text = (max((keo3.sky_per)::text)))
R
Richard Guo 已提交
144
                                                                     ->  Gather Motion 3:1  (slice1; segments: 3)
145
                                                                           ->  Seq Scan on keo1 keo1_1
R
Richard Guo 已提交
146 147 148
                                                                     ->  Hash
                                                                           ->  Aggregate
                                                                                 ->  Hash Join
149
                                                                                       Hash Cond: ((keo3.bky_per)::text = (keo4.keo_para_required_period)::text)
R
Richard Guo 已提交
150
                                                                                       ->  Gather Motion 3:1  (slice2; segments: 3)
151
                                                                                             ->  Seq Scan on keo3
R
Richard Guo 已提交
152 153
                                                                                       ->  Hash
                                                                                             ->  Assert
154
                                                                                                   Assert Cond: ((row_number() OVER (?)) = 1)
R
Richard Guo 已提交
155 156
                                                                                                   ->  WindowAgg
                                                                                                         ->  Hash Join
157
                                                                                                               Hash Cond: ((keo4.keo_para_budget_date)::text = (min((min((keo4_1.keo_para_budget_date)::text)))))
R
Richard Guo 已提交
158
                                                                                                               ->  Gather Motion 3:1  (slice3; segments: 3)
159
                                                                                                                     ->  Seq Scan on keo4
R
Richard Guo 已提交
160 161 162 163
                                                                                                               ->  Hash
                                                                                                                     ->  Aggregate
                                                                                                                           ->  Gather Motion 3:1  (slice4; segments: 3)
                                                                                                                                 ->  Aggregate
164
                                                                                                                                       ->  Seq Scan on keo4 keo4_1
R
Richard Guo 已提交
165 166
                                                         ->  Hash
                                                               ->  Broadcast Motion 3:3  (slice6; segments: 3)
167
                                                                     ->  Seq Scan on keo2
168
 Optimizer: PQO version 3.19.0
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
(41 rows)

UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
    ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b 
        ON b.projects_pk=a.user_vie_project_code_pk
        WHERE a.user_vie_fiscal_year_period_sk =
          (SELECT MAX (sky_per) FROM keo3 WHERE bky_per =
             (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date =
                (SELECT min (keo4.keo_para_budget_date) FROM keo4)))
    ) t1
WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
SELECT user_vie_act_cntr_marg_cum FROM keo1;
 user_vie_act_cntr_marg_cum 
----------------------------
 234.682
(1 row)

-- Explicit Redistribution motion should not be added in case of GPDB Planner (test case not applicable to ORCA)
CREATE TABLE keo5 (x int, y int) DISTRIBUTED BY (x);
INSERT INTO keo5 VALUES (1,1);
R
Richard Guo 已提交
189 190 191 192 193 194
EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
                                        QUERY PLAN                                        
------------------------------------------------------------------------------------------
 Delete
   ->  Result
         ->  Hash Semi Join
195
               Hash Cond: (keo5.x = keo5_2.x)
196
               ->  Seq Scan on keo5
R
Richard Guo 已提交
197 198
               ->  Hash
                     ->  Nested Loop Semi Join
199
                           Join Filter: true
200
                           ->  Seq Scan on keo5 keo5_2
R
Richard Guo 已提交
201
                           ->  Materialize
202
                                 ->  Broadcast Motion 1:3  (slice2; segments: 1)
R
Richard Guo 已提交
203 204
                                       ->  Limit
                                             ->  Gather Motion 3:1  (slice1; segments: 3)
205
                                                   ->  Seq Scan on keo5 keo5_1
206
                                                         Filter: (x < 2)
207
 Optimizer: PQO version 2.74.0
R
Richard Guo 已提交
208
(16 rows)
209 210 211 212 213 214 215 216 217 218 219 220 221

DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
SELECT x FROM keo5;
 x 
---
(0 rows)

RESET gp_autostats_mode;
DROP TABLE keo1;
DROP TABLE keo2;
DROP TABLE keo3;
DROP TABLE keo4;
DROP TABLE keo5;
A
Asim R P 已提交
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
--
-- text types. We should support the following updates.
--
CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a);
CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a);
UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
DROP TABLE ttab1;
DROP TABLE ttab2;
CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a);
CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a);
UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
DROP TABLE ttab1;
DROP TABLE ttab2;
CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a);
CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a);
UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
DROP TABLE ttab1;
DROP TABLE ttab2;
CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a);
CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a);
UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
DROP TABLE IF EXISTS update_distr_key;
NOTICE:  table "update_distr_key" does not exist, skipping
CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a);
INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i;
UPDATE update_distr_key SET a = 5 WHERE b = 10;
SELECT * from update_distr_key;
 a | b  
---+----
 0 |  0
 2 | 20
 4 | 40
 6 | 60
 8 | 80
 5 | 10
 3 | 30
 5 | 50
 7 | 70
 9 | 90
(10 rows)

DROP TABLE update_distr_key;
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
-- Update distribution key
-- start_ignore
drop table if exists r;
NOTICE:  table "r" does not exist, skipping
drop table if exists s;
NOTICE:  table "s" does not exist, skipping
drop table if exists update_dist;
NOTICE:  table "update_dist" does not exist, skipping
drop table if exists ao_table;
NOTICE:  table "ao_table" does not exist, skipping
drop table if exists aoco_table;
NOTICE:  table "aoco_table" does not exist, skipping
-- end_ignore
-- Update normal table distribution key
create table update_dist(a int) distributed by (a);
insert into update_dist values(1);
update update_dist set a=0 where a=1;
select * from update_dist;
 a 
---
 0
(1 row)

-- Update distribution key with join
create table r (a int, b int) distributed by (a);
create table s (a int, b int) distributed by (a);
insert into r select generate_series(1, 5), generate_series(1, 5) * 2;
insert into s select generate_series(1, 5), generate_series(1, 5) * 2;
select * from r;
 a | b  
---+----
 1 |  2
 2 |  4
 3 |  6
 4 |  8
 5 | 10
(5 rows)

select * from s;
 a | b  
---+----
 1 |  2
 2 |  4
 3 |  6
 4 |  8
 5 | 10
(5 rows)

update r set a = r.a + 1 from s where r.a = s.a;
select * from r;
 a | b  
---+----
 4 |  6
 5 |  8
 6 | 10
 3 |  4
 2 |  2
(5 rows)

update r set a = r.a + 1 where a in (select a from s);
select * from r;
 a | b  
---+----
 6 | 10
 3 |  2
 5 |  6
 6 |  8
 4 |  4
(5 rows)

-- Update redistribution
delete from r;
delete from s;
insert into r select generate_series(1, 5), generate_series(1, 5);
insert into s select generate_series(1, 5), generate_series(1, 5) * 2;
select * from r;
 a | b 
---+---
 3 | 3
 4 | 4
 5 | 5
 1 | 1
 2 | 2
(5 rows)

select * from s;
 a | b  
---+----
 3 |  6
 4 |  8
 5 | 10
 1 |  2
 2 |  4
(5 rows)

update r set a = r.a + 1 from s where r.b = s.b;
select * from r;
 a | b 
---+---
 3 | 3
 5 | 5
 3 | 2
 5 | 4
 1 | 1
(5 rows)

update r set a = r.a + 1 where b in (select b from s);
select * from r;
 a | b 
---+---
 3 | 3
 5 | 5
 6 | 4
 4 | 2
 1 | 1
(5 rows)

-- Update hash aggreate group by
delete from r;
delete from s;
insert into r select generate_series(1, 5), generate_series(1, 5) * 2;
insert into s select generate_series(1, 5), generate_series(1, 5);
select * from r;
 a | b  
---+----
 1 |  2
 2 |  4
 3 |  6
 4 |  8
 5 | 10
(5 rows)

select * from s;
 a | b 
---+---
 3 | 3
 4 | 4
 5 | 5
 1 | 1
 2 | 2
(5 rows)

update s set a = s.a + 1 where exists (select 1 from r where s.a = r.b);
select * from s;
 a | b 
---+---
 1 | 1
 3 | 3
 5 | 5
 5 | 4
 3 | 2
(5 rows)

-- Update ao table distribution key
create table ao_table (a int, b int) WITH (appendonly=true) distributed by (a);
insert into ao_table select g, g from generate_series(1, 5) g;
select * from ao_table;
 a | b 
---+---
 1 | 1
 2 | 2
 3 | 3
 4 | 4
 5 | 5
(5 rows)

update ao_table set a = a + 1 where b = 3;
select * from ao_table;
 a | b 
---+---
 1 | 1
 2 | 2
 4 | 4
 5 | 5
 4 | 3
(5 rows)

-- Update aoco table distribution key
create table aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a);
insert into aoco_table select g,g from generate_series(1, 5) g;
select * from aoco_table;
 a | b 
---+---
 1 | 1
 2 | 2
 3 | 3
 4 | 4
 5 | 5
(5 rows)

update aoco_table set a = a + 1 where b = 3;
select * from aoco_table;
 a | b 
---+---
 1 | 1
 2 | 2
 4 | 4
 5 | 5
 4 | 3
(5 rows)

-- Update prepare
delete from s;
insert into s select generate_series(1, 5), generate_series(1, 5);
select * from r;
 a | b  
---+----
 1 |  2
 2 |  4
 3 |  6
 4 |  8
 5 | 10
(5 rows)

select * from s;
 a | b 
---+---
 1 | 1
 2 | 2
 3 | 3
 4 | 4
 5 | 5
(5 rows)

prepare update_s(int) as update s set a = s.a + $1 where exists (select 1 from r where s.a = r.b);
execute update_s(10);
select * from s;
 a  | b 
----+---
  1 | 1
 14 | 4
  3 | 3
  5 | 5
 12 | 2
(5 rows)

500 501 502 503 504 505 506 507 508 509 510 511 512 513
-- Confirm that a split update is not created for a table excluded by
-- constraints in the planner.
create table nosplitupdate (a int) distributed by (a);
explain update nosplitupdate set a=0 where a=1 and a<1;
                                  QUERY PLAN                                   
-------------------------------------------------------------------------------
 Update  (cost=0.00..0.00 rows=0 width=1)
   ->  Result  (cost=0.00..0.00 rows=0 width=22)
         ->  Result  (cost=0.00..0.00 rows=0 width=18)
               ->  Result  (cost=0.00..0.00 rows=0 width=18)
                     ->  Split  (cost=0.00..0.00 rows=0 width=18)
                           ->  Result  (cost=0.00..0.00 rows=0 width=18)
                                 ->  Result  (cost=0.00..0.00 rows=0 width=18)
                                       One-Time Filter: false
Z
Zhenghua Lyu 已提交
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537
 Planning time: 17.926 ms
 Optimizer: PQO version 3.16.0
(10 rows)

-- test split-update when split-node's flow is entry
create table tsplit_entry (c int);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into tsplit_entry values (1), (2);
explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
                                                    QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
 Update on tsplit_entry  (cost=10000000001.00..10000000003.18 rows=3 width=54)
   ->  Redistribute Motion 1:3  (slice2)  (cost=10000000001.00..10000000003.18 rows=7 width=54)
         Hash Key: ((s.a)::integer)
         ->  Split  (cost=10000000001.00..10000000003.18 rows=7 width=54)
               ->  Nested Loop  (cost=10000000001.00..10000000003.12 rows=4 width=54)
                     ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..2.06 rows=2 width=14)
                           ->  Seq Scan on tsplit_entry  (cost=0.00..2.02 rows=1 width=14)
                     ->  Materialize  (cost=1.00..1.03 rows=1 width=40)
                           ->  Subquery Scan on s  (cost=1.00..1.02 rows=1 width=40)
                                 ->  Aggregate  (cost=1.00..1.01 rows=1 width=8)
                                       ->  Seq Scan on gp_segment_configuration  (cost=0.00..1.00 rows=1 width=0)
 Planning time: 13.063 ms
538
 Optimizer: Postgres query optimizer
Z
Zhenghua Lyu 已提交
539
(13 rows)
540

Z
Zhenghua Lyu 已提交
541
update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
542 543 544 545 546 547
-- start_ignore
drop table r;
drop table s;
drop table update_dist;
drop table ao_table;
drop table aoco_table;
Z
Zhenghua Lyu 已提交
548 549
drop table nosplitupdate;
drop table tsplit_entry;
550
-- end_ignore