-- Test DELETE and UPDATE on an inherited table. -- The special aspect of this table is that the inherited table has -- a different distribution key. 'p' table's distribution key matches -- that of 'r', but 'p2's doesn't. Test that the planner adds a Motion -- node correctly for p2. create table todelete (a int) distributed by (a); create table parent (a int, b int, c int) distributed by (a); create table child (a int, b int, c int) inherits (parent) distributed by (b); NOTICE: merging column "a" with inherited definition NOTICE: merging column "b" with inherited definition NOTICE: merging column "c" with inherited definition insert into parent select g, g, g from generate_series(1,5) g; insert into child select g, g, g from generate_series(6,10) g; insert into todelete select generate_series(3,4); delete from parent using todelete where parent.a = todelete.a; insert into todelete select generate_series(5,7); update parent set c=c+100 from todelete where parent.a = todelete.a; select * from parent; a | b | c ----+----+----- 1 | 1 | 1 5 | 5 | 105 9 | 9 | 9 7 | 7 | 107 2 | 2 | 2 8 | 8 | 8 10 | 10 | 10 6 | 6 | 106 (8 rows) drop table todelete; drop table child; drop table parent; -- This is similar to the above, but with a partitioned table (which is -- implemented by inheritance) rather than an explicitly inherited table. -- The scans on some of the partitions degenerate into Result nodes with -- False one-time filter, which don't need a Motion node. create table todelete (a int, b int) distributed by (a); create table target (a int, b int, c int) distributed by (a) partition by range (c) (start(1) end(5) every(1), default partition extra); NOTICE: CREATE TABLE will create partition "target_1_prt_extra" for table "target" NOTICE: CREATE TABLE will create partition "target_1_prt_2" for table "target" NOTICE: CREATE TABLE will create partition "target_1_prt_3" for table "target" NOTICE: CREATE TABLE will create partition "target_1_prt_4" for table "target" NOTICE: CREATE TABLE will create partition "target_1_prt_5" for table "target" insert into todelete select g, g % 4 from generate_series(1, 10) g; insert into target select g, 0, 3 from generate_series(1, 5) g; insert into target select g, 0, 1 from generate_series(1, 5) g; delete from target where c = 3 and a in (select b from todelete); insert into todelete values (1, 5); update target set b=target.b+100 where c = 3 and a in (select b from todelete); select * from target; a | b | c ---+-----+--- 2 | 0 | 1 4 | 0 | 1 4 | 0 | 3 1 | 0 | 1 3 | 0 | 1 5 | 0 | 1 5 | 100 | 3 (7 rows) -- Also test an update with a qual that doesn't match any partition. The -- Append degenerates into a dummy Result with false One-Time Filter. alter table target drop default partition; NOTICE: dropped partition "extra" for relation "target" update target set b = 10 where c = 10; drop table todelete; drop table target; -- -- Test updated on inheritance parent table, where some child tables need a -- Split Update, but not all. -- create table base_tbl (a int4, b int4) distributed by (a); create table child_a (a int4, b int4) inherits (base_tbl) distributed by (a); NOTICE: merging column "a" with inherited definition NOTICE: merging column "b" with inherited definition create table child_b (a int4, b int4) inherits (base_tbl) distributed by (b); NOTICE: merging column "a" with inherited definition NOTICE: merging column "b" with inherited definition insert into base_tbl select g, g from generate_series(1, 5) g; explain (costs off) update base_tbl set a=a+1; QUERY PLAN --------------------------------------------------------------- Update on base_tbl -> Redistribute Motion 3:3 (slice1; segments: 3) Hash Key: ((base_tbl.a + 1)) -> Split -> Seq Scan on base_tbl -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: ((child_a.a + 1)) -> Split -> Seq Scan on child_a -> Explicit Redistribute Motion 3:3 (slice3; segments: 3) -> Seq Scan on child_b Optimizer: Postgres query optimizer (12 rows) update base_tbl set a = 5; -- -- Explicit Distribution motion must be added if any of the child nodes -- contains any motion excluding the motions in initplans. -- These test cases and expectation are applicable for GPDB planner not for ORCA. -- SET gp_autostats_mode = NONE; CREATE TABLE keo1 ( user_vie_project_code_pk character varying(24), user_vie_fiscal_year_period_sk character varying(24), user_vie_act_cntr_marg_cum character varying(24)) DISTRIBUTED RANDOMLY; INSERT INTO keo1 VALUES ('1', '1', '1'); CREATE TABLE keo2 ( projects_pk character varying(24)) DISTRIBUTED RANDOMLY; INSERT INTO keo2 VALUES ('1'); CREATE TABLE keo3 ( sky_per character varying(24), bky_per character varying(24)) DISTRIBUTED BY (sky_per); INSERT INTO keo3 VALUES ('1', '1'); CREATE TABLE keo4 ( keo_para_required_period character varying(6), keo_para_budget_date character varying(24)) DISTRIBUTED RANDOMLY; INSERT INTO keo4 VALUES ('1', '1'); -- Explicit Redistribution motion should be added in case of GPDB Planner (test case not applicable for ORCA) EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b ON b.projects_pk=a.user_vie_project_code_pk WHERE a.user_vie_fiscal_year_period_sk = (SELECT MAX (sky_per) FROM keo3 WHERE bky_per = (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date = (SELECT min (keo4.keo_para_budget_date) FROM keo4))) ) t1 WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- Update on keo1 InitPlan 3 (returns $2) (slice9) -> Aggregate InitPlan 2 (returns $1) (slice8) -> Gather Motion 3:1 (slice3; segments: 3) -> Seq Scan on keo4 keo4_1 Filter: ((keo_para_budget_date)::text = $0) InitPlan 1 (returns $0) (slice7) -> Aggregate -> Gather Motion 3:1 (slice2; segments: 3) -> Aggregate -> Seq Scan on keo4 -> Gather Motion 3:1 (slice1; segments: 3) -> Aggregate -> Seq Scan on keo3 Filter: ((bky_per)::text = ($1)::text) -> Explicit Redistribute Motion 3:3 (slice6; segments: 3) -> Hash Join Hash Cond: ((b.projects_pk)::text = (a.user_vie_project_code_pk)::text) -> Seq Scan on keo2 b -> Hash -> Broadcast Motion 3:3 (slice5; segments: 3) -> Hash Join Hash Cond: ((keo1.user_vie_project_code_pk)::text = (a.user_vie_project_code_pk)::text) -> Seq Scan on keo1 -> Hash -> Broadcast Motion 3:3 (slice4; segments: 3) -> Seq Scan on keo1 a Filter: ((user_vie_fiscal_year_period_sk)::text = $2) Optimizer: Postgres query optimizer (30 rows) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b ON b.projects_pk=a.user_vie_project_code_pk WHERE a.user_vie_fiscal_year_period_sk = (SELECT MAX (sky_per) FROM keo3 WHERE bky_per = (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date = (SELECT min (keo4.keo_para_budget_date) FROM keo4))) ) t1 WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk; SELECT user_vie_act_cntr_marg_cum FROM keo1; user_vie_act_cntr_marg_cum ---------------------------- 234.682 (1 row) -- Explicit Redistribution motion should not be added in case of GPDB Planner (test case not applicable to ORCA) CREATE TABLE keo5 (x int, y int) DISTRIBUTED BY (x); INSERT INTO keo5 VALUES (1,1); EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2)); QUERY PLAN ---------------------------------------------------------- Delete on keo5 InitPlan 1 (returns $0) (slice2) -> Limit -> Gather Motion 3:1 (slice1; segments: 3) -> Limit -> Seq Scan on keo5 keo5_2 Filter: (x < 2) -> Result One-Time Filter: $0 -> Hash Join Hash Cond: (keo5.x = keo5_1.x) -> Seq Scan on keo5 -> Hash -> HashAggregate Group Key: keo5_1.x -> Seq Scan on keo5 keo5_1 Optimizer: Postgres query optimizer (17 rows) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2)); SELECT x FROM keo5; x --- (0 rows) RESET gp_autostats_mode; DROP TABLE keo1; DROP TABLE keo2; DROP TABLE keo3; DROP TABLE keo4; DROP TABLE keo5; -- -- text types. We should support the following updates. -- CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a); CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a); UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; DROP TABLE ttab1; DROP TABLE ttab2; CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a); CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a); UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; DROP TABLE ttab1; DROP TABLE ttab2; CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a); CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a); UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; DROP TABLE ttab1; DROP TABLE ttab2; CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a); CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a); UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; DROP TABLE IF EXISTS update_distr_key; NOTICE: table "update_distr_key" does not exist, skipping CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a); INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i; UPDATE update_distr_key SET a = 5 WHERE b = 10; SELECT * from update_distr_key; a | b ---+---- 0 | 0 2 | 20 4 | 40 6 | 60 8 | 80 5 | 10 3 | 30 5 | 50 7 | 70 9 | 90 (10 rows) DROP TABLE update_distr_key; -- Update distribution key -- start_ignore drop table if exists r; NOTICE: table "r" does not exist, skipping drop table if exists s; NOTICE: table "s" does not exist, skipping drop table if exists update_dist; NOTICE: table "update_dist" does not exist, skipping drop table if exists ao_table; NOTICE: table "ao_table" does not exist, skipping drop table if exists aoco_table; NOTICE: table "aoco_table" does not exist, skipping drop table if exists p_1; NOTICE: table "p_1" does not exist, skipping drop table if exists p_2; NOTICE: table "p_2" does not exist, skipping drop table if exists subpartition_1; NOTICE: table "subpartition_1" does not exist, skipping -- end_ignore -- Update normal table distribution key create table update_dist(a int) distributed by (a); insert into update_dist values(1); update update_dist set a=0 where a=1; select * from update_dist; a --- 0 (1 row) -- Update distribution key with join create table r (a int, b int) distributed by (a); create table s (a int, b int) distributed by (a); insert into r select generate_series(1, 5), generate_series(1, 5) * 2; insert into s select generate_series(1, 5), generate_series(1, 5) * 2; select * from r; a | b ---+---- 3 | 6 4 | 8 5 | 10 1 | 2 2 | 4 (5 rows) select * from s; a | b ---+---- 1 | 2 2 | 4 3 | 6 4 | 8 5 | 10 (5 rows) update r set a = r.a + 1 from s where r.a = s.a; select * from r; a | b ---+---- 4 | 6 5 | 8 6 | 10 3 | 4 2 | 2 (5 rows) update r set a = r.a + 1 where a in (select a from s); select * from r; a | b ---+---- 6 | 10 5 | 6 6 | 8 4 | 4 3 | 2 (5 rows) -- Update redistribution delete from r; delete from s; insert into r select generate_series(1, 5), generate_series(1, 5); insert into s select generate_series(1, 5), generate_series(1, 5) * 2; select * from r; a | b ---+--- 3 | 3 4 | 4 5 | 5 1 | 1 2 | 2 (5 rows) select * from s; a | b ---+---- 3 | 6 4 | 8 5 | 10 1 | 2 2 | 4 (5 rows) update r set a = r.a + 1 from s where r.b = s.b; select * from r; a | b ---+--- 1 | 1 3 | 3 5 | 5 5 | 4 3 | 2 (5 rows) update r set a = r.a + 1 where b in (select b from s); select * from r; a | b ---+--- 1 | 1 3 | 3 5 | 5 6 | 4 4 | 2 (5 rows) -- Update hash aggreate group by delete from r; delete from s; insert into r select generate_series(1, 5), generate_series(1, 5) * 2; insert into s select generate_series(1, 5), generate_series(1, 5); select * from r; a | b ---+---- 1 | 2 2 | 4 3 | 6 4 | 8 5 | 10 (5 rows) select * from s; a | b ---+--- 3 | 3 4 | 4 5 | 5 1 | 1 2 | 2 (5 rows) update s set a = s.a + 1 where exists (select 1 from r where s.a = r.b); select * from s; a | b ---+--- 1 | 1 3 | 3 5 | 5 3 | 2 5 | 4 (5 rows) -- Update ao table distribution key create table ao_table (a int, b int) WITH (appendonly=true) distributed by (a); insert into ao_table select g, g from generate_series(1, 5) g; select * from ao_table; a | b ---+--- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) update ao_table set a = a + 1 where b = 3; select * from ao_table; a | b ---+--- 1 | 1 2 | 2 4 | 4 5 | 5 4 | 3 (5 rows) -- Update aoco table distribution key create table aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a); insert into aoco_table select g,g from generate_series(1, 5) g; select * from aoco_table; a | b ---+--- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) update aoco_table set a = a + 1 where b = 3; select * from aoco_table; a | b ---+--- 1 | 1 2 | 2 4 | 4 5 | 5 4 | 3 (5 rows) -- Update prepare delete from s; insert into s select generate_series(1, 5), generate_series(1, 5); select * from r; a | b ---+---- 1 | 2 2 | 4 3 | 6 4 | 8 5 | 10 (5 rows) select * from s; a | b ---+--- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) prepare update_s(int) as update s set a = s.a + $1 where exists (select 1 from r where s.a = r.b); execute update_s(10); select * from s; a | b ----+--- 1 | 1 14 | 4 3 | 3 5 | 5 12 | 2 (5 rows) -- Confirm that a split update is not created for a table excluded by -- constraints in the planner. create table nosplitupdate (a int) distributed by (a); explain update nosplitupdate set a=0 where a=1 and a<1; QUERY PLAN ----------------------------------------------------------- Update on nosplitupdate (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) One-Time Filter: false Planning time: 0.271 ms Optimizer: Postgres query optimizer (5 rows) -- test split-update when split-node's flow is entry create table tsplit_entry (c int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into tsplit_entry values (1), (2); explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s; QUERY PLAN ------------------------------------------------------------------------------------------------------------------ Update on tsplit_entry (cost=10000000001.00..10000000003.18 rows=3 width=54) -> Redistribute Motion 1:3 (slice2) (cost=10000000001.00..10000000003.18 rows=7 width=54) Hash Key: ((s.a)::integer) -> Split (cost=10000000001.00..10000000003.18 rows=7 width=54) -> Nested Loop (cost=10000000001.00..10000000003.12 rows=4 width=54) -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.06 rows=2 width=14) -> Seq Scan on tsplit_entry (cost=0.00..2.02 rows=1 width=14) -> Materialize (cost=1.00..1.03 rows=1 width=40) -> Subquery Scan on s (cost=1.00..1.02 rows=1 width=40) -> Aggregate (cost=1.00..1.01 rows=1 width=8) -> Seq Scan on gp_segment_configuration (cost=0.00..1.00 rows=1 width=0) Planning time: 1.088 ms Optimizer: Postgres query optimizer (13 rows) update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s; -- start_ignore drop table r; drop table s; drop table update_dist; drop table ao_table; drop table aoco_table; drop table nosplitupdate; drop table tsplit_entry; -- end_ignore