diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 6bd6680df8b47f8e5b6fd33a657b0cdc636c280b..08bc07442e211d336d45ffee49861e762dbf35cb 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -675,7 +675,7 @@ vacuumStatement_Relation(VacuumStmt *vacstmt, Oid relid, { Assert(Gp_role == GP_ROLE_EXECUTE); lmode = AccessExclusiveLock; - + SIMPLE_FAULT_INJECTOR(VacuumRelationOpenRelationDuringDropPhase); } else if (!(vacstmt->options & VACOPT_VACUUM)) lmode = ShareUpdateExclusiveLock; diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index d3b9f1b0900e4f0122612f12c43b15d2eee67634..dad72e528e0a829de6bee61f9bf03cc816113eeb 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1759,7 +1759,35 @@ InitPlan(QueryDesc *queryDesc, int eflags) * get each element's relation oid. */ if (containRoot) - all_relids = find_all_inheritors(relid, NoLock, NULL); + { + /* + * For partition table, INSERT plan only contains the root table + * in the result relations, whereas DELETE and UPDATE contain + * both root table and the partition tables. + * + * Without locking the partition relations on QD when INSERT + * the following dead lock scenario may happen between INSERT and + * AppendOnly VACUUM drop phase on the partition table: + * + * 1. AO VACUUM drop on QD: acquired AccessExclusiveLock + * 2. INSERT on QE: acquired RowExclusiveLock + * 3. AO VACUUM drop on QE: waiting for AccessExclusiveLock + * 4. INSERT on QD: waiting for AccessShareLock at ExecutorEnd() + * + * 2 blocks 3, 1 blocks 4, 1 and 2 will not release their locks + * until 3 and 4 proceed. Hence INSERT needs to Lock the partition + * tables on QD here (before 2) to prevent this dead lock. + * + * FIXME: Ideally we may want to + * 1) Lock the partition table during the parse stage just as when + * we lock the root oid + * 2) Only lock the particular partition table that we are + * inserting into, right now we don't have that info here. + */ + lockmode = (operation == CMD_INSERT && rel_is_partitioned(relid)) ? + RowExclusiveLock : NoLock; + all_relids = find_all_inheritors(relid, lockmode, NULL); + } else { ListCell *lc; diff --git a/src/test/isolation2/input/uao/insert_while_vacuum_drop.source b/src/test/isolation2/input/uao/insert_while_vacuum_drop.source new file mode 100644 index 0000000000000000000000000000000000000000..88690ebc4b474906b54b926a52d7030d25694403 --- /dev/null +++ b/src/test/isolation2/input/uao/insert_while_vacuum_drop.source @@ -0,0 +1,55 @@ +-- @Description Ensures that an INSERT while VACUUM drop phase does not leave +-- the segfile state inconsistent on master and the primary. +-- +CREATE EXTENSION IF NOT EXISTS gp_inject_fault; +select gp_inject_fault('all', 'reset', 1); + +-- Helper function +CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ + bool AS $$ /*in func*/ +declare /*in func*/ + result bool; /*in func*/ +begin /*in func*/ +result := false; /*in func*/ +-- Wait until lock is acquired /*in func*/ +while result = false loop /*in func*/ +select l.granted INTO result /*in func*/ +from pg_locks l, pg_class c /*in func*/ +where l.relation = c.oid /*in func*/ + and c.relname=rel_name /*in func*/ + and l.mode=lmode /*in func*/ + and l.gp_segment_id=segment_id; /*in func*/ +if result = false then /*in func*/ + perform pg_sleep(0.1); /*in func*/ +end if; /*in func*/ +end loop; /*in func*/ +return result; /*in func*/ +end; /*in func*/ +$$ language plpgsql; + +-- Given an append only table with partitions that is ready to be compacted +CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@) + DISTRIBUTED BY (a) + PARTITION BY RANGE (b) +(START (1) END (2) EVERY (1)); + +INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); +DELETE FROM insert_while_vacuum_drop_@orientation@; + +-- And VACUUM drop phase is blocked before it opens the child relation on the primary +SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; +1&: VACUUM insert_while_vacuum_drop_@orientation@; +SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + +-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation +2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); +SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + +-- Reset the fault on VACUUM and the two sessions should not be blocking each other +SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; +1<: +2<: + +-- The following INSERT transaction should still work and should not fail on +-- "ERROR cannot insert into segno (1) for AO relid that is in state AOSEG_STATE_AWAITING_DROP" +INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); \ No newline at end of file diff --git a/src/test/isolation2/output/uao/insert_while_vacuum_drop.source b/src/test/isolation2/output/uao/insert_while_vacuum_drop.source new file mode 100644 index 0000000000000000000000000000000000000000..5a82189f6ef0e835ed6132b2b8bfdfefd4518f70 --- /dev/null +++ b/src/test/isolation2/output/uao/insert_while_vacuum_drop.source @@ -0,0 +1,61 @@ +-- @Description Ensures that an INSERT while VACUUM drop phase does not leave +-- the segfile state inconsistent on master and the primary. +-- +CREATE EXTENSION IF NOT EXISTS gp_inject_fault; +CREATE +select gp_inject_fault('all', 'reset', 1); + gp_inject_fault +----------------- + t +(1 row) + +-- Helper function +CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ bool AS $$ /*in func*/ declare /*in func*/ result bool; /*in func*/ begin /*in func*/ result := false; /*in func*/ -- Wait until lock is acquired /*in func*/ +while result = false loop /*in func*/ select l.granted INTO result /*in func*/ from pg_locks l, pg_class c /*in func*/ where l.relation = c.oid /*in func*/ and c.relname=rel_name /*in func*/ and l.mode=lmode /*in func*/ and l.gp_segment_id=segment_id; /*in func*/ if result = false then /*in func*/ perform pg_sleep(0.1); /*in func*/ end if; /*in func*/ end loop; /*in func*/ return result; /*in func*/ end; /*in func*/ $$ language plpgsql; +CREATE + +-- Given an append only table with partitions that is ready to be compacted +CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (2) EVERY (1)); +CREATE + +INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); +INSERT 1 +DELETE FROM insert_while_vacuum_drop_@orientation@; +DELETE 1 + +-- And VACUUM drop phase is blocked before it opens the child relation on the primary +SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_inject_fault +----------------- + t +(1 row) +1&: VACUUM insert_while_vacuum_drop_@orientation@; +SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_wait_until_triggered_fault +------------------------------- + t +(1 row) + +-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation +2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); +SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + wait_until_acquired_lock_on_rel +--------------------------------- + +(1 row) + +-- Reset the fault on VACUUM and the two sessions should not be blocking each other +SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_inject_fault +----------------- + t +(1 row) +1<: <... completed> +VACUUM +2<: <... completed> +INSERT 1 + +-- The following INSERT transaction should still work and should not fail on +-- "ERROR cannot insert into segno (1) for AO relid that is in state AOSEG_STATE_AWAITING_DROP" +INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); +INSERT 1 diff --git a/src/test/regress/expected/partition_locking.out b/src/test/regress/expected/partition_locking.out index c08b960ae4713135609cd281adeb2a1cfe0428ec..5eeeba8480ed20d6e6a907c3d56aea48236c7d5a 100644 --- a/src/test/regress/expected/partition_locking.out +++ b/src/test/regress/expected/partition_locking.out @@ -174,11 +174,14 @@ select * from locktest_master where coalesce not like 'gp_%' and coalesce not li partlockt | RowExclusiveLock | relation | master partlockt_1_prt_1 | AccessExclusiveLock | append-only segment file | master partlockt_1_prt_1 | AccessShareLock | relation | master + partlockt_1_prt_1 | RowExclusiveLock | relation | master partlockt_1_prt_2 | AccessExclusiveLock | append-only segment file | master partlockt_1_prt_2 | AccessShareLock | relation | master + partlockt_1_prt_2 | RowExclusiveLock | relation | master partlockt_1_prt_3 | AccessExclusiveLock | append-only segment file | master partlockt_1_prt_3 | AccessShareLock | relation | master -(7 rows) + partlockt_1_prt_3 | RowExclusiveLock | relation | master +(10 rows) select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node @@ -314,10 +317,19 @@ begin; -- insert locking insert into partlockt values(3, 'f'); select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%'; - coalesce | mode | locktype | node ------------+------------------+----------+-------- - partlockt | RowExclusiveLock | relation | master -(1 row) + coalesce | mode | locktype | node +-------------------+------------------+----------+-------- + partlockt | RowExclusiveLock | relation | master + partlockt_1_prt_1 | RowExclusiveLock | relation | master + partlockt_1_prt_2 | RowExclusiveLock | relation | master + partlockt_1_prt_3 | RowExclusiveLock | relation | master + partlockt_1_prt_4 | RowExclusiveLock | relation | master + partlockt_1_prt_5 | RowExclusiveLock | relation | master + partlockt_1_prt_6 | RowExclusiveLock | relation | master + partlockt_1_prt_7 | RowExclusiveLock | relation | master + partlockt_1_prt_8 | RowExclusiveLock | relation | master + partlockt_1_prt_9 | RowExclusiveLock | relation | master +(10 rows) select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node diff --git a/src/test/regress/expected/partition_locking_optimizer.out b/src/test/regress/expected/partition_locking_optimizer.out index cab015e6e834b0e14facb3c3a6e77183499231f9..54fa995a3971c9f9db063ca0e321b2030517d46d 100644 --- a/src/test/regress/expected/partition_locking_optimizer.out +++ b/src/test/regress/expected/partition_locking_optimizer.out @@ -174,11 +174,14 @@ select * from locktest_master where coalesce not like 'gp_%' and coalesce not li partlockt | RowExclusiveLock | relation | master partlockt_1_prt_1 | AccessExclusiveLock | append-only segment file | master partlockt_1_prt_1 | AccessShareLock | relation | master + partlockt_1_prt_1 | RowExclusiveLock | relation | master partlockt_1_prt_2 | AccessExclusiveLock | append-only segment file | master partlockt_1_prt_2 | AccessShareLock | relation | master + partlockt_1_prt_2 | RowExclusiveLock | relation | master partlockt_1_prt_3 | AccessExclusiveLock | append-only segment file | master partlockt_1_prt_3 | AccessShareLock | relation | master -(7 rows) + partlockt_1_prt_3 | RowExclusiveLock | relation | master +(10 rows) select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node @@ -312,10 +315,19 @@ begin; -- insert locking insert into partlockt values(3, 'f'); select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%'; - coalesce | mode | locktype | node ------------+------------------+----------+-------- - partlockt | RowExclusiveLock | relation | master -(1 row) + coalesce | mode | locktype | node +-------------------+------------------+----------+-------- + partlockt | RowExclusiveLock | relation | master + partlockt_1_prt_1 | RowExclusiveLock | relation | master + partlockt_1_prt_2 | RowExclusiveLock | relation | master + partlockt_1_prt_3 | RowExclusiveLock | relation | master + partlockt_1_prt_4 | RowExclusiveLock | relation | master + partlockt_1_prt_5 | RowExclusiveLock | relation | master + partlockt_1_prt_6 | RowExclusiveLock | relation | master + partlockt_1_prt_7 | RowExclusiveLock | relation | master + partlockt_1_prt_8 | RowExclusiveLock | relation | master + partlockt_1_prt_9 | RowExclusiveLock | relation | master +(10 rows) select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node