提交 c7fce844 编写于 作者: A Alexandra Wang 提交者: Lei (Alexandra) Wang

Lock child partition tables at INSERT InitPlan() on QD

INSERT needs to Lock the child table on QD, otherwise the following
dead lock scenario may happen if INSERT runs concurrently with VACUUM
drop phase on an AppendOnly partitioned table.

1. VACUUM on QD: acquired AccessExclusiveLock on the child table
2. INSERT on QE: acquired RowExclusiveLock on the child table
3. VACUUM on QE: waiting for AccessExclusiveLock on the child table
4. INSERT on QD: waiting for AccessShareLock at ExecutorEnd() on the
  child table, this is after QE sends back which child it inserted

Note that in step 2, INSERT only locks the child table on QE, it does
not lock the child table on QD in the previous code. This patch adds the
lock on QD as well to prevent the above dead lock.

Added `insert_while_vacuum_drop` and updated `partition_locking` to
reflect the changes
上级 ea22aff4
......@@ -675,7 +675,7 @@ vacuumStatement_Relation(VacuumStmt *vacstmt, Oid relid,
{
Assert(Gp_role == GP_ROLE_EXECUTE);
lmode = AccessExclusiveLock;
SIMPLE_FAULT_INJECTOR(VacuumRelationOpenRelationDuringDropPhase);
}
else if (!(vacstmt->options & VACOPT_VACUUM))
lmode = ShareUpdateExclusiveLock;
......
......@@ -1759,7 +1759,35 @@ InitPlan(QueryDesc *queryDesc, int eflags)
* get each element's relation oid.
*/
if (containRoot)
all_relids = find_all_inheritors(relid, NoLock, NULL);
{
/*
* For partition table, INSERT plan only contains the root table
* in the result relations, whereas DELETE and UPDATE contain
* both root table and the partition tables.
*
* Without locking the partition relations on QD when INSERT
* the following dead lock scenario may happen between INSERT and
* AppendOnly VACUUM drop phase on the partition table:
*
* 1. AO VACUUM drop on QD: acquired AccessExclusiveLock
* 2. INSERT on QE: acquired RowExclusiveLock
* 3. AO VACUUM drop on QE: waiting for AccessExclusiveLock
* 4. INSERT on QD: waiting for AccessShareLock at ExecutorEnd()
*
* 2 blocks 3, 1 blocks 4, 1 and 2 will not release their locks
* until 3 and 4 proceed. Hence INSERT needs to Lock the partition
* tables on QD here (before 2) to prevent this dead lock.
*
* FIXME: Ideally we may want to
* 1) Lock the partition table during the parse stage just as when
* we lock the root oid
* 2) Only lock the particular partition table that we are
* inserting into, right now we don't have that info here.
*/
lockmode = (operation == CMD_INSERT && rel_is_partitioned(relid)) ?
RowExclusiveLock : NoLock;
all_relids = find_all_inheritors(relid, lockmode, NULL);
}
else
{
ListCell *lc;
......
-- @Description Ensures that an INSERT while VACUUM drop phase does not leave
-- the segfile state inconsistent on master and the primary.
--
CREATE EXTENSION IF NOT EXISTS gp_inject_fault;
select gp_inject_fault('all', 'reset', 1);
-- Helper function
CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/
bool AS $$ /*in func*/
declare /*in func*/
result bool; /*in func*/
begin /*in func*/
result := false; /*in func*/
-- Wait until lock is acquired /*in func*/
while result = false loop /*in func*/
select l.granted INTO result /*in func*/
from pg_locks l, pg_class c /*in func*/
where l.relation = c.oid /*in func*/
and c.relname=rel_name /*in func*/
and l.mode=lmode /*in func*/
and l.gp_segment_id=segment_id; /*in func*/
if result = false then /*in func*/
perform pg_sleep(0.1); /*in func*/
end if; /*in func*/
end loop; /*in func*/
return result; /*in func*/
end; /*in func*/
$$ language plpgsql;
-- Given an append only table with partitions that is ready to be compacted
CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@)
DISTRIBUTED BY (a)
PARTITION BY RANGE (b)
(START (1) END (2) EVERY (1));
INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
DELETE FROM insert_while_vacuum_drop_@orientation@;
-- And VACUUM drop phase is blocked before it opens the child relation on the primary
SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
1&: VACUUM insert_while_vacuum_drop_@orientation@;
SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation
2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
-- Reset the fault on VACUUM and the two sessions should not be blocking each other
SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
1<:
2<:
-- The following INSERT transaction should still work and should not fail on
-- "ERROR cannot insert into segno (1) for AO relid <XX> that is in state AOSEG_STATE_AWAITING_DROP"
INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
\ No newline at end of file
-- @Description Ensures that an INSERT while VACUUM drop phase does not leave
-- the segfile state inconsistent on master and the primary.
--
CREATE EXTENSION IF NOT EXISTS gp_inject_fault;
CREATE
select gp_inject_fault('all', 'reset', 1);
gp_inject_fault
-----------------
t
(1 row)
-- Helper function
CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ bool AS $$ /*in func*/ declare /*in func*/ result bool; /*in func*/ begin /*in func*/ result := false; /*in func*/ -- Wait until lock is acquired /*in func*/
while result = false loop /*in func*/ select l.granted INTO result /*in func*/ from pg_locks l, pg_class c /*in func*/ where l.relation = c.oid /*in func*/ and c.relname=rel_name /*in func*/ and l.mode=lmode /*in func*/ and l.gp_segment_id=segment_id; /*in func*/ if result = false then /*in func*/ perform pg_sleep(0.1); /*in func*/ end if; /*in func*/ end loop; /*in func*/ return result; /*in func*/ end; /*in func*/ $$ language plpgsql;
CREATE
-- Given an append only table with partitions that is ready to be compacted
CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (2) EVERY (1));
CREATE
INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
INSERT 1
DELETE FROM insert_while_vacuum_drop_@orientation@;
DELETE 1
-- And VACUUM drop phase is blocked before it opens the child relation on the primary
SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_inject_fault
-----------------
t
(1 row)
1&: VACUUM insert_while_vacuum_drop_@orientation@; <waiting ...>
SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_wait_until_triggered_fault
-------------------------------
t
(1 row)
-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation
2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); <waiting ...>
SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
wait_until_acquired_lock_on_rel
---------------------------------
(1 row)
-- Reset the fault on VACUUM and the two sessions should not be blocking each other
SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_inject_fault
-----------------
t
(1 row)
1<: <... completed>
VACUUM
2<: <... completed>
INSERT 1
-- The following INSERT transaction should still work and should not fail on
-- "ERROR cannot insert into segno (1) for AO relid <XX> that is in state AOSEG_STATE_AWAITING_DROP"
INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
INSERT 1
......@@ -174,11 +174,14 @@ select * from locktest_master where coalesce not like 'gp_%' and coalesce not li
partlockt | RowExclusiveLock | relation | master
partlockt_1_prt_1 | AccessExclusiveLock | append-only segment file | master
partlockt_1_prt_1 | AccessShareLock | relation | master
partlockt_1_prt_1 | RowExclusiveLock | relation | master
partlockt_1_prt_2 | AccessExclusiveLock | append-only segment file | master
partlockt_1_prt_2 | AccessShareLock | relation | master
partlockt_1_prt_2 | RowExclusiveLock | relation | master
partlockt_1_prt_3 | AccessExclusiveLock | append-only segment file | master
partlockt_1_prt_3 | AccessShareLock | relation | master
(7 rows)
partlockt_1_prt_3 | RowExclusiveLock | relation | master
(10 rows)
select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%';
coalesce | mode | locktype | node
......@@ -314,10 +317,19 @@ begin;
-- insert locking
insert into partlockt values(3, 'f');
select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%';
coalesce | mode | locktype | node
-----------+------------------+----------+--------
partlockt | RowExclusiveLock | relation | master
(1 row)
coalesce | mode | locktype | node
-------------------+------------------+----------+--------
partlockt | RowExclusiveLock | relation | master
partlockt_1_prt_1 | RowExclusiveLock | relation | master
partlockt_1_prt_2 | RowExclusiveLock | relation | master
partlockt_1_prt_3 | RowExclusiveLock | relation | master
partlockt_1_prt_4 | RowExclusiveLock | relation | master
partlockt_1_prt_5 | RowExclusiveLock | relation | master
partlockt_1_prt_6 | RowExclusiveLock | relation | master
partlockt_1_prt_7 | RowExclusiveLock | relation | master
partlockt_1_prt_8 | RowExclusiveLock | relation | master
partlockt_1_prt_9 | RowExclusiveLock | relation | master
(10 rows)
select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%';
coalesce | mode | locktype | node
......
......@@ -174,11 +174,14 @@ select * from locktest_master where coalesce not like 'gp_%' and coalesce not li
partlockt | RowExclusiveLock | relation | master
partlockt_1_prt_1 | AccessExclusiveLock | append-only segment file | master
partlockt_1_prt_1 | AccessShareLock | relation | master
partlockt_1_prt_1 | RowExclusiveLock | relation | master
partlockt_1_prt_2 | AccessExclusiveLock | append-only segment file | master
partlockt_1_prt_2 | AccessShareLock | relation | master
partlockt_1_prt_2 | RowExclusiveLock | relation | master
partlockt_1_prt_3 | AccessExclusiveLock | append-only segment file | master
partlockt_1_prt_3 | AccessShareLock | relation | master
(7 rows)
partlockt_1_prt_3 | RowExclusiveLock | relation | master
(10 rows)
select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%';
coalesce | mode | locktype | node
......@@ -312,10 +315,19 @@ begin;
-- insert locking
insert into partlockt values(3, 'f');
select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%';
coalesce | mode | locktype | node
-----------+------------------+----------+--------
partlockt | RowExclusiveLock | relation | master
(1 row)
coalesce | mode | locktype | node
-------------------+------------------+----------+--------
partlockt | RowExclusiveLock | relation | master
partlockt_1_prt_1 | RowExclusiveLock | relation | master
partlockt_1_prt_2 | RowExclusiveLock | relation | master
partlockt_1_prt_3 | RowExclusiveLock | relation | master
partlockt_1_prt_4 | RowExclusiveLock | relation | master
partlockt_1_prt_5 | RowExclusiveLock | relation | master
partlockt_1_prt_6 | RowExclusiveLock | relation | master
partlockt_1_prt_7 | RowExclusiveLock | relation | master
partlockt_1_prt_8 | RowExclusiveLock | relation | master
partlockt_1_prt_9 | RowExclusiveLock | relation | master
(10 rows)
select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%';
coalesce | mode | locktype | node
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册