提交 d7d09b27 编写于 作者: B Bhuvnesh Chaudhary 提交者: Bhuvnesh

Lock leaf parts for DML statement if GDD is off

For partitioned tables, in case of DELETE/UPDATE operation on the root
table, we must acquire ExclusiveLock on the root and leaf partitions, so
that any other operation requesting lock higher than AccessShareLock and
ShareLock must wait on QD.

If we don't acquire lock for the leaf partitions, other concurrent DML
operation may be dispatched to the segments and a deadlock can occur.
Refer to the test case added for GDD.

resultRelations may not have all the entries including the root and leaf
partition tables, so in this fix we explicitly acquire locks on all the
tables.

While GDD is on, it will be able to detect the deadlock and cancel one
of the query, so we don't acquire locks on the leaf parts if the DML
operation is on the root.
Co-authored-by: NAlexandra Wang <lewang@pivotal.io>
Signed-off-by: NAlexandra Wang <lewang@pivotal.io>
Co-authored-by: NAlexandra Wang <lewang@pivotal.io>
上级 46c89f63
......@@ -1712,6 +1712,18 @@ InitPlan(QueryDesc *queryDesc, int eflags)
resultRelationOid = getrelid(resultRelationIndex, rangeTable);
if (operation == CMD_UPDATE || operation == CMD_DELETE)
{
/*
* On QD, the lock on the table has already been taken during parsing, so if it's a child
* partition, we don't need to take a lock. If there a a deadlock GDD will come in place
* and resolve the deadlock. ORCA Update / Delete plans only contains the root relation, so
* no locks on leaf partition are taken here. The below changes makes planner as well to not
* take locks on leaf partitions with GDD on.
* Note: With GDD off, ORCA and planner both will acquire locks on the leaf partitions.
*/
if (Gp_role == GP_ROLE_DISPATCH && rel_is_child_partition(resultRelationOid) && gp_enable_global_deadlock_detector)
{
lockmode = NoLock;
}
resultRelation = CdbOpenRelation(resultRelationOid,
lockmode,
false, /* noWait */
......@@ -1772,16 +1784,18 @@ InitPlan(QueryDesc *queryDesc, int eflags)
* - Otherwise, all_relids is a map of result_partitions to
* get each element's relation oid.
*/
if (containRoot)
{
/*
* For partition table, INSERT plan only contains the root table
* in the result relations, whereas DELETE and UPDATE contain
* both root table and the partition tables.
* For partition tables, if GDD is off, any DML statement on root
* partition, must acquire locks on the leaf partitions to avoid
* deadlocks.
*
* Without locking the partition relations on QD when INSERT
* the following dead lock scenario may happen between INSERT and
* AppendOnly VACUUM drop phase on the partition table:
* with Planner the following dead lock scenario may happen
* between INSERT and AppendOnly VACUUM drop phase on the
* partition table:
*
* 1. AO VACUUM drop on QD: acquired AccessExclusiveLock
* 2. INSERT on QE: acquired RowExclusiveLock
......@@ -1792,14 +1806,31 @@ InitPlan(QueryDesc *queryDesc, int eflags)
* until 3 and 4 proceed. Hence INSERT needs to Lock the partition
* tables on QD here (before 2) to prevent this dead lock.
*
* FIXME: Ideally we may want to
* 1) Lock the partition table during the parse stage just as when
* we lock the root oid
* 2) Only lock the particular partition table that we are
* inserting into, right now we don't have that info here.
* Deadlock can also occur in case of DELETE as below
* Session1: BEGIN; delete from foo_1_prt_1 WHERE c = 999999; => Holds
* Exclusive lock on foo_1_prt_1 on QD and marks the tuple c updated by the
* current transaction;
* Session2: BEGIN; delete from foo WHERE c = 1; => Holds Exclusive lock
* on foo on QD and marks the tuple c = 1 updated by current transaction
* Session1: DELETE FROM foo_1_prt_1 WHERE c = 1; => This wait, as Session
* 2 has already taken the lock, Session1 will wait to acquire the
* transaction lock.
* Session2: DELETE FROM foo WHERE c = 999999; => This waits, as Session 1
* has already taken the lock, Session 2 will wait to acquire the
* transaction lock.
* This will cause a deadlock.
* Similar scenario apply for UPDATE as well.
*/
lockmode = (operation == CMD_INSERT && rel_is_partitioned(relid)) ?
RowExclusiveLock : NoLock;
lockmode = NoLock;
if ((operation == CMD_DELETE || operation == CMD_INSERT || operation == CMD_UPDATE) &&
!gp_enable_global_deadlock_detector &&
rel_is_partitioned(relid))
{
if (operation == CMD_INSERT)
lockmode = RowExclusiveLock;
else
lockmode = ExclusiveLock;
}
all_relids = find_all_inheritors(relid, lockmode, NULL);
}
else
......
-- For partitioned tables if a DML is run on the root table, we should
-- take ExclusiveLock on the root and the child partitions. If lock is
-- not taken on the leaf partition on the QD, there may be issues where
-- a concurrent DML on the leaf partition may execute first and can cause
-- issues. For example
-- 1. It can cause a deadlock between the 2 DML operations.
-- Consider the below example.
-- INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,1000000)i;
-- Session 1: BEGIN; DELETE FROM part_tbl; ==> Let's say it holds Exclusive lock on root table only. (not on leaf)
-- Session 2: BEGIN; DELETE FROM part_tbl where a = 999999 or a = 1; ==> Delete will be dispatched to the segment as there is no lock on QD.
-- If Session 1, first deletes the tuple a = 1 on segment 0, Session 2 will wait for transaction lock as it attempting to delete the same tuple.
-- If Session 2, first deletes the tuple a = 999999 on segment 1, Session 1 will wait for transaction lock as it is attempting to delete the same tuple.
-- This will cause a deadlock.
-- Note: Same applies to UPDATE as well.
DROP TABLE IF EXISTS part_tbl;
DROP
CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE (b) (start(1) end(2) every(1));
CREATE
INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,12)i;
INSERT 12
1: BEGIN;
BEGIN
-- DELETE will acquire Exclusive lock on root and leaf partition on QD.
1: DELETE FROM part_tbl;
DELETE 12
-- Delete must hold an exclusive lock on the leaf partition on QD.
SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation';
granted
---------
t
(1 row)
-- DELETE on the leaf partition must wait on the QD as Session 1 already holds ExclusiveLock.
2&: DELETE FROM part_tbl_1_prt_1 WHERE b = 10; <waiting ...>
SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f';
relation | mode | granted
------------------+---------------+---------
part_tbl_1_prt_1 | ExclusiveLock | f
(1 row)
1: COMMIT;
COMMIT
2<: <... completed>
DELETE 0
1: BEGIN;
BEGIN
-- UPDATE will acquire Exclusive lock on root and leaf partition on QD.
1: UPDATE part_tbl SET c = 1; SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation';
granted
---------
t
(1 row)
-- UPDATE on leaf must be blocked on QD as previous UPDATE acquires Exclusive lock on the root and the leaf partitions
2&: UPDATE part_tbl_1_prt_1 set c = 10; <waiting ...>
SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f';
relation | mode | granted
------------------+---------------+---------
part_tbl_1_prt_1 | ExclusiveLock | f
(1 row)
1: COMMIT;
COMMIT
2<: <... completed>
UPDATE 0
1: BEGIN;
BEGIN
1: INSERT INTO part_tbl SELECT 1,1,1;
INSERT 1
SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='RowExclusiveLock' AND gp_segment_id=-1 AND locktype='relation';
granted
---------
t
(1 row)
1: COMMIT;
COMMIT
DROP TABLE part_tbl;
DROP
DROP TABLE IF EXISTS part_tbl;
DROP
CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1));
CREATE
INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i;
INSERT 10
-- check gdd is enabled
show gp_enable_global_deadlock_detector;
gp_enable_global_deadlock_detector
------------------------------------
on
(1 row)
1:BEGIN;
BEGIN
1:DELETE FROM part_tbl_1_prt_1 WHERE c = 9;
DELETE 1
2:BEGIN;
BEGIN
2:DELETE FROM part_tbl where c = 1;
DELETE 1
-- the below delete will wait to acquire the transaction lock to delete the tuple
-- held by Session 2
1&:DELETE FROM part_tbl_1_prt_1 WHERE c = 1; <waiting ...>
-- the below delete will wait to acquire the transaction lock to delete the tuple
-- held by Session 1
2&:DELETE FROM part_tbl where c = 9; <waiting ...>
1<: <... completed>
DELETE 1
2<: <... completed>
ERROR: canceling statement due to user request: "cancelled by global deadlock detector"
-- since gdd is on, Session 2 will be cancelled.
1:ROLLBACK;
ROLLBACK
2:ROLLBACK;
ROLLBACK
DROP TABLE IF EXISTS part_tbl;
DROP
DROP TABLE IF EXISTS part_tbl_upd_del;
DROP
-- check gdd is on
show gp_enable_global_deadlock_detector;
gp_enable_global_deadlock_detector
------------------------------------
on
(1 row)
CREATE TABLE part_tbl_upd_del (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1));
CREATE
INSERT INTO part_tbl_upd_del SELECT i, 1, i FROM generate_series(1,10)i;
INSERT 10
1: BEGIN;
BEGIN
1: DELETE FROM part_tbl_upd_del;
DELETE 10
-- since the delete operation is on root part and gdd is on, there will be no lock on leaf partition on QD
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1;
?column?
----------
(0 rows)
-- lock on qd will be there for root partition
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1;
?column?
----------
1
(1 row)
1: ROLLBACK;
ROLLBACK
1: BEGIN;
BEGIN
1: UPDATE part_tbl_upd_del SET c = 1 WHERE c = 1;
UPDATE 1
-- since the update operation is on root part and gdd is on, there will be no lock on leaf partition on QD
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1;
?column?
----------
(0 rows)
-- lock on qd will be there for root partition
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1;
?column?
----------
1
(1 row)
1: ROLLBACK;
ROLLBACK
1: BEGIN;
BEGIN
1: DELETE FROM part_tbl_upd_del_1_prt_1;
DELETE 10
-- since the delete operation is on leaf part, there will be a lock on QD
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock';
?column?
----------
1
(1 row)
1: ROLLBACK;
ROLLBACK
1: BEGIN;
BEGIN
1: UPDATE part_tbl_upd_del_1_prt_1 SET c = 1 WHERE c = 1;
UPDATE 1
-- since the update operation is on leaf part, there will be a lock on QD
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock';
?column?
----------
1
(1 row)
1: ROLLBACK;
ROLLBACK
-- Ensures that deadlock between an INSERT with planner and
-- appendonly VACUUM drop phase can be detected.
-- ORCA during translation of INSERT statement tries to get AccessShareLock on
-- leaf partition but Vacuum already has AccessExclusiveLock on the leaf
-- partition so it keeps waiting and the deadlock is not observed.
CREATE EXTENSION IF NOT EXISTS gp_inject_fault;
CREATE
select gp_inject_fault2('all', 'reset', dbid, hostname, port) from gp_segment_configuration;
gp_inject_fault2
------------------
Success:
Success:
Success:
Success:
Success:
Success:
Success:
Success:
(8 rows)
-- Helper function
CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ bool AS $$ /*in func*/ declare /*in func*/ result bool; /*in func*/ begin /*in func*/ result := false; /*in func*/ -- Wait until lock is acquired /*in func*/ while result IS NOT true loop /*in func*/ select l.granted INTO result /*in func*/ from pg_locks l /*in func*/ where l.relation::regclass = rel_name::regclass /*in func*/ and l.mode=lmode /*in func*/ and l.gp_segment_id=segment_id; /*in func*/ perform pg_sleep(0.1); /*in func*/ end loop; /*in func*/ return result; /*in func*/ end; /*in func*/ $$ language plpgsql;
CREATE
-- Given an append only table with partitions that is ready to be compacted
CREATE TABLE ao_part_tbl (a int, b int) with (appendonly=true, orientation=row) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (2) EVERY (1));
CREATE
INSERT INTO ao_part_tbl VALUES (1, 1);
INSERT 1
DELETE FROM ao_part_tbl;
DELETE 1
-- Check gdd is enabled
show gp_enable_global_deadlock_detector;
gp_enable_global_deadlock_detector
------------------------------------
on
(1 row)
-- VACUUM drop phase is blocked before it opens the child relation on the primary
SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_inject_fault2
------------------
Success:
(1 row)
-- VACUUM takes AccessExclusiveLock on the leaf partition on QD
1&: VACUUM ao_part_tbl; <waiting ...>
SELECT gp_wait_until_triggered_fault2('vacuum_relation_open_relation_during_drop_phase', 1, dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_wait_until_triggered_fault2
--------------------------------
Success:
(1 row)
-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation
2: set optimizer to off;
SET
-- INSERT takes RowExclusiveLock on the leaf partition on QE
2&: INSERT INTO ao_part_tbl VALUES (1, 1); <waiting ...>
SELECT wait_until_acquired_lock_on_rel('ao_part_tbl_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
wait_until_acquired_lock_on_rel
---------------------------------
t
(1 row)
-- Reset the fault on VACUUM.
SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_inject_fault2
------------------
Success:
(1 row)
-- VACUUM waits for AccessExclusiveLock on the leaf table on QE
1<: <... completed>
VACUUM
-- INSERT watis for AccessShareLock on the leaf table on QD, hence deadlock
2<: <... completed>
ERROR: canceling statement due to user request: "cancelled by global deadlock detector"
-- since gdd is on, Session 2 will be cancelled.
1:ROLLBACK;
ROLLBACK
2:ROLLBACK;
ROLLBACK
DROP TABLE IF EXISTS ao_part_tbl;
DROP
DROP TABLE IF EXISTS part_tbl;
DROP
CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1));
CREATE
INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i;
INSERT 10
-- check gdd is enabled
show gp_enable_global_deadlock_detector;
gp_enable_global_deadlock_detector
------------------------------------
on
(1 row)
1:BEGIN;
BEGIN
1:UPDATE part_tbl_1_prt_1 SET c = 9 WHERE c = 9;
UPDATE 1
2:BEGIN;
BEGIN
2:UPDATE part_tbl SET c = 1 WHERE c = 1;
UPDATE 1
-- the below update will wait to acquire the transaction lock to update the tuple
-- held by Session 2
1&:UPDATE part_tbl_1_prt_1 SET c = 1 WHERE c = 1; <waiting ...>
-- the below update will wait to acquire the transaction lock to update the tuple
-- held by Session 1
2&:UPDATE part_tbl SET c = 9 WHERE c = 9; <waiting ...>
1<: <... completed>
UPDATE 1
2<: <... completed>
ERROR: canceling statement due to user request: "cancelled by global deadlock detector"
-- since gdd is on, Session 2 will be cancelled.
1:ROLLBACK;
ROLLBACK
2:ROLLBACK;
ROLLBACK
DROP TABLE IF EXISTS part_tbl;
DROP
-- @Description Ensures that an INSERT while VACUUM drop phase does not leave
-- the segfile state inconsistent on master and the primary.
--
select gp_inject_fault('all', 'reset', 1);
-- Helper function
CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/
bool AS $$ /*in func*/
declare /*in func*/
result bool; /*in func*/
begin /*in func*/
result := false; /*in func*/
-- Wait until lock is acquired /*in func*/
while result = false loop /*in func*/
select l.granted INTO result /*in func*/
from pg_locks l, pg_class c /*in func*/
where l.relation = c.oid /*in func*/
and c.relname=rel_name /*in func*/
and l.mode=lmode /*in func*/
and l.gp_segment_id=segment_id; /*in func*/
if result = false then /*in func*/
perform pg_sleep(0.1); /*in func*/
end if; /*in func*/
end loop; /*in func*/
return result; /*in func*/
end; /*in func*/
$$ language plpgsql;
-- Given an append only table with partitions that is ready to be compacted
CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@)
DISTRIBUTED BY (a)
PARTITION BY RANGE (b)
(START (1) END (2) EVERY (1));
INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
DELETE FROM insert_while_vacuum_drop_@orientation@;
-- And VACUUM drop phase is blocked before it opens the child relation on the primary
SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
1&: VACUUM insert_while_vacuum_drop_@orientation@;
SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation
2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
-- Reset the fault on VACUUM and the two sessions should not be blocking each other
SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
1<:
2<:
-- The following INSERT transaction should still work and should not fail on
-- "ERROR cannot insert into segno (1) for AO relid <XX> that is in state AOSEG_STATE_AWAITING_DROP"
INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
\ No newline at end of file
test: lockmodes
test: ao_partition_lock
test: dml_on_root_locks_all_parts
test: select_dropped_table
# test update hash col under utility mode
......@@ -28,6 +29,10 @@ ignore: gdd/non-lock-107
test: gdd/avoid-qd-deadlock
test: gdd/prepare-for-local
test: gdd/local-deadlock-03
test: gdd/delete-deadlock-root-leaf-concurrent-op
test: gdd/planner_insert_while_vacuum_drop
test: gdd/update-deadlock-root-leaf-concurrent-op
test: gdd/dml_locks_only_targeted_table_in_query
# gdd end
test: gdd/end
......
-- @Description Ensures that an INSERT while VACUUM drop phase does not leave
-- the segfile state inconsistent on master and the primary.
--
select gp_inject_fault('all', 'reset', 1);
gp_inject_fault
-----------------
t
(1 row)
-- Helper function
CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ bool AS $$ /*in func*/ declare /*in func*/ result bool; /*in func*/ begin /*in func*/ result := false; /*in func*/ -- Wait until lock is acquired /*in func*/
while result = false loop /*in func*/ select l.granted INTO result /*in func*/ from pg_locks l, pg_class c /*in func*/ where l.relation = c.oid /*in func*/ and c.relname=rel_name /*in func*/ and l.mode=lmode /*in func*/ and l.gp_segment_id=segment_id; /*in func*/ if result = false then /*in func*/ perform pg_sleep(0.1); /*in func*/ end if; /*in func*/ end loop; /*in func*/ return result; /*in func*/ end; /*in func*/ $$ language plpgsql;
CREATE
-- Given an append only table with partitions that is ready to be compacted
CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (2) EVERY (1));
CREATE
INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
INSERT 1
DELETE FROM insert_while_vacuum_drop_@orientation@;
DELETE 1
-- And VACUUM drop phase is blocked before it opens the child relation on the primary
SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_inject_fault
-----------------
t
(1 row)
1&: VACUUM insert_while_vacuum_drop_@orientation@; <waiting ...>
SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_wait_until_triggered_fault
-------------------------------
t
(1 row)
-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation
2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); <waiting ...>
SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
wait_until_acquired_lock_on_rel
---------------------------------
(1 row)
-- Reset the fault on VACUUM and the two sessions should not be blocking each other
SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_inject_fault
-----------------
t
(1 row)
1<: <... completed>
VACUUM
2<: <... completed>
INSERT 1
-- The following INSERT transaction should still work and should not fail on
-- "ERROR cannot insert into segno (1) for AO relid <XX> that is in state AOSEG_STATE_AWAITING_DROP"
INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1);
INSERT 1
-- For partitioned tables if a DML is run on the root table, we should
-- take ExclusiveLock on the root and the child partitions. If lock is
-- not taken on the leaf partition on the QD, there may be issues where
-- a concurrent DML on the leaf partition may execute first and can cause
-- issues. For example
-- 1. It can cause a deadlock between the 2 DML operations.
-- Consider the below example.
-- INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,1000000)i;
-- Session 1: BEGIN; DELETE FROM part_tbl; ==> Let's say it holds Exclusive lock on root table only. (not on leaf)
-- Session 2: BEGIN; DELETE FROM part_tbl where a = 999999 or a = 1; ==> Delete will be dispatched to the segment as there is no lock on QD.
-- If Session 1, first deletes the tuple a = 1 on segment 0, Session 2 will wait for transaction lock as it attempting to delete the same tuple.
-- If Session 2, first deletes the tuple a = 999999 on segment 1, Session 1 will wait for transaction lock as it is attempting to delete the same tuple.
-- This will cause a deadlock.
-- Note: Same applies to UPDATE as well.
DROP TABLE IF EXISTS part_tbl;
CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE (b) (start(1) end(2) every(1));
INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,12)i;
1: BEGIN;
-- DELETE will acquire Exclusive lock on root and leaf partition on QD.
1: DELETE FROM part_tbl;
-- Delete must hold an exclusive lock on the leaf partition on QD.
SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation';
-- DELETE on the leaf partition must wait on the QD as Session 1 already holds ExclusiveLock.
2&: DELETE FROM part_tbl_1_prt_1 WHERE b = 10;
SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f';
1: COMMIT;
2<:
1: BEGIN;
-- UPDATE will acquire Exclusive lock on root and leaf partition on QD.
1: UPDATE part_tbl SET c = 1;
SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation';
-- UPDATE on leaf must be blocked on QD as previous UPDATE acquires Exclusive lock on the root and the leaf partitions
2&: UPDATE part_tbl_1_prt_1 set c = 10;
SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f';
1: COMMIT;
2<:
1: BEGIN;
1: INSERT INTO part_tbl SELECT 1,1,1;
SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='RowExclusiveLock' AND gp_segment_id=-1 AND locktype='relation';
1: COMMIT;
DROP TABLE part_tbl;
DROP TABLE IF EXISTS part_tbl;
CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1));
INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i;
-- check gdd is enabled
show gp_enable_global_deadlock_detector;
1:BEGIN;
1:DELETE FROM part_tbl_1_prt_1 WHERE c = 9;
2:BEGIN;
2:DELETE FROM part_tbl where c = 1;
-- the below delete will wait to acquire the transaction lock to delete the tuple
-- held by Session 2
1&:DELETE FROM part_tbl_1_prt_1 WHERE c = 1;
-- the below delete will wait to acquire the transaction lock to delete the tuple
-- held by Session 1
2&:DELETE FROM part_tbl where c = 9;
1<:
2<:
-- since gdd is on, Session 2 will be cancelled.
1:ROLLBACK;
2:ROLLBACK;
DROP TABLE IF EXISTS part_tbl;
DROP TABLE IF EXISTS part_tbl_upd_del;
-- check gdd is on
show gp_enable_global_deadlock_detector;
CREATE TABLE part_tbl_upd_del (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1));
INSERT INTO part_tbl_upd_del SELECT i, 1, i FROM generate_series(1,10)i;
1: BEGIN;
1: DELETE FROM part_tbl_upd_del;
-- since the delete operation is on root part and gdd is on, there will be no lock on leaf partition on QD
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1;
-- lock on qd will be there for root partition
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1;
1: ROLLBACK;
1: BEGIN;
1: UPDATE part_tbl_upd_del SET c = 1 WHERE c = 1;
-- since the update operation is on root part and gdd is on, there will be no lock on leaf partition on QD
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1;
-- lock on qd will be there for root partition
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1;
1: ROLLBACK;
1: BEGIN;
1: DELETE FROM part_tbl_upd_del_1_prt_1;
-- since the delete operation is on leaf part, there will be a lock on QD
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock';
1: ROLLBACK;
1: BEGIN;
1: UPDATE part_tbl_upd_del_1_prt_1 SET c = 1 WHERE c = 1;
-- since the update operation is on leaf part, there will be a lock on QD
1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock';
1: ROLLBACK;
-- Ensures that deadlock between an INSERT with planner and
-- appendonly VACUUM drop phase can be detected.
-- ORCA during translation of INSERT statement tries to get AccessShareLock on
-- leaf partition but Vacuum already has AccessExclusiveLock on the leaf
-- partition so it keeps waiting and the deadlock is not observed.
CREATE EXTENSION IF NOT EXISTS gp_inject_fault;
select gp_inject_fault2('all', 'reset', dbid, hostname, port) from gp_segment_configuration;
-- Helper function
CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/
bool AS $$ /*in func*/
declare /*in func*/
result bool; /*in func*/
begin /*in func*/
result := false; /*in func*/
-- Wait until lock is acquired /*in func*/
while result IS NOT true loop /*in func*/
select l.granted INTO result /*in func*/
from pg_locks l /*in func*/
where l.relation::regclass = rel_name::regclass /*in func*/
and l.mode=lmode /*in func*/
and l.gp_segment_id=segment_id; /*in func*/
perform pg_sleep(0.1); /*in func*/
end loop; /*in func*/
return result; /*in func*/
end; /*in func*/
$$ language plpgsql;
-- Given an append only table with partitions that is ready to be compacted
CREATE TABLE ao_part_tbl (a int, b int) with (appendonly=true, orientation=row)
DISTRIBUTED BY (a)
PARTITION BY RANGE (b)
(START (1) END (2) EVERY (1));
INSERT INTO ao_part_tbl VALUES (1, 1);
DELETE FROM ao_part_tbl;
-- Check gdd is enabled
show gp_enable_global_deadlock_detector;
-- VACUUM drop phase is blocked before it opens the child relation on the primary
SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
-- VACUUM takes AccessExclusiveLock on the leaf partition on QD
1&: VACUUM ao_part_tbl;
SELECT gp_wait_until_triggered_fault2('vacuum_relation_open_relation_during_drop_phase', 1, dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation
2: set optimizer to off;
-- INSERT takes RowExclusiveLock on the leaf partition on QE
2&: INSERT INTO ao_part_tbl VALUES (1, 1);
SELECT wait_until_acquired_lock_on_rel('ao_part_tbl_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
-- Reset the fault on VACUUM.
SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
-- VACUUM waits for AccessExclusiveLock on the leaf table on QE
1<:
-- INSERT watis for AccessShareLock on the leaf table on QD, hence deadlock
2<:
-- since gdd is on, Session 2 will be cancelled.
1:ROLLBACK;
2:ROLLBACK;
DROP TABLE IF EXISTS ao_part_tbl;
DROP TABLE IF EXISTS part_tbl;
CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1));
INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i;
-- check gdd is enabled
show gp_enable_global_deadlock_detector;
1:BEGIN;
1:UPDATE part_tbl_1_prt_1 SET c = 9 WHERE c = 9;
2:BEGIN;
2:UPDATE part_tbl SET c = 1 WHERE c = 1;
-- the below update will wait to acquire the transaction lock to update the tuple
-- held by Session 2
1&:UPDATE part_tbl_1_prt_1 SET c = 1 WHERE c = 1;
-- the below update will wait to acquire the transaction lock to update the tuple
-- held by Session 1
2&:UPDATE part_tbl SET c = 9 WHERE c = 9;
1<:
2<:
-- since gdd is on, Session 2 will be cancelled.
1:ROLLBACK;
2:ROLLBACK;
DROP TABLE IF EXISTS part_tbl;
......@@ -342,10 +342,19 @@ begin;
delete from partlockt where i = 4;
-- Known_opt_diff: MPP-20936
select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%';
coalesce | mode | locktype | node
-----------+---------------+----------+--------
partlockt | ExclusiveLock | relation | master
(1 row)
coalesce | mode | locktype | node
-------------------+---------------+----------+--------
partlockt | ExclusiveLock | relation | master
partlockt_1_prt_1 | ExclusiveLock | relation | master
partlockt_1_prt_2 | ExclusiveLock | relation | master
partlockt_1_prt_3 | ExclusiveLock | relation | master
partlockt_1_prt_4 | ExclusiveLock | relation | master
partlockt_1_prt_5 | ExclusiveLock | relation | master
partlockt_1_prt_6 | ExclusiveLock | relation | master
partlockt_1_prt_7 | ExclusiveLock | relation | master
partlockt_1_prt_8 | ExclusiveLock | relation | master
partlockt_1_prt_9 | ExclusiveLock | relation | master
(10 rows)
select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%';
coalesce | mode | locktype | node
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册