From d7d09b27a3a265386ca702a10de5623ca67d64e0 Mon Sep 17 00:00:00 2001 From: Bhuvnesh Chaudhary Date: Fri, 12 Jul 2019 17:29:54 -0700 Subject: [PATCH] Lock leaf parts for DML statement if GDD is off For partitioned tables, in case of DELETE/UPDATE operation on the root table, we must acquire ExclusiveLock on the root and leaf partitions, so that any other operation requesting lock higher than AccessShareLock and ShareLock must wait on QD. If we don't acquire lock for the leaf partitions, other concurrent DML operation may be dispatched to the segments and a deadlock can occur. Refer to the test case added for GDD. resultRelations may not have all the entries including the root and leaf partition tables, so in this fix we explicitly acquire locks on all the tables. While GDD is on, it will be able to detect the deadlock and cancel one of the query, so we don't acquire locks on the leaf parts if the DML operation is on the root. Co-authored-by: Alexandra Wang Signed-off-by: Alexandra Wang Co-authored-by: Alexandra Wang --- src/backend/executor/execMain.c | 55 +++++++++--- .../expected/dml_on_root_locks_all_parts.out | 83 ++++++++++++++++++ ...elete-deadlock-root-leaf-concurrent-op.out | 44 ++++++++++ ...dml_locks_only_targeted_table_in_query.out | 78 +++++++++++++++++ .../gdd/planner_insert_while_vacuum_drop.out | 86 +++++++++++++++++++ ...pdate-deadlock-root-leaf-concurrent-op.out | 44 ++++++++++ .../input/uao/insert_while_vacuum_drop.source | 54 ------------ src/test/isolation2/isolation2_schedule | 5 ++ .../uao/insert_while_vacuum_drop.source | 59 ------------- .../sql/dml_on_root_locks_all_parts.sql | 50 +++++++++++ ...elete-deadlock-root-leaf-concurrent-op.sql | 28 ++++++ ...dml_locks_only_targeted_table_in_query.sql | 37 ++++++++ .../gdd/planner_insert_while_vacuum_drop.sql | 64 ++++++++++++++ ...pdate-deadlock-root-leaf-concurrent-op.sql | 28 ++++++ .../expected/partition_locking_optimizer.out | 17 +++- 15 files changed, 603 insertions(+), 129 deletions(-) create mode 100644 src/test/isolation2/expected/dml_on_root_locks_all_parts.out create mode 100644 src/test/isolation2/expected/gdd/delete-deadlock-root-leaf-concurrent-op.out create mode 100644 src/test/isolation2/expected/gdd/dml_locks_only_targeted_table_in_query.out create mode 100644 src/test/isolation2/expected/gdd/planner_insert_while_vacuum_drop.out create mode 100644 src/test/isolation2/expected/gdd/update-deadlock-root-leaf-concurrent-op.out delete mode 100644 src/test/isolation2/input/uao/insert_while_vacuum_drop.source delete mode 100644 src/test/isolation2/output/uao/insert_while_vacuum_drop.source create mode 100644 src/test/isolation2/sql/dml_on_root_locks_all_parts.sql create mode 100644 src/test/isolation2/sql/gdd/delete-deadlock-root-leaf-concurrent-op.sql create mode 100644 src/test/isolation2/sql/gdd/dml_locks_only_targeted_table_in_query.sql create mode 100644 src/test/isolation2/sql/gdd/planner_insert_while_vacuum_drop.sql create mode 100644 src/test/isolation2/sql/gdd/update-deadlock-root-leaf-concurrent-op.sql diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 845b7da740..1535dcc0f8 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1712,6 +1712,18 @@ InitPlan(QueryDesc *queryDesc, int eflags) resultRelationOid = getrelid(resultRelationIndex, rangeTable); if (operation == CMD_UPDATE || operation == CMD_DELETE) { + /* + * On QD, the lock on the table has already been taken during parsing, so if it's a child + * partition, we don't need to take a lock. If there a a deadlock GDD will come in place + * and resolve the deadlock. ORCA Update / Delete plans only contains the root relation, so + * no locks on leaf partition are taken here. The below changes makes planner as well to not + * take locks on leaf partitions with GDD on. + * Note: With GDD off, ORCA and planner both will acquire locks on the leaf partitions. + */ + if (Gp_role == GP_ROLE_DISPATCH && rel_is_child_partition(resultRelationOid) && gp_enable_global_deadlock_detector) + { + lockmode = NoLock; + } resultRelation = CdbOpenRelation(resultRelationOid, lockmode, false, /* noWait */ @@ -1772,16 +1784,18 @@ InitPlan(QueryDesc *queryDesc, int eflags) * - Otherwise, all_relids is a map of result_partitions to * get each element's relation oid. */ + if (containRoot) { /* - * For partition table, INSERT plan only contains the root table - * in the result relations, whereas DELETE and UPDATE contain - * both root table and the partition tables. + * For partition tables, if GDD is off, any DML statement on root + * partition, must acquire locks on the leaf partitions to avoid + * deadlocks. * * Without locking the partition relations on QD when INSERT - * the following dead lock scenario may happen between INSERT and - * AppendOnly VACUUM drop phase on the partition table: + * with Planner the following dead lock scenario may happen + * between INSERT and AppendOnly VACUUM drop phase on the + * partition table: * * 1. AO VACUUM drop on QD: acquired AccessExclusiveLock * 2. INSERT on QE: acquired RowExclusiveLock @@ -1792,14 +1806,31 @@ InitPlan(QueryDesc *queryDesc, int eflags) * until 3 and 4 proceed. Hence INSERT needs to Lock the partition * tables on QD here (before 2) to prevent this dead lock. * - * FIXME: Ideally we may want to - * 1) Lock the partition table during the parse stage just as when - * we lock the root oid - * 2) Only lock the particular partition table that we are - * inserting into, right now we don't have that info here. + * Deadlock can also occur in case of DELETE as below + * Session1: BEGIN; delete from foo_1_prt_1 WHERE c = 999999; => Holds + * Exclusive lock on foo_1_prt_1 on QD and marks the tuple c updated by the + * current transaction; + * Session2: BEGIN; delete from foo WHERE c = 1; => Holds Exclusive lock + * on foo on QD and marks the tuple c = 1 updated by current transaction + * Session1: DELETE FROM foo_1_prt_1 WHERE c = 1; => This wait, as Session + * 2 has already taken the lock, Session1 will wait to acquire the + * transaction lock. + * Session2: DELETE FROM foo WHERE c = 999999; => This waits, as Session 1 + * has already taken the lock, Session 2 will wait to acquire the + * transaction lock. + * This will cause a deadlock. + * Similar scenario apply for UPDATE as well. */ - lockmode = (operation == CMD_INSERT && rel_is_partitioned(relid)) ? - RowExclusiveLock : NoLock; + lockmode = NoLock; + if ((operation == CMD_DELETE || operation == CMD_INSERT || operation == CMD_UPDATE) && + !gp_enable_global_deadlock_detector && + rel_is_partitioned(relid)) + { + if (operation == CMD_INSERT) + lockmode = RowExclusiveLock; + else + lockmode = ExclusiveLock; + } all_relids = find_all_inheritors(relid, lockmode, NULL); } else diff --git a/src/test/isolation2/expected/dml_on_root_locks_all_parts.out b/src/test/isolation2/expected/dml_on_root_locks_all_parts.out new file mode 100644 index 0000000000..e93a6091c8 --- /dev/null +++ b/src/test/isolation2/expected/dml_on_root_locks_all_parts.out @@ -0,0 +1,83 @@ +-- For partitioned tables if a DML is run on the root table, we should +-- take ExclusiveLock on the root and the child partitions. If lock is +-- not taken on the leaf partition on the QD, there may be issues where +-- a concurrent DML on the leaf partition may execute first and can cause +-- issues. For example +-- 1. It can cause a deadlock between the 2 DML operations. +-- Consider the below example. +-- INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,1000000)i; +-- Session 1: BEGIN; DELETE FROM part_tbl; ==> Let's say it holds Exclusive lock on root table only. (not on leaf) +-- Session 2: BEGIN; DELETE FROM part_tbl where a = 999999 or a = 1; ==> Delete will be dispatched to the segment as there is no lock on QD. +-- If Session 1, first deletes the tuple a = 1 on segment 0, Session 2 will wait for transaction lock as it attempting to delete the same tuple. +-- If Session 2, first deletes the tuple a = 999999 on segment 1, Session 1 will wait for transaction lock as it is attempting to delete the same tuple. +-- This will cause a deadlock. +-- Note: Same applies to UPDATE as well. +DROP TABLE IF EXISTS part_tbl; +DROP +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE (b) (start(1) end(2) every(1)); +CREATE + +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,12)i; +INSERT 12 + +1: BEGIN; +BEGIN +-- DELETE will acquire Exclusive lock on root and leaf partition on QD. +1: DELETE FROM part_tbl; +DELETE 12 + +-- Delete must hold an exclusive lock on the leaf partition on QD. +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + granted +--------- + t +(1 row) + +-- DELETE on the leaf partition must wait on the QD as Session 1 already holds ExclusiveLock. +2&: DELETE FROM part_tbl_1_prt_1 WHERE b = 10; +SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f'; + relation | mode | granted +------------------+---------------+--------- + part_tbl_1_prt_1 | ExclusiveLock | f +(1 row) + +1: COMMIT; +COMMIT +2<: <... completed> +DELETE 0 + +1: BEGIN; +BEGIN +-- UPDATE will acquire Exclusive lock on root and leaf partition on QD. +1: UPDATE part_tbl SET c = 1; SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + granted +--------- + t +(1 row) + +-- UPDATE on leaf must be blocked on QD as previous UPDATE acquires Exclusive lock on the root and the leaf partitions +2&: UPDATE part_tbl_1_prt_1 set c = 10; +SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f'; + relation | mode | granted +------------------+---------------+--------- + part_tbl_1_prt_1 | ExclusiveLock | f +(1 row) + +1: COMMIT; +COMMIT +2<: <... completed> +UPDATE 0 + +1: BEGIN; +BEGIN +1: INSERT INTO part_tbl SELECT 1,1,1; +INSERT 1 +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='RowExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + granted +--------- + t +(1 row) +1: COMMIT; +COMMIT +DROP TABLE part_tbl; +DROP diff --git a/src/test/isolation2/expected/gdd/delete-deadlock-root-leaf-concurrent-op.out b/src/test/isolation2/expected/gdd/delete-deadlock-root-leaf-concurrent-op.out new file mode 100644 index 0000000000..55c5407d82 --- /dev/null +++ b/src/test/isolation2/expected/gdd/delete-deadlock-root-leaf-concurrent-op.out @@ -0,0 +1,44 @@ +DROP TABLE IF EXISTS part_tbl; +DROP +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +CREATE +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i; +INSERT 10 + +-- check gdd is enabled +show gp_enable_global_deadlock_detector; + gp_enable_global_deadlock_detector +------------------------------------ + on +(1 row) +1:BEGIN; +BEGIN +1:DELETE FROM part_tbl_1_prt_1 WHERE c = 9; +DELETE 1 + +2:BEGIN; +BEGIN +2:DELETE FROM part_tbl where c = 1; +DELETE 1 + +-- the below delete will wait to acquire the transaction lock to delete the tuple +-- held by Session 2 +1&:DELETE FROM part_tbl_1_prt_1 WHERE c = 1; + +-- the below delete will wait to acquire the transaction lock to delete the tuple +-- held by Session 1 +2&:DELETE FROM part_tbl where c = 9; + +1<: <... completed> +DELETE 1 +2<: <... completed> +ERROR: canceling statement due to user request: "cancelled by global deadlock detector" + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +ROLLBACK +2:ROLLBACK; +ROLLBACK +DROP TABLE IF EXISTS part_tbl; +DROP diff --git a/src/test/isolation2/expected/gdd/dml_locks_only_targeted_table_in_query.out b/src/test/isolation2/expected/gdd/dml_locks_only_targeted_table_in_query.out new file mode 100644 index 0000000000..3c3e5b51ba --- /dev/null +++ b/src/test/isolation2/expected/gdd/dml_locks_only_targeted_table_in_query.out @@ -0,0 +1,78 @@ +DROP TABLE IF EXISTS part_tbl_upd_del; +DROP + +-- check gdd is on +show gp_enable_global_deadlock_detector; + gp_enable_global_deadlock_detector +------------------------------------ + on +(1 row) + +CREATE TABLE part_tbl_upd_del (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +CREATE +INSERT INTO part_tbl_upd_del SELECT i, 1, i FROM generate_series(1,10)i; +INSERT 10 + +1: BEGIN; +BEGIN +1: DELETE FROM part_tbl_upd_del; +DELETE 10 +-- since the delete operation is on root part and gdd is on, there will be no lock on leaf partition on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1; + ?column? +---------- +(0 rows) +-- lock on qd will be there for root partition +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1; + ?column? +---------- + 1 +(1 row) +1: ROLLBACK; +ROLLBACK + + +1: BEGIN; +BEGIN +1: UPDATE part_tbl_upd_del SET c = 1 WHERE c = 1; +UPDATE 1 +-- since the update operation is on root part and gdd is on, there will be no lock on leaf partition on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1; + ?column? +---------- +(0 rows) +-- lock on qd will be there for root partition +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1; + ?column? +---------- + 1 +(1 row) +1: ROLLBACK; +ROLLBACK + +1: BEGIN; +BEGIN +1: DELETE FROM part_tbl_upd_del_1_prt_1; +DELETE 10 +-- since the delete operation is on leaf part, there will be a lock on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock'; + ?column? +---------- + 1 +(1 row) +1: ROLLBACK; +ROLLBACK + + +1: BEGIN; +BEGIN +1: UPDATE part_tbl_upd_del_1_prt_1 SET c = 1 WHERE c = 1; +UPDATE 1 +-- since the update operation is on leaf part, there will be a lock on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock'; + ?column? +---------- + 1 +(1 row) +1: ROLLBACK; +ROLLBACK diff --git a/src/test/isolation2/expected/gdd/planner_insert_while_vacuum_drop.out b/src/test/isolation2/expected/gdd/planner_insert_while_vacuum_drop.out new file mode 100644 index 0000000000..524874de6e --- /dev/null +++ b/src/test/isolation2/expected/gdd/planner_insert_while_vacuum_drop.out @@ -0,0 +1,86 @@ +-- Ensures that deadlock between an INSERT with planner and +-- appendonly VACUUM drop phase can be detected. +-- ORCA during translation of INSERT statement tries to get AccessShareLock on +-- leaf partition but Vacuum already has AccessExclusiveLock on the leaf +-- partition so it keeps waiting and the deadlock is not observed. +CREATE EXTENSION IF NOT EXISTS gp_inject_fault; +CREATE +select gp_inject_fault2('all', 'reset', dbid, hostname, port) from gp_segment_configuration; + gp_inject_fault2 +------------------ + Success: + Success: + Success: + Success: + Success: + Success: + Success: + Success: +(8 rows) + +-- Helper function +CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ bool AS $$ /*in func*/ declare /*in func*/ result bool; /*in func*/ begin /*in func*/ result := false; /*in func*/ -- Wait until lock is acquired /*in func*/ while result IS NOT true loop /*in func*/ select l.granted INTO result /*in func*/ from pg_locks l /*in func*/ where l.relation::regclass = rel_name::regclass /*in func*/ and l.mode=lmode /*in func*/ and l.gp_segment_id=segment_id; /*in func*/ perform pg_sleep(0.1); /*in func*/ end loop; /*in func*/ return result; /*in func*/ end; /*in func*/ $$ language plpgsql; +CREATE + +-- Given an append only table with partitions that is ready to be compacted +CREATE TABLE ao_part_tbl (a int, b int) with (appendonly=true, orientation=row) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (2) EVERY (1)); +CREATE + +INSERT INTO ao_part_tbl VALUES (1, 1); +INSERT 1 +DELETE FROM ao_part_tbl; +DELETE 1 + +-- Check gdd is enabled +show gp_enable_global_deadlock_detector; + gp_enable_global_deadlock_detector +------------------------------------ + on +(1 row) + +-- VACUUM drop phase is blocked before it opens the child relation on the primary +SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_inject_fault2 +------------------ + Success: +(1 row) +-- VACUUM takes AccessExclusiveLock on the leaf partition on QD +1&: VACUUM ao_part_tbl; +SELECT gp_wait_until_triggered_fault2('vacuum_relation_open_relation_during_drop_phase', 1, dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_wait_until_triggered_fault2 +-------------------------------- + Success: +(1 row) + +-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation +2: set optimizer to off; +SET +-- INSERT takes RowExclusiveLock on the leaf partition on QE +2&: INSERT INTO ao_part_tbl VALUES (1, 1); +SELECT wait_until_acquired_lock_on_rel('ao_part_tbl_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + wait_until_acquired_lock_on_rel +--------------------------------- + t +(1 row) + +-- Reset the fault on VACUUM. +SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_inject_fault2 +------------------ + Success: +(1 row) +-- VACUUM waits for AccessExclusiveLock on the leaf table on QE +1<: <... completed> +VACUUM +-- INSERT watis for AccessShareLock on the leaf table on QD, hence deadlock +2<: <... completed> +ERROR: canceling statement due to user request: "cancelled by global deadlock detector" + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +ROLLBACK +2:ROLLBACK; +ROLLBACK +DROP TABLE IF EXISTS ao_part_tbl; +DROP diff --git a/src/test/isolation2/expected/gdd/update-deadlock-root-leaf-concurrent-op.out b/src/test/isolation2/expected/gdd/update-deadlock-root-leaf-concurrent-op.out new file mode 100644 index 0000000000..b28c139540 --- /dev/null +++ b/src/test/isolation2/expected/gdd/update-deadlock-root-leaf-concurrent-op.out @@ -0,0 +1,44 @@ +DROP TABLE IF EXISTS part_tbl; +DROP +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +CREATE +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i; +INSERT 10 + +-- check gdd is enabled +show gp_enable_global_deadlock_detector; + gp_enable_global_deadlock_detector +------------------------------------ + on +(1 row) +1:BEGIN; +BEGIN +1:UPDATE part_tbl_1_prt_1 SET c = 9 WHERE c = 9; +UPDATE 1 + +2:BEGIN; +BEGIN +2:UPDATE part_tbl SET c = 1 WHERE c = 1; +UPDATE 1 + +-- the below update will wait to acquire the transaction lock to update the tuple +-- held by Session 2 +1&:UPDATE part_tbl_1_prt_1 SET c = 1 WHERE c = 1; + +-- the below update will wait to acquire the transaction lock to update the tuple +-- held by Session 1 +2&:UPDATE part_tbl SET c = 9 WHERE c = 9; + +1<: <... completed> +UPDATE 1 +2<: <... completed> +ERROR: canceling statement due to user request: "cancelled by global deadlock detector" + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +ROLLBACK +2:ROLLBACK; +ROLLBACK +DROP TABLE IF EXISTS part_tbl; +DROP diff --git a/src/test/isolation2/input/uao/insert_while_vacuum_drop.source b/src/test/isolation2/input/uao/insert_while_vacuum_drop.source deleted file mode 100644 index 441b206bee..0000000000 --- a/src/test/isolation2/input/uao/insert_while_vacuum_drop.source +++ /dev/null @@ -1,54 +0,0 @@ --- @Description Ensures that an INSERT while VACUUM drop phase does not leave --- the segfile state inconsistent on master and the primary. --- -select gp_inject_fault('all', 'reset', 1); - --- Helper function -CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ - bool AS $$ /*in func*/ -declare /*in func*/ - result bool; /*in func*/ -begin /*in func*/ -result := false; /*in func*/ --- Wait until lock is acquired /*in func*/ -while result = false loop /*in func*/ -select l.granted INTO result /*in func*/ -from pg_locks l, pg_class c /*in func*/ -where l.relation = c.oid /*in func*/ - and c.relname=rel_name /*in func*/ - and l.mode=lmode /*in func*/ - and l.gp_segment_id=segment_id; /*in func*/ -if result = false then /*in func*/ - perform pg_sleep(0.1); /*in func*/ -end if; /*in func*/ -end loop; /*in func*/ -return result; /*in func*/ -end; /*in func*/ -$$ language plpgsql; - --- Given an append only table with partitions that is ready to be compacted -CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@) - DISTRIBUTED BY (a) - PARTITION BY RANGE (b) -(START (1) END (2) EVERY (1)); - -INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -DELETE FROM insert_while_vacuum_drop_@orientation@; - --- And VACUUM drop phase is blocked before it opens the child relation on the primary -SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; -1&: VACUUM insert_while_vacuum_drop_@orientation@; -SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - --- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation -2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - --- Reset the fault on VACUUM and the two sessions should not be blocking each other -SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; -1<: -2<: - --- The following INSERT transaction should still work and should not fail on --- "ERROR cannot insert into segno (1) for AO relid that is in state AOSEG_STATE_AWAITING_DROP" -INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); \ No newline at end of file diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index f83ee5d1ac..436538dfaa 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -1,6 +1,7 @@ test: lockmodes test: ao_partition_lock +test: dml_on_root_locks_all_parts test: select_dropped_table # test update hash col under utility mode @@ -28,6 +29,10 @@ ignore: gdd/non-lock-107 test: gdd/avoid-qd-deadlock test: gdd/prepare-for-local test: gdd/local-deadlock-03 +test: gdd/delete-deadlock-root-leaf-concurrent-op +test: gdd/planner_insert_while_vacuum_drop +test: gdd/update-deadlock-root-leaf-concurrent-op +test: gdd/dml_locks_only_targeted_table_in_query # gdd end test: gdd/end diff --git a/src/test/isolation2/output/uao/insert_while_vacuum_drop.source b/src/test/isolation2/output/uao/insert_while_vacuum_drop.source deleted file mode 100644 index a219427260..0000000000 --- a/src/test/isolation2/output/uao/insert_while_vacuum_drop.source +++ /dev/null @@ -1,59 +0,0 @@ --- @Description Ensures that an INSERT while VACUUM drop phase does not leave --- the segfile state inconsistent on master and the primary. --- -select gp_inject_fault('all', 'reset', 1); - gp_inject_fault ------------------ - t -(1 row) - --- Helper function -CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ bool AS $$ /*in func*/ declare /*in func*/ result bool; /*in func*/ begin /*in func*/ result := false; /*in func*/ -- Wait until lock is acquired /*in func*/ -while result = false loop /*in func*/ select l.granted INTO result /*in func*/ from pg_locks l, pg_class c /*in func*/ where l.relation = c.oid /*in func*/ and c.relname=rel_name /*in func*/ and l.mode=lmode /*in func*/ and l.gp_segment_id=segment_id; /*in func*/ if result = false then /*in func*/ perform pg_sleep(0.1); /*in func*/ end if; /*in func*/ end loop; /*in func*/ return result; /*in func*/ end; /*in func*/ $$ language plpgsql; -CREATE - --- Given an append only table with partitions that is ready to be compacted -CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (2) EVERY (1)); -CREATE - -INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -INSERT 1 -DELETE FROM insert_while_vacuum_drop_@orientation@; -DELETE 1 - --- And VACUUM drop phase is blocked before it opens the child relation on the primary -SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - gp_inject_fault ------------------ - t -(1 row) -1&: VACUUM insert_while_vacuum_drop_@orientation@; -SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - gp_wait_until_triggered_fault -------------------------------- - t -(1 row) - --- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation -2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - wait_until_acquired_lock_on_rel ---------------------------------- - -(1 row) - --- Reset the fault on VACUUM and the two sessions should not be blocking each other -SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - gp_inject_fault ------------------ - t -(1 row) -1<: <... completed> -VACUUM -2<: <... completed> -INSERT 1 - --- The following INSERT transaction should still work and should not fail on --- "ERROR cannot insert into segno (1) for AO relid that is in state AOSEG_STATE_AWAITING_DROP" -INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -INSERT 1 diff --git a/src/test/isolation2/sql/dml_on_root_locks_all_parts.sql b/src/test/isolation2/sql/dml_on_root_locks_all_parts.sql new file mode 100644 index 0000000000..c6b000bf4d --- /dev/null +++ b/src/test/isolation2/sql/dml_on_root_locks_all_parts.sql @@ -0,0 +1,50 @@ +-- For partitioned tables if a DML is run on the root table, we should +-- take ExclusiveLock on the root and the child partitions. If lock is +-- not taken on the leaf partition on the QD, there may be issues where +-- a concurrent DML on the leaf partition may execute first and can cause +-- issues. For example +-- 1. It can cause a deadlock between the 2 DML operations. +-- Consider the below example. +-- INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,1000000)i; +-- Session 1: BEGIN; DELETE FROM part_tbl; ==> Let's say it holds Exclusive lock on root table only. (not on leaf) +-- Session 2: BEGIN; DELETE FROM part_tbl where a = 999999 or a = 1; ==> Delete will be dispatched to the segment as there is no lock on QD. +-- If Session 1, first deletes the tuple a = 1 on segment 0, Session 2 will wait for transaction lock as it attempting to delete the same tuple. +-- If Session 2, first deletes the tuple a = 999999 on segment 1, Session 1 will wait for transaction lock as it is attempting to delete the same tuple. +-- This will cause a deadlock. +-- Note: Same applies to UPDATE as well. +DROP TABLE IF EXISTS part_tbl; +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE (b) (start(1) end(2) every(1)); + +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,12)i; + +1: BEGIN; +-- DELETE will acquire Exclusive lock on root and leaf partition on QD. +1: DELETE FROM part_tbl; + +-- Delete must hold an exclusive lock on the leaf partition on QD. +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + +-- DELETE on the leaf partition must wait on the QD as Session 1 already holds ExclusiveLock. +2&: DELETE FROM part_tbl_1_prt_1 WHERE b = 10; +SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f'; + +1: COMMIT; +2<: + +1: BEGIN; +-- UPDATE will acquire Exclusive lock on root and leaf partition on QD. +1: UPDATE part_tbl SET c = 1; +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + +-- UPDATE on leaf must be blocked on QD as previous UPDATE acquires Exclusive lock on the root and the leaf partitions +2&: UPDATE part_tbl_1_prt_1 set c = 10; +SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f'; + +1: COMMIT; +2<: + +1: BEGIN; +1: INSERT INTO part_tbl SELECT 1,1,1; +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='RowExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; +1: COMMIT; +DROP TABLE part_tbl; diff --git a/src/test/isolation2/sql/gdd/delete-deadlock-root-leaf-concurrent-op.sql b/src/test/isolation2/sql/gdd/delete-deadlock-root-leaf-concurrent-op.sql new file mode 100644 index 0000000000..dd8e0fe94a --- /dev/null +++ b/src/test/isolation2/sql/gdd/delete-deadlock-root-leaf-concurrent-op.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS part_tbl; +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i; + +-- check gdd is enabled +show gp_enable_global_deadlock_detector; +1:BEGIN; +1:DELETE FROM part_tbl_1_prt_1 WHERE c = 9; + +2:BEGIN; +2:DELETE FROM part_tbl where c = 1; + +-- the below delete will wait to acquire the transaction lock to delete the tuple +-- held by Session 2 +1&:DELETE FROM part_tbl_1_prt_1 WHERE c = 1; + +-- the below delete will wait to acquire the transaction lock to delete the tuple +-- held by Session 1 +2&:DELETE FROM part_tbl where c = 9; + +1<: +2<: + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +2:ROLLBACK; +DROP TABLE IF EXISTS part_tbl; diff --git a/src/test/isolation2/sql/gdd/dml_locks_only_targeted_table_in_query.sql b/src/test/isolation2/sql/gdd/dml_locks_only_targeted_table_in_query.sql new file mode 100644 index 0000000000..92d1589bdf --- /dev/null +++ b/src/test/isolation2/sql/gdd/dml_locks_only_targeted_table_in_query.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS part_tbl_upd_del; + +-- check gdd is on +show gp_enable_global_deadlock_detector; + +CREATE TABLE part_tbl_upd_del (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +INSERT INTO part_tbl_upd_del SELECT i, 1, i FROM generate_series(1,10)i; + +1: BEGIN; +1: DELETE FROM part_tbl_upd_del; +-- since the delete operation is on root part and gdd is on, there will be no lock on leaf partition on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1; +-- lock on qd will be there for root partition +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1; +1: ROLLBACK; + + +1: BEGIN; +1: UPDATE part_tbl_upd_del SET c = 1 WHERE c = 1; +-- since the update operation is on root part and gdd is on, there will be no lock on leaf partition on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1; +-- lock on qd will be there for root partition +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1; +1: ROLLBACK; + +1: BEGIN; +1: DELETE FROM part_tbl_upd_del_1_prt_1; +-- since the delete operation is on leaf part, there will be a lock on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock'; +1: ROLLBACK; + + +1: BEGIN; +1: UPDATE part_tbl_upd_del_1_prt_1 SET c = 1 WHERE c = 1; +-- since the update operation is on leaf part, there will be a lock on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock'; +1: ROLLBACK; diff --git a/src/test/isolation2/sql/gdd/planner_insert_while_vacuum_drop.sql b/src/test/isolation2/sql/gdd/planner_insert_while_vacuum_drop.sql new file mode 100644 index 0000000000..a52fd4a4e8 --- /dev/null +++ b/src/test/isolation2/sql/gdd/planner_insert_while_vacuum_drop.sql @@ -0,0 +1,64 @@ +-- Ensures that deadlock between an INSERT with planner and +-- appendonly VACUUM drop phase can be detected. +-- ORCA during translation of INSERT statement tries to get AccessShareLock on +-- leaf partition but Vacuum already has AccessExclusiveLock on the leaf +-- partition so it keeps waiting and the deadlock is not observed. +CREATE EXTENSION IF NOT EXISTS gp_inject_fault; +select gp_inject_fault2('all', 'reset', dbid, hostname, port) from gp_segment_configuration; + +-- Helper function +CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ + bool AS $$ /*in func*/ +declare /*in func*/ + result bool; /*in func*/ +begin /*in func*/ + result := false; /*in func*/ + -- Wait until lock is acquired /*in func*/ + while result IS NOT true loop /*in func*/ + select l.granted INTO result /*in func*/ + from pg_locks l /*in func*/ + where l.relation::regclass = rel_name::regclass /*in func*/ + and l.mode=lmode /*in func*/ + and l.gp_segment_id=segment_id; /*in func*/ + perform pg_sleep(0.1); /*in func*/ + end loop; /*in func*/ + return result; /*in func*/ +end; /*in func*/ +$$ language plpgsql; + +-- Given an append only table with partitions that is ready to be compacted +CREATE TABLE ao_part_tbl (a int, b int) with (appendonly=true, orientation=row) + DISTRIBUTED BY (a) + PARTITION BY RANGE (b) +(START (1) END (2) EVERY (1)); + +INSERT INTO ao_part_tbl VALUES (1, 1); +DELETE FROM ao_part_tbl; + +-- Check gdd is enabled +show gp_enable_global_deadlock_detector; + +-- VACUUM drop phase is blocked before it opens the child relation on the primary +SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; +-- VACUUM takes AccessExclusiveLock on the leaf partition on QD +1&: VACUUM ao_part_tbl; +SELECT gp_wait_until_triggered_fault2('vacuum_relation_open_relation_during_drop_phase', 1, dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + +-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation +2: set optimizer to off; +-- INSERT takes RowExclusiveLock on the leaf partition on QE +2&: INSERT INTO ao_part_tbl VALUES (1, 1); +SELECT wait_until_acquired_lock_on_rel('ao_part_tbl_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + +-- Reset the fault on VACUUM. +SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; +-- VACUUM waits for AccessExclusiveLock on the leaf table on QE +1<: +-- INSERT watis for AccessShareLock on the leaf table on QD, hence deadlock +2<: + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +2:ROLLBACK; +DROP TABLE IF EXISTS ao_part_tbl; diff --git a/src/test/isolation2/sql/gdd/update-deadlock-root-leaf-concurrent-op.sql b/src/test/isolation2/sql/gdd/update-deadlock-root-leaf-concurrent-op.sql new file mode 100644 index 0000000000..4567c8879c --- /dev/null +++ b/src/test/isolation2/sql/gdd/update-deadlock-root-leaf-concurrent-op.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS part_tbl; +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i; + +-- check gdd is enabled +show gp_enable_global_deadlock_detector; +1:BEGIN; +1:UPDATE part_tbl_1_prt_1 SET c = 9 WHERE c = 9; + +2:BEGIN; +2:UPDATE part_tbl SET c = 1 WHERE c = 1; + +-- the below update will wait to acquire the transaction lock to update the tuple +-- held by Session 2 +1&:UPDATE part_tbl_1_prt_1 SET c = 1 WHERE c = 1; + +-- the below update will wait to acquire the transaction lock to update the tuple +-- held by Session 1 +2&:UPDATE part_tbl SET c = 9 WHERE c = 9; + +1<: +2<: + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +2:ROLLBACK; +DROP TABLE IF EXISTS part_tbl; diff --git a/src/test/regress/expected/partition_locking_optimizer.out b/src/test/regress/expected/partition_locking_optimizer.out index 54fa995a39..25f0339ded 100644 --- a/src/test/regress/expected/partition_locking_optimizer.out +++ b/src/test/regress/expected/partition_locking_optimizer.out @@ -342,10 +342,19 @@ begin; delete from partlockt where i = 4; -- Known_opt_diff: MPP-20936 select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%'; - coalesce | mode | locktype | node ------------+---------------+----------+-------- - partlockt | ExclusiveLock | relation | master -(1 row) + coalesce | mode | locktype | node +-------------------+---------------+----------+-------- + partlockt | ExclusiveLock | relation | master + partlockt_1_prt_1 | ExclusiveLock | relation | master + partlockt_1_prt_2 | ExclusiveLock | relation | master + partlockt_1_prt_3 | ExclusiveLock | relation | master + partlockt_1_prt_4 | ExclusiveLock | relation | master + partlockt_1_prt_5 | ExclusiveLock | relation | master + partlockt_1_prt_6 | ExclusiveLock | relation | master + partlockt_1_prt_7 | ExclusiveLock | relation | master + partlockt_1_prt_8 | ExclusiveLock | relation | master + partlockt_1_prt_9 | ExclusiveLock | relation | master +(10 rows) select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node -- GitLab