diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 845b7da740df0d9d864ae1fcbd26125e7e44ef80..1535dcc0f8de50677380d620b575c2d65a393696 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1712,6 +1712,18 @@ InitPlan(QueryDesc *queryDesc, int eflags) resultRelationOid = getrelid(resultRelationIndex, rangeTable); if (operation == CMD_UPDATE || operation == CMD_DELETE) { + /* + * On QD, the lock on the table has already been taken during parsing, so if it's a child + * partition, we don't need to take a lock. If there a a deadlock GDD will come in place + * and resolve the deadlock. ORCA Update / Delete plans only contains the root relation, so + * no locks on leaf partition are taken here. The below changes makes planner as well to not + * take locks on leaf partitions with GDD on. + * Note: With GDD off, ORCA and planner both will acquire locks on the leaf partitions. + */ + if (Gp_role == GP_ROLE_DISPATCH && rel_is_child_partition(resultRelationOid) && gp_enable_global_deadlock_detector) + { + lockmode = NoLock; + } resultRelation = CdbOpenRelation(resultRelationOid, lockmode, false, /* noWait */ @@ -1772,16 +1784,18 @@ InitPlan(QueryDesc *queryDesc, int eflags) * - Otherwise, all_relids is a map of result_partitions to * get each element's relation oid. */ + if (containRoot) { /* - * For partition table, INSERT plan only contains the root table - * in the result relations, whereas DELETE and UPDATE contain - * both root table and the partition tables. + * For partition tables, if GDD is off, any DML statement on root + * partition, must acquire locks on the leaf partitions to avoid + * deadlocks. * * Without locking the partition relations on QD when INSERT - * the following dead lock scenario may happen between INSERT and - * AppendOnly VACUUM drop phase on the partition table: + * with Planner the following dead lock scenario may happen + * between INSERT and AppendOnly VACUUM drop phase on the + * partition table: * * 1. AO VACUUM drop on QD: acquired AccessExclusiveLock * 2. INSERT on QE: acquired RowExclusiveLock @@ -1792,14 +1806,31 @@ InitPlan(QueryDesc *queryDesc, int eflags) * until 3 and 4 proceed. Hence INSERT needs to Lock the partition * tables on QD here (before 2) to prevent this dead lock. * - * FIXME: Ideally we may want to - * 1) Lock the partition table during the parse stage just as when - * we lock the root oid - * 2) Only lock the particular partition table that we are - * inserting into, right now we don't have that info here. + * Deadlock can also occur in case of DELETE as below + * Session1: BEGIN; delete from foo_1_prt_1 WHERE c = 999999; => Holds + * Exclusive lock on foo_1_prt_1 on QD and marks the tuple c updated by the + * current transaction; + * Session2: BEGIN; delete from foo WHERE c = 1; => Holds Exclusive lock + * on foo on QD and marks the tuple c = 1 updated by current transaction + * Session1: DELETE FROM foo_1_prt_1 WHERE c = 1; => This wait, as Session + * 2 has already taken the lock, Session1 will wait to acquire the + * transaction lock. + * Session2: DELETE FROM foo WHERE c = 999999; => This waits, as Session 1 + * has already taken the lock, Session 2 will wait to acquire the + * transaction lock. + * This will cause a deadlock. + * Similar scenario apply for UPDATE as well. */ - lockmode = (operation == CMD_INSERT && rel_is_partitioned(relid)) ? - RowExclusiveLock : NoLock; + lockmode = NoLock; + if ((operation == CMD_DELETE || operation == CMD_INSERT || operation == CMD_UPDATE) && + !gp_enable_global_deadlock_detector && + rel_is_partitioned(relid)) + { + if (operation == CMD_INSERT) + lockmode = RowExclusiveLock; + else + lockmode = ExclusiveLock; + } all_relids = find_all_inheritors(relid, lockmode, NULL); } else diff --git a/src/test/isolation2/expected/dml_on_root_locks_all_parts.out b/src/test/isolation2/expected/dml_on_root_locks_all_parts.out new file mode 100644 index 0000000000000000000000000000000000000000..e93a6091c85d3f657f98dea310a4750b1b7ba690 --- /dev/null +++ b/src/test/isolation2/expected/dml_on_root_locks_all_parts.out @@ -0,0 +1,83 @@ +-- For partitioned tables if a DML is run on the root table, we should +-- take ExclusiveLock on the root and the child partitions. If lock is +-- not taken on the leaf partition on the QD, there may be issues where +-- a concurrent DML on the leaf partition may execute first and can cause +-- issues. For example +-- 1. It can cause a deadlock between the 2 DML operations. +-- Consider the below example. +-- INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,1000000)i; +-- Session 1: BEGIN; DELETE FROM part_tbl; ==> Let's say it holds Exclusive lock on root table only. (not on leaf) +-- Session 2: BEGIN; DELETE FROM part_tbl where a = 999999 or a = 1; ==> Delete will be dispatched to the segment as there is no lock on QD. +-- If Session 1, first deletes the tuple a = 1 on segment 0, Session 2 will wait for transaction lock as it attempting to delete the same tuple. +-- If Session 2, first deletes the tuple a = 999999 on segment 1, Session 1 will wait for transaction lock as it is attempting to delete the same tuple. +-- This will cause a deadlock. +-- Note: Same applies to UPDATE as well. +DROP TABLE IF EXISTS part_tbl; +DROP +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE (b) (start(1) end(2) every(1)); +CREATE + +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,12)i; +INSERT 12 + +1: BEGIN; +BEGIN +-- DELETE will acquire Exclusive lock on root and leaf partition on QD. +1: DELETE FROM part_tbl; +DELETE 12 + +-- Delete must hold an exclusive lock on the leaf partition on QD. +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + granted +--------- + t +(1 row) + +-- DELETE on the leaf partition must wait on the QD as Session 1 already holds ExclusiveLock. +2&: DELETE FROM part_tbl_1_prt_1 WHERE b = 10; +SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f'; + relation | mode | granted +------------------+---------------+--------- + part_tbl_1_prt_1 | ExclusiveLock | f +(1 row) + +1: COMMIT; +COMMIT +2<: <... completed> +DELETE 0 + +1: BEGIN; +BEGIN +-- UPDATE will acquire Exclusive lock on root and leaf partition on QD. +1: UPDATE part_tbl SET c = 1; SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + granted +--------- + t +(1 row) + +-- UPDATE on leaf must be blocked on QD as previous UPDATE acquires Exclusive lock on the root and the leaf partitions +2&: UPDATE part_tbl_1_prt_1 set c = 10; +SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f'; + relation | mode | granted +------------------+---------------+--------- + part_tbl_1_prt_1 | ExclusiveLock | f +(1 row) + +1: COMMIT; +COMMIT +2<: <... completed> +UPDATE 0 + +1: BEGIN; +BEGIN +1: INSERT INTO part_tbl SELECT 1,1,1; +INSERT 1 +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='RowExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + granted +--------- + t +(1 row) +1: COMMIT; +COMMIT +DROP TABLE part_tbl; +DROP diff --git a/src/test/isolation2/expected/gdd/delete-deadlock-root-leaf-concurrent-op.out b/src/test/isolation2/expected/gdd/delete-deadlock-root-leaf-concurrent-op.out new file mode 100644 index 0000000000000000000000000000000000000000..55c5407d827184f1272ddc6c2a06050537107164 --- /dev/null +++ b/src/test/isolation2/expected/gdd/delete-deadlock-root-leaf-concurrent-op.out @@ -0,0 +1,44 @@ +DROP TABLE IF EXISTS part_tbl; +DROP +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +CREATE +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i; +INSERT 10 + +-- check gdd is enabled +show gp_enable_global_deadlock_detector; + gp_enable_global_deadlock_detector +------------------------------------ + on +(1 row) +1:BEGIN; +BEGIN +1:DELETE FROM part_tbl_1_prt_1 WHERE c = 9; +DELETE 1 + +2:BEGIN; +BEGIN +2:DELETE FROM part_tbl where c = 1; +DELETE 1 + +-- the below delete will wait to acquire the transaction lock to delete the tuple +-- held by Session 2 +1&:DELETE FROM part_tbl_1_prt_1 WHERE c = 1; + +-- the below delete will wait to acquire the transaction lock to delete the tuple +-- held by Session 1 +2&:DELETE FROM part_tbl where c = 9; + +1<: <... completed> +DELETE 1 +2<: <... completed> +ERROR: canceling statement due to user request: "cancelled by global deadlock detector" + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +ROLLBACK +2:ROLLBACK; +ROLLBACK +DROP TABLE IF EXISTS part_tbl; +DROP diff --git a/src/test/isolation2/expected/gdd/dml_locks_only_targeted_table_in_query.out b/src/test/isolation2/expected/gdd/dml_locks_only_targeted_table_in_query.out new file mode 100644 index 0000000000000000000000000000000000000000..3c3e5b51badb1816bf03cd3967cf13c0538ca6a7 --- /dev/null +++ b/src/test/isolation2/expected/gdd/dml_locks_only_targeted_table_in_query.out @@ -0,0 +1,78 @@ +DROP TABLE IF EXISTS part_tbl_upd_del; +DROP + +-- check gdd is on +show gp_enable_global_deadlock_detector; + gp_enable_global_deadlock_detector +------------------------------------ + on +(1 row) + +CREATE TABLE part_tbl_upd_del (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +CREATE +INSERT INTO part_tbl_upd_del SELECT i, 1, i FROM generate_series(1,10)i; +INSERT 10 + +1: BEGIN; +BEGIN +1: DELETE FROM part_tbl_upd_del; +DELETE 10 +-- since the delete operation is on root part and gdd is on, there will be no lock on leaf partition on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1; + ?column? +---------- +(0 rows) +-- lock on qd will be there for root partition +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1; + ?column? +---------- + 1 +(1 row) +1: ROLLBACK; +ROLLBACK + + +1: BEGIN; +BEGIN +1: UPDATE part_tbl_upd_del SET c = 1 WHERE c = 1; +UPDATE 1 +-- since the update operation is on root part and gdd is on, there will be no lock on leaf partition on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1; + ?column? +---------- +(0 rows) +-- lock on qd will be there for root partition +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1; + ?column? +---------- + 1 +(1 row) +1: ROLLBACK; +ROLLBACK + +1: BEGIN; +BEGIN +1: DELETE FROM part_tbl_upd_del_1_prt_1; +DELETE 10 +-- since the delete operation is on leaf part, there will be a lock on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock'; + ?column? +---------- + 1 +(1 row) +1: ROLLBACK; +ROLLBACK + + +1: BEGIN; +BEGIN +1: UPDATE part_tbl_upd_del_1_prt_1 SET c = 1 WHERE c = 1; +UPDATE 1 +-- since the update operation is on leaf part, there will be a lock on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock'; + ?column? +---------- + 1 +(1 row) +1: ROLLBACK; +ROLLBACK diff --git a/src/test/isolation2/expected/gdd/planner_insert_while_vacuum_drop.out b/src/test/isolation2/expected/gdd/planner_insert_while_vacuum_drop.out new file mode 100644 index 0000000000000000000000000000000000000000..524874de6e68d51922eb17ebacc6e1fdb136cbba --- /dev/null +++ b/src/test/isolation2/expected/gdd/planner_insert_while_vacuum_drop.out @@ -0,0 +1,86 @@ +-- Ensures that deadlock between an INSERT with planner and +-- appendonly VACUUM drop phase can be detected. +-- ORCA during translation of INSERT statement tries to get AccessShareLock on +-- leaf partition but Vacuum already has AccessExclusiveLock on the leaf +-- partition so it keeps waiting and the deadlock is not observed. +CREATE EXTENSION IF NOT EXISTS gp_inject_fault; +CREATE +select gp_inject_fault2('all', 'reset', dbid, hostname, port) from gp_segment_configuration; + gp_inject_fault2 +------------------ + Success: + Success: + Success: + Success: + Success: + Success: + Success: + Success: +(8 rows) + +-- Helper function +CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ bool AS $$ /*in func*/ declare /*in func*/ result bool; /*in func*/ begin /*in func*/ result := false; /*in func*/ -- Wait until lock is acquired /*in func*/ while result IS NOT true loop /*in func*/ select l.granted INTO result /*in func*/ from pg_locks l /*in func*/ where l.relation::regclass = rel_name::regclass /*in func*/ and l.mode=lmode /*in func*/ and l.gp_segment_id=segment_id; /*in func*/ perform pg_sleep(0.1); /*in func*/ end loop; /*in func*/ return result; /*in func*/ end; /*in func*/ $$ language plpgsql; +CREATE + +-- Given an append only table with partitions that is ready to be compacted +CREATE TABLE ao_part_tbl (a int, b int) with (appendonly=true, orientation=row) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (2) EVERY (1)); +CREATE + +INSERT INTO ao_part_tbl VALUES (1, 1); +INSERT 1 +DELETE FROM ao_part_tbl; +DELETE 1 + +-- Check gdd is enabled +show gp_enable_global_deadlock_detector; + gp_enable_global_deadlock_detector +------------------------------------ + on +(1 row) + +-- VACUUM drop phase is blocked before it opens the child relation on the primary +SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_inject_fault2 +------------------ + Success: +(1 row) +-- VACUUM takes AccessExclusiveLock on the leaf partition on QD +1&: VACUUM ao_part_tbl; +SELECT gp_wait_until_triggered_fault2('vacuum_relation_open_relation_during_drop_phase', 1, dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_wait_until_triggered_fault2 +-------------------------------- + Success: +(1 row) + +-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation +2: set optimizer to off; +SET +-- INSERT takes RowExclusiveLock on the leaf partition on QE +2&: INSERT INTO ao_part_tbl VALUES (1, 1); +SELECT wait_until_acquired_lock_on_rel('ao_part_tbl_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + wait_until_acquired_lock_on_rel +--------------------------------- + t +(1 row) + +-- Reset the fault on VACUUM. +SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + gp_inject_fault2 +------------------ + Success: +(1 row) +-- VACUUM waits for AccessExclusiveLock on the leaf table on QE +1<: <... completed> +VACUUM +-- INSERT watis for AccessShareLock on the leaf table on QD, hence deadlock +2<: <... completed> +ERROR: canceling statement due to user request: "cancelled by global deadlock detector" + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +ROLLBACK +2:ROLLBACK; +ROLLBACK +DROP TABLE IF EXISTS ao_part_tbl; +DROP diff --git a/src/test/isolation2/expected/gdd/update-deadlock-root-leaf-concurrent-op.out b/src/test/isolation2/expected/gdd/update-deadlock-root-leaf-concurrent-op.out new file mode 100644 index 0000000000000000000000000000000000000000..b28c1395401aa14edcb410e9f2171bb41b4b94d7 --- /dev/null +++ b/src/test/isolation2/expected/gdd/update-deadlock-root-leaf-concurrent-op.out @@ -0,0 +1,44 @@ +DROP TABLE IF EXISTS part_tbl; +DROP +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +CREATE +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i; +INSERT 10 + +-- check gdd is enabled +show gp_enable_global_deadlock_detector; + gp_enable_global_deadlock_detector +------------------------------------ + on +(1 row) +1:BEGIN; +BEGIN +1:UPDATE part_tbl_1_prt_1 SET c = 9 WHERE c = 9; +UPDATE 1 + +2:BEGIN; +BEGIN +2:UPDATE part_tbl SET c = 1 WHERE c = 1; +UPDATE 1 + +-- the below update will wait to acquire the transaction lock to update the tuple +-- held by Session 2 +1&:UPDATE part_tbl_1_prt_1 SET c = 1 WHERE c = 1; + +-- the below update will wait to acquire the transaction lock to update the tuple +-- held by Session 1 +2&:UPDATE part_tbl SET c = 9 WHERE c = 9; + +1<: <... completed> +UPDATE 1 +2<: <... completed> +ERROR: canceling statement due to user request: "cancelled by global deadlock detector" + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +ROLLBACK +2:ROLLBACK; +ROLLBACK +DROP TABLE IF EXISTS part_tbl; +DROP diff --git a/src/test/isolation2/input/uao/insert_while_vacuum_drop.source b/src/test/isolation2/input/uao/insert_while_vacuum_drop.source deleted file mode 100644 index 441b206bee8f93aef74caa1ad697769d18327375..0000000000000000000000000000000000000000 --- a/src/test/isolation2/input/uao/insert_while_vacuum_drop.source +++ /dev/null @@ -1,54 +0,0 @@ --- @Description Ensures that an INSERT while VACUUM drop phase does not leave --- the segfile state inconsistent on master and the primary. --- -select gp_inject_fault('all', 'reset', 1); - --- Helper function -CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ - bool AS $$ /*in func*/ -declare /*in func*/ - result bool; /*in func*/ -begin /*in func*/ -result := false; /*in func*/ --- Wait until lock is acquired /*in func*/ -while result = false loop /*in func*/ -select l.granted INTO result /*in func*/ -from pg_locks l, pg_class c /*in func*/ -where l.relation = c.oid /*in func*/ - and c.relname=rel_name /*in func*/ - and l.mode=lmode /*in func*/ - and l.gp_segment_id=segment_id; /*in func*/ -if result = false then /*in func*/ - perform pg_sleep(0.1); /*in func*/ -end if; /*in func*/ -end loop; /*in func*/ -return result; /*in func*/ -end; /*in func*/ -$$ language plpgsql; - --- Given an append only table with partitions that is ready to be compacted -CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@) - DISTRIBUTED BY (a) - PARTITION BY RANGE (b) -(START (1) END (2) EVERY (1)); - -INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -DELETE FROM insert_while_vacuum_drop_@orientation@; - --- And VACUUM drop phase is blocked before it opens the child relation on the primary -SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; -1&: VACUUM insert_while_vacuum_drop_@orientation@; -SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - --- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation -2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - --- Reset the fault on VACUUM and the two sessions should not be blocking each other -SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; -1<: -2<: - --- The following INSERT transaction should still work and should not fail on --- "ERROR cannot insert into segno (1) for AO relid that is in state AOSEG_STATE_AWAITING_DROP" -INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); \ No newline at end of file diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index f83ee5d1ac720269d97f1bc9ad81a4f29968278d..436538dfaa93e0805633cabb571f978a63308367 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -1,6 +1,7 @@ test: lockmodes test: ao_partition_lock +test: dml_on_root_locks_all_parts test: select_dropped_table # test update hash col under utility mode @@ -28,6 +29,10 @@ ignore: gdd/non-lock-107 test: gdd/avoid-qd-deadlock test: gdd/prepare-for-local test: gdd/local-deadlock-03 +test: gdd/delete-deadlock-root-leaf-concurrent-op +test: gdd/planner_insert_while_vacuum_drop +test: gdd/update-deadlock-root-leaf-concurrent-op +test: gdd/dml_locks_only_targeted_table_in_query # gdd end test: gdd/end diff --git a/src/test/isolation2/output/uao/insert_while_vacuum_drop.source b/src/test/isolation2/output/uao/insert_while_vacuum_drop.source deleted file mode 100644 index a219427260faccb16c5ff9715c0a4b8ced50107e..0000000000000000000000000000000000000000 --- a/src/test/isolation2/output/uao/insert_while_vacuum_drop.source +++ /dev/null @@ -1,59 +0,0 @@ --- @Description Ensures that an INSERT while VACUUM drop phase does not leave --- the segfile state inconsistent on master and the primary. --- -select gp_inject_fault('all', 'reset', 1); - gp_inject_fault ------------------ - t -(1 row) - --- Helper function -CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ bool AS $$ /*in func*/ declare /*in func*/ result bool; /*in func*/ begin /*in func*/ result := false; /*in func*/ -- Wait until lock is acquired /*in func*/ -while result = false loop /*in func*/ select l.granted INTO result /*in func*/ from pg_locks l, pg_class c /*in func*/ where l.relation = c.oid /*in func*/ and c.relname=rel_name /*in func*/ and l.mode=lmode /*in func*/ and l.gp_segment_id=segment_id; /*in func*/ if result = false then /*in func*/ perform pg_sleep(0.1); /*in func*/ end if; /*in func*/ end loop; /*in func*/ return result; /*in func*/ end; /*in func*/ $$ language plpgsql; -CREATE - --- Given an append only table with partitions that is ready to be compacted -CREATE TABLE insert_while_vacuum_drop_@orientation@ (a int, b int) with (appendonly=true, orientation=@orientation@) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (2) EVERY (1)); -CREATE - -INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -INSERT 1 -DELETE FROM insert_while_vacuum_drop_@orientation@; -DELETE 1 - --- And VACUUM drop phase is blocked before it opens the child relation on the primary -SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - gp_inject_fault ------------------ - t -(1 row) -1&: VACUUM insert_while_vacuum_drop_@orientation@; -SELECT gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - gp_wait_until_triggered_fault -------------------------------- - t -(1 row) - --- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation -2&: INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -SELECT wait_until_acquired_lock_on_rel('insert_while_vacuum_drop_@orientation@_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - wait_until_acquired_lock_on_rel ---------------------------------- - -(1 row) - --- Reset the fault on VACUUM and the two sessions should not be blocking each other -SELECT gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; - gp_inject_fault ------------------ - t -(1 row) -1<: <... completed> -VACUUM -2<: <... completed> -INSERT 1 - --- The following INSERT transaction should still work and should not fail on --- "ERROR cannot insert into segno (1) for AO relid that is in state AOSEG_STATE_AWAITING_DROP" -INSERT INTO insert_while_vacuum_drop_@orientation@ VALUES (1, 1); -INSERT 1 diff --git a/src/test/isolation2/sql/dml_on_root_locks_all_parts.sql b/src/test/isolation2/sql/dml_on_root_locks_all_parts.sql new file mode 100644 index 0000000000000000000000000000000000000000..c6b000bf4d17dc534e5930989392c26e205d4816 --- /dev/null +++ b/src/test/isolation2/sql/dml_on_root_locks_all_parts.sql @@ -0,0 +1,50 @@ +-- For partitioned tables if a DML is run on the root table, we should +-- take ExclusiveLock on the root and the child partitions. If lock is +-- not taken on the leaf partition on the QD, there may be issues where +-- a concurrent DML on the leaf partition may execute first and can cause +-- issues. For example +-- 1. It can cause a deadlock between the 2 DML operations. +-- Consider the below example. +-- INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,1000000)i; +-- Session 1: BEGIN; DELETE FROM part_tbl; ==> Let's say it holds Exclusive lock on root table only. (not on leaf) +-- Session 2: BEGIN; DELETE FROM part_tbl where a = 999999 or a = 1; ==> Delete will be dispatched to the segment as there is no lock on QD. +-- If Session 1, first deletes the tuple a = 1 on segment 0, Session 2 will wait for transaction lock as it attempting to delete the same tuple. +-- If Session 2, first deletes the tuple a = 999999 on segment 1, Session 1 will wait for transaction lock as it is attempting to delete the same tuple. +-- This will cause a deadlock. +-- Note: Same applies to UPDATE as well. +DROP TABLE IF EXISTS part_tbl; +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE (b) (start(1) end(2) every(1)); + +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,12)i; + +1: BEGIN; +-- DELETE will acquire Exclusive lock on root and leaf partition on QD. +1: DELETE FROM part_tbl; + +-- Delete must hold an exclusive lock on the leaf partition on QD. +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + +-- DELETE on the leaf partition must wait on the QD as Session 1 already holds ExclusiveLock. +2&: DELETE FROM part_tbl_1_prt_1 WHERE b = 10; +SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f'; + +1: COMMIT; +2<: + +1: BEGIN; +-- UPDATE will acquire Exclusive lock on root and leaf partition on QD. +1: UPDATE part_tbl SET c = 1; +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='ExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; + +-- UPDATE on leaf must be blocked on QD as previous UPDATE acquires Exclusive lock on the root and the leaf partitions +2&: UPDATE part_tbl_1_prt_1 set c = 10; +SELECT relation::regclass, mode, granted FROM pg_locks WHERE gp_segment_id=-1 AND granted='f'; + +1: COMMIT; +2<: + +1: BEGIN; +1: INSERT INTO part_tbl SELECT 1,1,1; +SELECT GRANTED FROM pg_locks WHERE relation = 'part_tbl_1_prt_1'::regclass::oid AND mode='RowExclusiveLock' AND gp_segment_id=-1 AND locktype='relation'; +1: COMMIT; +DROP TABLE part_tbl; diff --git a/src/test/isolation2/sql/gdd/delete-deadlock-root-leaf-concurrent-op.sql b/src/test/isolation2/sql/gdd/delete-deadlock-root-leaf-concurrent-op.sql new file mode 100644 index 0000000000000000000000000000000000000000..dd8e0fe94ab5fd7b86745b1da5e0e451709dc05d --- /dev/null +++ b/src/test/isolation2/sql/gdd/delete-deadlock-root-leaf-concurrent-op.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS part_tbl; +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i; + +-- check gdd is enabled +show gp_enable_global_deadlock_detector; +1:BEGIN; +1:DELETE FROM part_tbl_1_prt_1 WHERE c = 9; + +2:BEGIN; +2:DELETE FROM part_tbl where c = 1; + +-- the below delete will wait to acquire the transaction lock to delete the tuple +-- held by Session 2 +1&:DELETE FROM part_tbl_1_prt_1 WHERE c = 1; + +-- the below delete will wait to acquire the transaction lock to delete the tuple +-- held by Session 1 +2&:DELETE FROM part_tbl where c = 9; + +1<: +2<: + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +2:ROLLBACK; +DROP TABLE IF EXISTS part_tbl; diff --git a/src/test/isolation2/sql/gdd/dml_locks_only_targeted_table_in_query.sql b/src/test/isolation2/sql/gdd/dml_locks_only_targeted_table_in_query.sql new file mode 100644 index 0000000000000000000000000000000000000000..92d1589bdf2acef455b54d125f007b4f69a3a832 --- /dev/null +++ b/src/test/isolation2/sql/gdd/dml_locks_only_targeted_table_in_query.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS part_tbl_upd_del; + +-- check gdd is on +show gp_enable_global_deadlock_detector; + +CREATE TABLE part_tbl_upd_del (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +INSERT INTO part_tbl_upd_del SELECT i, 1, i FROM generate_series(1,10)i; + +1: BEGIN; +1: DELETE FROM part_tbl_upd_del; +-- since the delete operation is on root part and gdd is on, there will be no lock on leaf partition on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1; +-- lock on qd will be there for root partition +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1; +1: ROLLBACK; + + +1: BEGIN; +1: UPDATE part_tbl_upd_del SET c = 1 WHERE c = 1; +-- since the update operation is on root part and gdd is on, there will be no lock on leaf partition on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1; +-- lock on qd will be there for root partition +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del'::regclass::oid AND gp_segment_id = -1; +1: ROLLBACK; + +1: BEGIN; +1: DELETE FROM part_tbl_upd_del_1_prt_1; +-- since the delete operation is on leaf part, there will be a lock on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock'; +1: ROLLBACK; + + +1: BEGIN; +1: UPDATE part_tbl_upd_del_1_prt_1 SET c = 1 WHERE c = 1; +-- since the update operation is on leaf part, there will be a lock on QD +1: SELECT 1 FROM pg_locks WHERE relation='part_tbl_upd_del_1_prt_1'::regclass::oid AND gp_segment_id = -1 AND mode='RowExclusiveLock'; +1: ROLLBACK; diff --git a/src/test/isolation2/sql/gdd/planner_insert_while_vacuum_drop.sql b/src/test/isolation2/sql/gdd/planner_insert_while_vacuum_drop.sql new file mode 100644 index 0000000000000000000000000000000000000000..a52fd4a4e857fc6cd5ca4e4aa901a4f474ab2c26 --- /dev/null +++ b/src/test/isolation2/sql/gdd/planner_insert_while_vacuum_drop.sql @@ -0,0 +1,64 @@ +-- Ensures that deadlock between an INSERT with planner and +-- appendonly VACUUM drop phase can be detected. +-- ORCA during translation of INSERT statement tries to get AccessShareLock on +-- leaf partition but Vacuum already has AccessExclusiveLock on the leaf +-- partition so it keeps waiting and the deadlock is not observed. +CREATE EXTENSION IF NOT EXISTS gp_inject_fault; +select gp_inject_fault2('all', 'reset', dbid, hostname, port) from gp_segment_configuration; + +-- Helper function +CREATE or REPLACE FUNCTION wait_until_acquired_lock_on_rel (rel_name text, lmode text, segment_id integer) RETURNS /*in func*/ + bool AS $$ /*in func*/ +declare /*in func*/ + result bool; /*in func*/ +begin /*in func*/ + result := false; /*in func*/ + -- Wait until lock is acquired /*in func*/ + while result IS NOT true loop /*in func*/ + select l.granted INTO result /*in func*/ + from pg_locks l /*in func*/ + where l.relation::regclass = rel_name::regclass /*in func*/ + and l.mode=lmode /*in func*/ + and l.gp_segment_id=segment_id; /*in func*/ + perform pg_sleep(0.1); /*in func*/ + end loop; /*in func*/ + return result; /*in func*/ +end; /*in func*/ +$$ language plpgsql; + +-- Given an append only table with partitions that is ready to be compacted +CREATE TABLE ao_part_tbl (a int, b int) with (appendonly=true, orientation=row) + DISTRIBUTED BY (a) + PARTITION BY RANGE (b) +(START (1) END (2) EVERY (1)); + +INSERT INTO ao_part_tbl VALUES (1, 1); +DELETE FROM ao_part_tbl; + +-- Check gdd is enabled +show gp_enable_global_deadlock_detector; + +-- VACUUM drop phase is blocked before it opens the child relation on the primary +SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'suspend', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; +-- VACUUM takes AccessExclusiveLock on the leaf partition on QD +1&: VACUUM ao_part_tbl; +SELECT gp_wait_until_triggered_fault2('vacuum_relation_open_relation_during_drop_phase', 1, dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + +-- And INSERT is blocked until it acquires the RowExclusiveLock on the child relation +2: set optimizer to off; +-- INSERT takes RowExclusiveLock on the leaf partition on QE +2&: INSERT INTO ao_part_tbl VALUES (1, 1); +SELECT wait_until_acquired_lock_on_rel('ao_part_tbl_1_prt_1', 'RowExclusiveLock', content) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; + +-- Reset the fault on VACUUM. +SELECT gp_inject_fault2('vacuum_relation_open_relation_during_drop_phase', 'reset', dbid, hostname, port) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; +-- VACUUM waits for AccessExclusiveLock on the leaf table on QE +1<: +-- INSERT watis for AccessShareLock on the leaf table on QD, hence deadlock +2<: + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +2:ROLLBACK; +DROP TABLE IF EXISTS ao_part_tbl; diff --git a/src/test/isolation2/sql/gdd/update-deadlock-root-leaf-concurrent-op.sql b/src/test/isolation2/sql/gdd/update-deadlock-root-leaf-concurrent-op.sql new file mode 100644 index 0000000000000000000000000000000000000000..4567c8879ceab03d1a4c01684880628a155e346a --- /dev/null +++ b/src/test/isolation2/sql/gdd/update-deadlock-root-leaf-concurrent-op.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS part_tbl; +CREATE TABLE part_tbl (a int, b int, c int) PARTITION BY RANGE(b) (START(1) END(2) EVERY(1)); +INSERT INTO part_tbl SELECT i, 1, i FROM generate_series(1,10)i; + +-- check gdd is enabled +show gp_enable_global_deadlock_detector; +1:BEGIN; +1:UPDATE part_tbl_1_prt_1 SET c = 9 WHERE c = 9; + +2:BEGIN; +2:UPDATE part_tbl SET c = 1 WHERE c = 1; + +-- the below update will wait to acquire the transaction lock to update the tuple +-- held by Session 2 +1&:UPDATE part_tbl_1_prt_1 SET c = 1 WHERE c = 1; + +-- the below update will wait to acquire the transaction lock to update the tuple +-- held by Session 1 +2&:UPDATE part_tbl SET c = 9 WHERE c = 9; + +1<: +2<: + +-- since gdd is on, Session 2 will be cancelled. + +1:ROLLBACK; +2:ROLLBACK; +DROP TABLE IF EXISTS part_tbl; diff --git a/src/test/regress/expected/partition_locking_optimizer.out b/src/test/regress/expected/partition_locking_optimizer.out index 54fa995a3971c9f9db063ca0e321b2030517d46d..25f0339ded8ebf595ce9f75766486e137e2841a8 100644 --- a/src/test/regress/expected/partition_locking_optimizer.out +++ b/src/test/regress/expected/partition_locking_optimizer.out @@ -342,10 +342,19 @@ begin; delete from partlockt where i = 4; -- Known_opt_diff: MPP-20936 select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%'; - coalesce | mode | locktype | node ------------+---------------+----------+-------- - partlockt | ExclusiveLock | relation | master -(1 row) + coalesce | mode | locktype | node +-------------------+---------------+----------+-------- + partlockt | ExclusiveLock | relation | master + partlockt_1_prt_1 | ExclusiveLock | relation | master + partlockt_1_prt_2 | ExclusiveLock | relation | master + partlockt_1_prt_3 | ExclusiveLock | relation | master + partlockt_1_prt_4 | ExclusiveLock | relation | master + partlockt_1_prt_5 | ExclusiveLock | relation | master + partlockt_1_prt_6 | ExclusiveLock | relation | master + partlockt_1_prt_7 | ExclusiveLock | relation | master + partlockt_1_prt_8 | ExclusiveLock | relation | master + partlockt_1_prt_9 | ExclusiveLock | relation | master +(10 rows) select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node