From 9b9e659c385be22968dc7cc9c67212f9428689f0 Mon Sep 17 00:00:00 2001 From: Asim R P Date: Fri, 14 Sep 2018 16:48:33 -0700 Subject: [PATCH] Validate state of an AO segment file before insertion and drop This commit promotes a few assertions into elog(ERROR) so as to avoid new data being appended to a segmene file that is not in available state. Scans on an AO table do not read segment files that are awaiting to be dropped. New data, if inserted in such a segment file, will be lost forever. The accompanying isolation2 test demonstrates a bug that hits these errors. The test uses a newly added UDF to evict an entry from the appendonly hash table. In production, an entry is evicted when the appendonly hash table is filled (default capacity of 1000 entries). Note: the bug will be fixed in a separate patch. Co-authored-by: Adam Berlin --- src/backend/access/aocs/aocsam.c | 4 +- src/backend/access/appendonly/appendonlyam.c | 4 +- .../access/appendonly/appendonlywriter.c | 25 ++-- src/include/access/appendonlywriter.h | 1 + src/include/utils/faultinjector_lists.h | 2 + ...insert_should_not_use_awaiting_drop.source | 65 +++++++++ src/test/isolation2/isolation2_regress.c | 17 +++ src/test/isolation2/isolation2_schedule | 2 + ...insert_should_not_use_awaiting_drop.source | 127 ++++++++++++++++++ 9 files changed, 232 insertions(+), 15 deletions(-) create mode 100644 src/test/isolation2/input/uao/insert_should_not_use_awaiting_drop.source create mode 100644 src/test/isolation2/output/uao/insert_should_not_use_awaiting_drop.source diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index 94e884ef9b..474d60a989 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -783,7 +783,9 @@ OpenAOCSDatumStreams(AOCSInsertDesc desc) desc->fsInfo = seginfo; /* Never insert into a segment that is awaiting a drop */ - Assert(desc->fsInfo->state != AOSEG_STATE_AWAITING_DROP); + elogif(desc->fsInfo->state == AOSEG_STATE_AWAITING_DROP, ERROR, + "cannot insert into segno (%d) for AO relid %d that is in state AOSEG_STATE_AWAITING_DROP", + desc->cur_segno, RelationGetRelid(desc->aoi_rel)); desc->rowCount = seginfo->total_tupcount; diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index e545366f02..e15d8172ff 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -471,7 +471,9 @@ SetCurrentFileSegForWrite(AppendOnlyInsertDesc aoInsertDesc) } /* Never insert into a segment that is awaiting a drop */ - Assert(aoInsertDesc->fsInfo->state != AOSEG_STATE_AWAITING_DROP); + elogif(aoInsertDesc->fsInfo->state == AOSEG_STATE_AWAITING_DROP, + ERROR, "cannot insert into segno (%d) from AO relid %d that is in state AOSEG_STATE_AWAITING_DROP", + aoInsertDesc->cur_segno, RelationGetRelid(aoInsertDesc->aoi_rel)); fsinfo = aoInsertDesc->fsInfo; Assert(fsinfo); diff --git a/src/backend/access/appendonly/appendonlywriter.c b/src/backend/access/appendonly/appendonlywriter.c index 9516a9283d..2cfec4110f 100644 --- a/src/backend/access/appendonly/appendonlywriter.c +++ b/src/backend/access/appendonly/appendonlywriter.c @@ -30,6 +30,7 @@ #include "miscadmin.h" #include "storage/lmgr.h" #include "utils/builtins.h" +#include "utils/faultinjector.h" #include "utils/int8.h" #include "utils/lsyscache.h" #include "utils/snapmgr.h" @@ -57,7 +58,6 @@ static bool appendOnlyInsertXact = false; */ static bool AOHashTableInit(void); static AORelHashEntry AppendOnlyRelHashNew(Oid relid, bool *exists); -static AORelHashEntry AORelGetHashEntry(Oid relid); static AORelHashEntry AORelLookupHashEntry(Oid relid); static bool AORelCreateHashEntry(Oid relid); static bool *GetFileSegStateInfoFromSegments(Relation parentrel); @@ -190,6 +190,8 @@ AORelCreateHashEntry(Oid relid) */ LWLockRelease(AOSegFileLock); + SIMPLE_FAULT_INJECTOR(BeforeCreatingAnAOHashEntry); + /* * Now get all the segment files information for this relation from the QD * aoseg table. then update our segment file array in this hash entry. @@ -330,7 +332,7 @@ AORelCreateHashEntry(Oid relid) } /* - * AORelRemoveEntry -- remove the hash entry for a given relation. + * AORelRemoveHashEntry -- remove the hash entry for a given relation. * * Notes * The append only lightweight lock (AOSegFileLock) *must* be held for @@ -390,7 +392,7 @@ AORelLookupHashEntry(Oid relid) * AORelGetEntry -- Same as AORelLookupEntry but here the caller is expecting * an entry to exist. We error out if it doesn't. */ -static AORelHashEntry +AORelHashEntry AORelGetHashEntry(Oid relid) { AORelHashEntryData *aoentry = AORelLookupHashEntry(relid); @@ -760,9 +762,11 @@ RegisterSegnoForCompactionDrop(Oid relid, List *compactedSegmentFileList) "relation \"%s\" (%d)", i, get_rel_name(relid), relid))); - Assert(segfilestat->state == COMPACTED_AWAITING_DROP); - segfilestat->xid = CurrentXid; appendOnlyInsertXact = true; + segfilestat->xid = CurrentXid; + elogif(segfilestat->state != COMPACTED_AWAITING_DROP, + ERROR, "expected segno (%d) from AO relid %d in state COMPACTED_AWAITING_DROP but found in state %d", + i, relid, segfilestat->state); segfilestat->state = DROP_USE; } } @@ -1846,12 +1850,6 @@ AtAbort_AppendOnly(void) } /* bingo! */ - Assert(segfilestat->state == INSERT_USE || - segfilestat->state == COMPACTION_USE || - segfilestat->state == DROP_USE || - segfilestat->state == PSEUDO_COMPACTION_USE || - segfilestat->state == COMPACTED_DROP_SKIPPED); - ereportif(Debug_appendonly_print_segfile_choice, LOG, (errmsg("AtAbort_AppendOnly: found a segno that inserted in our txn for " "table %d. Cleaning segno %d tupcount: old " @@ -1885,7 +1883,8 @@ AtEOXact_AppendOnly_StateTransition(AORelHashEntry aoentry, int segno, AOSegfileState oldstate; Assert(segfilestat); - Assert(segfilestat->state == INSERT_USE || + Assert(segfilestat->aborted || + segfilestat->state == INSERT_USE || segfilestat->state == COMPACTION_USE || segfilestat->state == DROP_USE || segfilestat->state == PSEUDO_COMPACTION_USE || @@ -1935,7 +1934,7 @@ AtEOXact_AppendOnly_StateTransition(AORelHashEntry aoentry, int segno, } else { - Assert(false); + Assert(segfilestat->aborted); } ereportif(Debug_appendonly_print_segfile_choice, LOG, diff --git a/src/include/access/appendonlywriter.h b/src/include/access/appendonlywriter.h index 80f08b53b3..57af80496f 100644 --- a/src/include/access/appendonlywriter.h +++ b/src/include/access/appendonlywriter.h @@ -226,6 +226,7 @@ extern void UpdateMasterAosegTotals(Relation parentrel, extern void UpdateMasterAosegTotalsFromSegments(Relation parentrel, Snapshot appendOnlyMetaDataSnapshot, List *segmentNumList, int64 modcount_added); +extern AORelHashEntry AORelGetHashEntry(Oid relid); extern bool AORelRemoveHashEntry(Oid relid); extern void AtCommit_AppendOnly(void); extern void AtAbort_AppendOnly(void); diff --git a/src/include/utils/faultinjector_lists.h b/src/include/utils/faultinjector_lists.h index e19aa23b23..692ff95a28 100644 --- a/src/include/utils/faultinjector_lists.h +++ b/src/include/utils/faultinjector_lists.h @@ -148,6 +148,8 @@ FI_IDENT(AppendOnlyInsert, "appendonly_insert") FI_IDENT(AppendOnlyDelete, "appendonly_delete") /* inject fault before an append-only update */ FI_IDENT(AppendOnlyUpdate, "appendonly_update") +/* inject fault before creating an appendonly hash entry*/ +FI_IDENT(BeforeCreatingAnAOHashEntry, "before_creating_an_ao_hash_entry") /* inject fault in append-only compression function */ FI_IDENT(AppendOnlySkipCompression, "appendonly_skip_compression") /* inject fault while reindex db is in progress */ diff --git a/src/test/isolation2/input/uao/insert_should_not_use_awaiting_drop.source b/src/test/isolation2/input/uao/insert_should_not_use_awaiting_drop.source new file mode 100644 index 0000000000..da03bb6189 --- /dev/null +++ b/src/test/isolation2/input/uao/insert_should_not_use_awaiting_drop.source @@ -0,0 +1,65 @@ +-- Test to validate that AWAITING_DROP segment files are never chosen +-- for inserts. + +-- start_matchsubs +-- s/\s+\(.*\.[ch]:\d+\)/ (SOMEFILE:SOMEFUNC)/ +-- m/relid \d+/ +-- s/relid \d+/relid / +-- end_matchsubs +create extension if not exists gp_inject_fault; + +select gp_inject_fault('all', 'reset', 1); + +create or replace function remove_ao_entry_from_hash_table(oid) +returns void as '@abs_builddir@/isolation2_regress@DLSUFFIX@' +language c immutable strict no sql; + +-- Given an append only table that is ready to be compacted + +drop table if exists test_table_@orientation@; +create table test_table_@orientation@ (a int, b int) with(appendonly=true); +insert into test_table_@orientation@ select i,i from generate_series(1,12)i; +update test_table_@orientation@ set b = -b; + +select remove_ao_entry_from_hash_table('test_table_@orientation@'::regclass::oid); + +select gp_inject_fault('before_creating_an_ao_hash_entry', 'suspend', 1); + +-- And an insert transaction is blocked before assigning a segment +-- file for insertion +1&: INSERT INTO test_table_@orientation@ VALUES (1,1); + +select gp_wait_until_triggered_fault('before_creating_an_ao_hash_entry', 1, 1); + +select gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', 1); +2&: VACUUM test_table_@orientation@; + +select gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, 1); + +-- Then vacuum should have completed compaction leaving segment file 1 +-- in default state on QD but in awaiting drop state on QEs +select segno, state from gp_ao_or_aocs_seg_name('test_table_@orientation@'); +0U: select segno, state from gp_ao_or_aocs_seg_name('test_table_@orientation@'); + +-- And the append only hash table no longer holds a record for test_table_@orientation@ +select remove_ao_entry_from_hash_table('test_table_@orientation@'::regclass::oid); + +-- And the insert fails +select gp_inject_fault('before_creating_an_ao_hash_entry', 'reset', 1); +1<: + +-- And the vacuum completes +select gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', 1); +2<: + +-- Then inserts into test table should not be visible +select count(1) from test_table_@orientation@; + +-- And additional inserts should continue to fail +insert into test_table_@orientation@ values (1,1); + +-- When the appendonly hash entry is removed +select remove_ao_entry_from_hash_table('test_table_@orientation@'::regclass::oid); + +-- Then inserts should begin to succeed +insert into test_table_@orientation@ values (1,1); diff --git a/src/test/isolation2/isolation2_regress.c b/src/test/isolation2/isolation2_regress.c index eebf219bb0..d987fe2b3e 100644 --- a/src/test/isolation2/isolation2_regress.c +++ b/src/test/isolation2/isolation2_regress.c @@ -4,6 +4,7 @@ #include "miscadmin.h" #include "access/aocssegfiles.h" +#include "access/appendonlywriter.h" #include "access/heapam.h" #include "storage/bufmgr.h" #include "utils/numeric.h" @@ -11,6 +12,7 @@ PG_MODULE_MAGIC; extern void flush_relation_buffers(PG_FUNCTION_ARGS); +extern void remove_ao_entry_from_hash_table(PG_FUNCTION_ARGS); /* numeric upgrade tests */ extern Datum convertNumericToGPDB4(PG_FUNCTION_ARGS); @@ -26,6 +28,21 @@ flush_relation_buffers(PG_FUNCTION_ARGS) heap_close(r, AccessShareLock); } +PG_FUNCTION_INFO_V1(remove_ao_entry_from_hash_table); +void +remove_ao_entry_from_hash_table(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + LWLockAcquire(AOSegFileLock, LW_EXCLUSIVE); + AORelHashEntry aoentry = AORelGetHashEntry(relid); + if (aoentry->txns_using_rel != 0) + elog(ERROR, "relid %d is used by %d transactions, cannot remove it yet", + relid, aoentry->txns_using_rel); + AORelRemoveHashEntry(relid); + LWLockRelease(AOSegFileLock); +} + /* Mangle a numeric Datum to match the GPDB4 (Postgres 8.2) format. */ PG_FUNCTION_INFO_V1(convertNumericToGPDB4); Datum diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 7af7ef7579..cce9fe351e 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -71,6 +71,7 @@ test: uao/vacuum_self_serializable3_row test: uao/vacuum_while_insert_row test: uao/vacuum_while_vacuum_row test: uao/vacuum_cleanup_row +test: uao/insert_should_not_use_awaiting_drop_row test: reorganize_after_ao_vacuum_skip_drop truncate_after_ao_vacuum_skip_drop mark_all_aoseg_await_drop # Tests on Append-Optimized tables (column-oriented). @@ -117,6 +118,7 @@ test: uao/vacuum_self_serializable3_column test: uao/vacuum_while_insert_column test: uao/vacuum_while_vacuum_column test: uao/vacuum_cleanup_column +test: uao/insert_should_not_use_awaiting_drop_column test: add_column_after_vacuum_skip_drop_column test: vacuum_after_vacuum_skip_drop_column diff --git a/src/test/isolation2/output/uao/insert_should_not_use_awaiting_drop.source b/src/test/isolation2/output/uao/insert_should_not_use_awaiting_drop.source new file mode 100644 index 0000000000..a210c5fb7f --- /dev/null +++ b/src/test/isolation2/output/uao/insert_should_not_use_awaiting_drop.source @@ -0,0 +1,127 @@ +-- Test to validate that AWAITING_DROP segment files are never chosen +-- for inserts. + +-- start_matchsubs +-- s/\s+\(.*\.[ch]:\d+\)/ (SOMEFILE:SOMEFUNC)/ +-- m/relid \d+/ +-- s/relid \d+/relid / +-- end_matchsubs +create extension if not exists gp_inject_fault; +CREATE + +select gp_inject_fault('all', 'reset', 1); +gp_inject_fault +--------------- +t +(1 row) + +create or replace function remove_ao_entry_from_hash_table(oid) returns void as '@abs_builddir@/isolation2_regress@DLSUFFIX@' language c immutable strict no sql; +CREATE + +-- Given an append only table that is ready to be compacted + +drop table if exists test_table_@orientation@; +DROP +create table test_table_@orientation@ (a int, b int) with(appendonly=true); +CREATE +insert into test_table_@orientation@ select i,i from generate_series(1,12)i; +INSERT 12 +update test_table_@orientation@ set b = -b; +UPDATE 12 + +select remove_ao_entry_from_hash_table('test_table_@orientation@'::regclass::oid); +remove_ao_entry_from_hash_table +------------------------------- + +(1 row) + +select gp_inject_fault('before_creating_an_ao_hash_entry', 'suspend', 1); +gp_inject_fault +--------------- +t +(1 row) + +-- And an insert transaction is blocked before assigning a segment +-- file for insertion +1&: INSERT INTO test_table_@orientation@ VALUES (1,1); + +select gp_wait_until_triggered_fault('before_creating_an_ao_hash_entry', 1, 1); +gp_wait_until_triggered_fault +----------------------------- +t +(1 row) + +select gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'suspend', 1); +gp_inject_fault +--------------- +t +(1 row) +2&: VACUUM test_table_@orientation@; + +select gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, 1); +gp_wait_until_triggered_fault +----------------------------- +t +(1 row) + +-- Then vacuum should have completed compaction leaving segment file 1 +-- in default state on QD but in awaiting drop state on QEs +select segno, state from gp_ao_or_aocs_seg_name('test_table_@orientation@'); +segno|state +-----+----- +1 |1 +2 |1 +(2 rows) +0U: select segno, state from gp_ao_or_aocs_seg_name('test_table_@orientation@'); +segno|state +-----+----- +1 |2 +2 |1 +(2 rows) + +-- And the append only hash table no longer holds a record for test_table_@orientation@ +select remove_ao_entry_from_hash_table('test_table_@orientation@'::regclass::oid); +remove_ao_entry_from_hash_table +------------------------------- + +(1 row) + +-- And the insert fails +select gp_inject_fault('before_creating_an_ao_hash_entry', 'reset', 1); +gp_inject_fault +--------------- +t +(1 row) +1<: <... completed> +ERROR: cannot insert into segno (1) from AO relid 41063 that is in state AOSEG_STATE_AWAITING_DROP (appendonlyam.c:476) (seg2 127.0.1.1:25434 pid=9708) (appendonlyam.c:476) + +-- And the vacuum completes +select gp_inject_fault('vacuum_relation_open_relation_during_drop_phase', 'reset', 1); +gp_inject_fault +--------------- +t +(1 row) +2<: <... completed> +ERROR: expected segno (1) from AO relid 41063 in state COMPACTED_AWAITING_DROP but found in state 1 (appendonlywriter.c:768) + +-- Then inserts into test table should not be visible +select count(1) from test_table_@orientation@; +count +----- +12 +(1 row) + +-- And additional inserts should continue to fail +insert into test_table_@orientation@ values (1,1); +ERROR: cannot insert into segno (1) from AO relid 41063 that is in state AOSEG_STATE_AWAITING_DROP (appendonlyam.c:476) (seg2 127.0.1.1:25434 pid=9657) (appendonlyam.c:476) + +-- When the appendonly hash entry is removed +select remove_ao_entry_from_hash_table('test_table_@orientation@'::regclass::oid); +remove_ao_entry_from_hash_table +------------------------------- + +(1 row) + +-- Then inserts should begin to succeed +insert into test_table_@orientation@ values (1,1); +INSERT 1 -- GitLab