From 46d9521b6d1df0db74d41fa09347eff5f84749b9 Mon Sep 17 00:00:00 2001 From: Ashwin Agrawal Date: Wed, 23 Nov 2016 16:49:23 -0800 Subject: [PATCH] Update SharedLocalSnapshot correctly for subtransactions QE reader leverages SharedLocalSnapshot to perform visibility checks. QE writer is responsible to keep the SharedLocalSnapshot up to date. Before this fix, SharedLocalSnapshot was only updated by writer while acquiring the snapshot. But if transaction id is assigned to subtransaction after it has taken the snapshot, it was not reflected. Due to this when QE reader called TransactionIdIsCurrentTransactionId, it may get sometimes false based on timings for subtransaction ids used by QE writer to insert/update tuples. Hence to fix the situation, SharedLocalSnapshot is now updated when assigning transaction id and deregistered if subtransaction aborts. Also, adding faultinjector to suspend cursor QE reader instead of guc/sleep used in past. Moving cursor tests from bugbuster to ICG and adding deterministic test to exercise the behavior. 
Fixes #1276, reported by @pengzhout --- gpMgmt/bin/gppylib/programs/clsInjectFault.py | 1 + src/backend/access/transam/xact.c | 53 ++++++++++++++----- src/backend/storage/ipc/procarray.c | 3 +- src/backend/utils/misc/faultinjector.c | 2 + src/backend/utils/misc/guc_gp.c | 10 ---- src/include/cdb/cdbvars.h | 8 --- src/include/utils/faultinjector.h | 2 +- .../{bugbuster => }/expected/cursor.out | 32 ++++++++--- src/test/regress/expected/gpdtm_plpgsql.out | 6 ++- src/test/regress/greenplum_schedule | 1 + .../regress/{bugbuster => }/sql/cursor.sql | 27 ++++++---- src/test/regress/sql/gpdtm_plpgsql.sql | 6 ++- 12 files changed, 96 insertions(+), 55 deletions(-) rename src/test/regress/{bugbuster => }/expected/cursor.out (93%) rename src/test/regress/{bugbuster => }/sql/cursor.sql (91%) diff --git a/gpMgmt/bin/gppylib/programs/clsInjectFault.py b/gpMgmt/bin/gppylib/programs/clsInjectFault.py index 93b3315d0b..05eeaf6591 100644 --- a/gpMgmt/bin/gppylib/programs/clsInjectFault.py +++ b/gpMgmt/bin/gppylib/programs/clsInjectFault.py @@ -417,6 +417,7 @@ class GpInjectFaultProgram: "process_startup_packet (inject fault when processing startup packet during backend initialization), " \ "quickdie (inject fault when auxiliary processes quitting), " \ "after_one_slice_dispatched (inject fault after one slice was dispatched when dispatching plan), " \ + "cursor_qe_reader_after_snapshot (inject fault after QE READER has populated snapshot for cursor), " \ "all (affects all faults injected, used for 'status' and 'reset'), ") addTo.add_option("-c", "--ddl_statement", dest="ddlStatement", type="string", metavar="ddlStatement", diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 5a93914c5a..052c4bb797 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -324,6 +324,11 @@ static void DispatchRollbackToSavepoint(char *name); static bool search_binary_xids(TransactionId *ids, uint32 size, TransactionId xid, int32
*index); +static void +AddSubtransactionsToSharedSnapshot(DistributedTransactionId dxid, + volatile SharedSnapshotSlot *shared_snapshot, + TransactionId *subxids, + uint32 subcnt); extern void FtsCondSetTxnReadOnly(bool *); @@ -535,6 +540,19 @@ AssignTransactionId(TransactionState s) { Assert(TransactionIdPrecedes(s->parent->transactionId, s->transactionId)); SubTransSetParent(s->transactionId, s->parent->transactionId); + elog((Debug_print_full_dtm ? LOG : DEBUG5), + "adding subtransaction xid %u to Shared Local Snapshot", + s->transactionId); + if (SharedLocalSnapshotSlot) + { + TransactionId temp_subxids[1]; + temp_subxids[0] = s->transactionId; + AddSubtransactionsToSharedSnapshot( + SharedLocalSnapshotSlot->QDxid, + SharedLocalSnapshotSlot, + &temp_subxids, + 1); + } } /* @@ -771,14 +789,11 @@ TransactionIdIsCurrentTransactionId(TransactionId xid) isCurrentTransactionId = false; /* Assume. */ /* - * Cursor readers cannot directly access the writer shared - * snapshot -- since it may have been modified by the writer - * since we were declared. But for normal reader if sub-transactions - * fit in-memory can leverage the writers array instead of subxbuf. + * For readers if sub-transactions fit in-memory can leverage the + * writers array instead of subxbuf. */ - if ( (!QEDtxContextInfo.cursorContext) && - (SharedLocalSnapshotSlot->inmemory_subcnt == - SharedLocalSnapshotSlot->total_subcnt)) + if (SharedLocalSnapshotSlot->inmemory_subcnt == + SharedLocalSnapshotSlot->total_subcnt) { for (sub = 0; sub < SharedLocalSnapshotSlot->total_subcnt; sub++) { @@ -2151,12 +2166,12 @@ SetSharedTransactionId_reader(TransactionId xid, CommandId cid) DistributedTransactionContext == DTX_CONTEXT_QE_ENTRY_DB_SINGLETON); /* - * GPDB_83_MERGE_FIXME: I don't understand how the top-level XID - * can ever be different here from what it was in StartTransaction(). - * And usually it isn't, but when I tried adding the below assertion, it was - * tripped. 
+ * For DTX_CONTEXT_QE_READER or DTX_CONTEXT_QE_ENTRY_DB_SINGLETON, during + StartTransaction(), currently we temporarily set the + TopTransactionStateData.transactionId to what we find that time in + SharedLocalSnapshot slot. Since then, QE writer could have moved-on and + hence we reset the same to update to correct value here. */ - //Assert(TopTransactionStateData.transactionId == xid); TopTransactionStateData.transactionId = xid; currentCommandId = cid; } @@ -6300,6 +6315,7 @@ static void CleanupSubTransaction(void) { TransactionState s = CurrentTransactionState; + TransactionId localXid = s->transactionId; ShowTransactionState("CleanupSubTransaction"); @@ -6320,6 +6336,19 @@ CleanupSubTransaction(void) s->state = TRANS_DEFAULT; PopTransaction(); + + /* + * While aborting the subtransaction and if we are writer, lets deregister + * ourselves from the SharedLocalSnapshot, so that calls to + * TransactionIdIsCurrentTransaction return FALSE for this transaction for + * QE readers. + */ + if (SharedLocalSnapshotSlot && TransactionIdIsValid(localXid) && + (DistributedTransactionContext != DTX_CONTEXT_QE_READER && + DistributedTransactionContext != DTX_CONTEXT_QE_ENTRY_DB_SINGLETON)) + { + UpdateSubtransactionsInSharedSnapshot(SharedLocalSnapshotSlot->QDxid); + } } /* diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index a41d57e1c2..bc9fe56f05 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -1187,8 +1187,7 @@ GetSnapshotData(Snapshot snapshot, bool serializable) { readSharedLocalSnapshot_forCursor(snapshot); - if (gp_enable_slow_cursor_testmode) - pg_usleep(2 * 1000 * 1000); /* 1 sec.
*/ + SIMPLE_FAULT_INJECTOR(CursorQEReaderAfterSnapshot); return snapshot; } diff --git a/src/backend/utils/misc/faultinjector.c b/src/backend/utils/misc/faultinjector.c index 580e83471f..1a7d216517 100644 --- a/src/backend/utils/misc/faultinjector.c +++ b/src/backend/utils/misc/faultinjector.c @@ -325,6 +325,8 @@ FaultInjectorIdentifierEnumToString[] = { /* inject fault in cdbdisp_dispatchX*/ _("interconnect_stop_ack_is_lost"), /* inject fault in interconnect to skip sending the stop ack */ + _("cursor_qe_reader_after_snapshot"), + /* inject fault after QE READER has populated snapshot for cursor */ _("not recognized"), }; diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 0e7022c5b7..dbf9fa01bf 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -1087,16 +1087,6 @@ struct config_bool ConfigureNamesBool_gp[] = false, NULL, NULL }, - { - {"gp_enable_slow_cursor_testmode", PGC_USERSET, DEVELOPER_OPTIONS, - gettext_noop("Slow down cursor gangs -- to facilitate race-condition testing."), - NULL, - GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE - }, - &gp_enable_slow_cursor_testmode, - false, NULL, NULL }, - { {"gp_fts_probe_pause", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Stop active probes."), diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 4a2fc95b8d..1cc5e7cee5 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -201,14 +201,6 @@ extern int pgstat_track_activity_query_size; */ extern bool gp_enable_slow_writer_testmode; - -/* - * MPP-8622: - * In order to facilitate testing of reader-gang/writer-gang synchronization - * it is very handy to slow down the cursor (opens important race-window).
- */ -extern bool gp_enable_slow_cursor_testmode; - /* * MPP-6926: Resource Queues on by default */ diff --git a/src/include/utils/faultinjector.h b/src/include/utils/faultinjector.h index f5212004ab..d3203f2e0a 100644 --- a/src/include/utils/faultinjector.h +++ b/src/include/utils/faultinjector.h @@ -216,7 +216,7 @@ typedef enum FaultInjectorIdentifier_e { AfterOneSliceDispatched, InterconnectStopAckIsLost, - + CursorQEReaderAfterSnapshot, /* INSERT has to be done before that line */ FaultInjectorIdMax, diff --git a/src/test/regress/bugbuster/expected/cursor.out b/src/test/regress/expected/cursor.out similarity index 93% rename from src/test/regress/bugbuster/expected/cursor.out rename to src/test/regress/expected/cursor.out index 8edb9366f1..69a5597e35 100644 --- a/src/test/regress/bugbuster/expected/cursor.out +++ b/src/test/regress/expected/cursor.out @@ -12,11 +12,6 @@ CREATE TABLE lu_customer ( ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'customer_id' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. --- QA-838 or MPP-8622 --- Added the following GUC + tables are created without random distribution --- Test case might be still intermittently failed --- Ngoc -set GP_ENABLE_SLOW_CURSOR_TESTMODE=ON; BEGIN; SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; CREATE TABLE cursor (a int, b int) distributed by (b); @@ -77,6 +72,31 @@ fetch from foo; (1 row) abort; +-- Test to validate cursor QE reader is correctly able to perform visibility in +-- subtransaction, even after QE writer has moved ahead and updated the tuple +CREATE TABLE cursor_writer_reader (a int, b int) DISTRIBUTED BY (a); +\! 
gpfaultinjector -q -f cursor_qe_reader_after_snapshot -y suspend --seg_dbid 2 +BEGIN; +INSERT INTO cursor_writer_reader VALUES(1, 666); +DECLARE cursor_c2 CURSOR FOR SELECT * FROM cursor_writer_reader WHERE b=666 ORDER BY 1; +SAVEPOINT x; +UPDATE cursor_writer_reader SET b=333 WHERE b=666; +\! gpfaultinjector -f cursor_qe_reader_after_snapshot -y status --seg_dbid 2 | grep triggered | uniq | wc -l + 1 +\! gpfaultinjector -q -f cursor_qe_reader_after_snapshot -y resume --seg_dbid 2 +FETCH cursor_c2; + a | b +---+----- + 1 | 666 +(1 row) + +SELECT * FROM cursor_writer_reader WHERE b=666 ORDER BY 1; + a | b +---+--- +(0 rows) + +END; +\! gpfaultinjector -q -f cursor_qe_reader_after_snapshot -y reset --seg_dbid 2 -- start_ignore ------------------------------------------------------------------------------ -- LAST MODIFIED: @@ -101,7 +121,6 @@ ERROR: schema "y_schema" does not exist drop schema if exists y_schema; NOTICE: schema "y_schema" does not exist, skipping --end_ignore -set GP_ENABLE_SLOW_CURSOR_TESTMODE=ON; create schema y_schema; create table y_schema.y (a int, b int) distributed by (a); Begin; @@ -278,7 +297,6 @@ ERROR: schema "z_schema" does not exist drop schema if exists z_schema; NOTICE: schema "z_schema" does not exist, skipping --end_ignore -set GP_ENABLE_SLOW_CURSOR_TESTMODE=ON; create schema z_schema; --create table z_schema.y (a int, b int) distributed randomly; create table z_schema.y (a int, b int) distributed by (a); diff --git a/src/test/regress/expected/gpdtm_plpgsql.out b/src/test/regress/expected/gpdtm_plpgsql.out index e87b5c8667..4937000210 100644 --- a/src/test/regress/expected/gpdtm_plpgsql.out +++ b/src/test/regress/expected/gpdtm_plpgsql.out @@ -413,8 +413,10 @@ select * from dtm_plpg_baz order by 1; abort; DROP TABLE dtm_plpg_foo; --- try to guarantee race-condition failures on the tests below. 
-SET GP_ENABLE_SLOW_CURSOR_TESTMODE=on; +-- Need to check what these tests wish to validate, better to use more +-- deterministic way than sleep. GP_ENABLE_SLOW_CURSOR_TESTMODE GUC was used +-- here to slow down reader gangs, removed the same as its not good way to write +-- the tests. BEGIN; SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; CREATE TABLE dtm_plpg_foo (a int, b int); diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index d1124fc016..c1c8d4943b 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -34,6 +34,7 @@ test: dispatch test: segspace_setup test: segspace test: segspace_cleanup +test: cursor # 'query_finish_pending' sets QueryFinishPending flag to true during query execution using fault injectors # so it needs to be in a group by itself diff --git a/src/test/regress/bugbuster/sql/cursor.sql b/src/test/regress/sql/cursor.sql similarity index 91% rename from src/test/regress/bugbuster/sql/cursor.sql rename to src/test/regress/sql/cursor.sql index ef4645d809..4fa5aa5853 100644 --- a/src/test/regress/bugbuster/sql/cursor.sql +++ b/src/test/regress/sql/cursor.sql @@ -10,13 +10,6 @@ CREATE TABLE lu_customer ( cust_city_id numeric(28,0) ); --- QA-838 or MPP-8622 --- Added the following GUC + tables are created without random distribution --- Test case might be still intermittently failed --- Ngoc - -set GP_ENABLE_SLOW_CURSOR_TESTMODE=ON; - BEGIN; SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; CREATE TABLE cursor (a int, b int) distributed by (b); @@ -53,6 +46,22 @@ rollback to x; fetch from foo; abort; +-- Test to validate cursor QE reader is correctly able to perform visibility in +-- subtransaction, even after QE writer has moved ahead and updated the tuple +CREATE TABLE cursor_writer_reader (a int, b int) DISTRIBUTED BY (a); +\! 
gpfaultinjector -q -f cursor_qe_reader_after_snapshot -y suspend --seg_dbid 2 +BEGIN; +INSERT INTO cursor_writer_reader VALUES(1, 666); +DECLARE cursor_c2 CURSOR FOR SELECT * FROM cursor_writer_reader WHERE b=666 ORDER BY 1; +SAVEPOINT x; +UPDATE cursor_writer_reader SET b=333 WHERE b=666; +\! gpfaultinjector -f cursor_qe_reader_after_snapshot -y status --seg_dbid 2 | grep triggered | uniq | wc -l +\! gpfaultinjector -q -f cursor_qe_reader_after_snapshot -y resume --seg_dbid 2 +FETCH cursor_c2; +SELECT * FROM cursor_writer_reader WHERE b=666 ORDER BY 1; +END; +\! gpfaultinjector -q -f cursor_qe_reader_after_snapshot -y reset --seg_dbid 2 + -- start_ignore ------------------------------------------------------------------------------ @@ -77,8 +86,6 @@ drop table if exists y_schema.y; drop schema if exists y_schema; --end_ignore -set GP_ENABLE_SLOW_CURSOR_TESTMODE=ON; - create schema y_schema; create table y_schema.y (a int, b int) distributed by (a); Begin; @@ -160,8 +167,6 @@ drop table if exists z_schema.y; drop schema if exists z_schema; --end_ignore -set GP_ENABLE_SLOW_CURSOR_TESTMODE=ON; - create schema z_schema; --create table z_schema.y (a int, b int) distributed randomly; create table z_schema.y (a int, b int) distributed by (a); diff --git a/src/test/regress/sql/gpdtm_plpgsql.sql b/src/test/regress/sql/gpdtm_plpgsql.sql index 1f673db183..2553a43106 100644 --- a/src/test/regress/sql/gpdtm_plpgsql.sql +++ b/src/test/regress/sql/gpdtm_plpgsql.sql @@ -205,8 +205,10 @@ abort; DROP TABLE dtm_plpg_foo; --- try to guarantee race-condition failures on the tests below. -SET GP_ENABLE_SLOW_CURSOR_TESTMODE=on; +-- Need to check what these tests wish to validate, better to use more +-- deterministic way than sleep. GP_ENABLE_SLOW_CURSOR_TESTMODE GUC was used +-- here to slow down reader gangs, removed the same as its not good way to write +-- the tests. BEGIN; SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; -- GitLab