From af3bbc3e5380600a2e83953748a037ab8f615e26 Mon Sep 17 00:00:00 2001 From: Paul Guo Date: Tue, 3 Dec 2019 15:24:11 +0800 Subject: [PATCH] Do not terminate the connection and quit in SyncRepWaitForLSN(). We previously had the code to terminate the connection if needed on QE to avoid potential data inconsistency. This is gpdb specific since upstream code there seems to be not friendly for failover + data consistency. However that introduces various abort or assert failures since apparently some shm exit callback functions are not friendly to the current transaction state. Below are some stack examples. Originally I fixed them in those callback functions but I found on gpdb6 after I fixed one, another one (in another callback function) come out. That's why I could collect so many gpdb 6 stacks below. I just collect one gpdb master stack, but I it should have more stacks also if we fixing in those callbacks one by one. Anyway finally I decide to fix by delaying the ereport(FATAL) exec_mpp_dtx_protocol_command() instead, and let QD retry 2PC to ensure the data consistency. Note 1PC retry is currently not implemented but this should be in another PR. gpdb master (7) stack: 2 0x0000000000b48ddc in ExceptionalCondition (conditionName=0xe527e8 "!(ShmemAddrIsValid(nextElem))", errorType=0xe527bd "FailedAssertion", fileName=0xe527b2 "shmqueue.c", lineNumber=74) at assert.c:66 3 0x0000000000996311 in SHMQueueDelete (queue=0x7ff5e6676da8) at shmqueue.c:74 4 0x00000000009689de in SyncRepCleanupAtProcExit () at syncrep.c:436 5 0x00000000009a7b49 in ProcKill (code=1, arg=0) at proc.c:949 6 0x000000000098c001 in shmem_exit (code=1) at ipc.c:288 7 0x000000000098be5f in proc_exit_prepare (code=1) at ipc.c:212 8 0x000000000098bd64 in proc_exit (code=1) at ipc.c:104 9 0x0000000000b4a7d4 in errfinish (dummy=0) at elog.c:738 10 0x000000000096860e in SyncRepWaitForLSN (lsn=210148624, commit=1 '\001') at syncrep.c:303 11 0x000000000055c082 in RecordTransactionCommitPrepared (xid=638, gid=0x2c603ad "1575462785-0000000012", nchildren=0, children=0x2c6d2d0, nrels=0, rels=0x2c6d2d0, ndeldbs=0, deldbs=0x2c6d2d0, ninvalmsgs=0, invalmsgs=0x2c6d2d0, initfileinval=0 '\000') at twophase.c:2283 12 0x000000000055aae3 in FinishPreparedTransaction (gid=0x2c603ad "1575462785-0000000012", isCommit=1 '\001', raiseErrorIfNotFound=0 '\000') at twophase.c:1493 13 0x0000000000c4e4fe in performDtxProtocolCommitPrepared (gid=0x2c603ad "1575462785-0000000012", raiseErrorIfNotFound=0 '\000') at cdbtm.c:2037 14 0x0000000000c4e9d5 in performDtxProtocolCommand (dtxProtocolCommand=DTX_PROTOCOL_COMMAND_RECOVERY_COMMIT_PREPARED, gid=0x2c603ad "1575462785-0000000012", contextInfo=0x1220f20) at cdbtm.c:2215 gpdb 6 stacks: 2 0x0000000000ad9ea5 in ExceptionalCondition (conditionName=0xdbfddb "!(MyProc->syncRepState == 0)", errorType=0xdbfd28 "FailedAssertion", fileName=0xdbfcd0 "syncrep.c", lineNumber=130) at assert.c:66 3 0x000000000091ce81 in SyncRepWaitForLSN (XactCommitLSN=3400317528) at syncrep.c:130 4 0x000000000053991a in RecordTransactionCommit () at xact.c:1663 5 0x000000000053b0b2 in CommitTransaction () at xact.c:2756 6 0x000000000053c024 in CommitTransactionCommand () at xact.c:3646 7 0x00000000005c6c25 in RemoveTempRelationsCallback (code=1, arg=0) at namespace.c:4107 8 0x000000000093c353 in shmem_exit (code=1) at ipc.c:257 9 0x000000000093c248 in proc_exit_prepare (code=1) at ipc.c:214 10 0x000000000093c146 in proc_exit (code=1) at ipc.c:104 11 0x0000000000adb93d in errfinish (dummy=0) at elog.c:754 12 0x000000000091d2ef in SyncRepWaitForLSN (XactCommitLSN=3400294096) at syncrep.c:284 13 0x0000000000549d8e in EndPrepare (gxact=0x7f8a7d5fa0e0) at twophase.c:1241 3 0x0000000000ade6d1 in elog_finish (elevel=22, fmt=0xc3a898 "cannot abort transaction %u, it was already committed") at elog.c:1735 4 0x0000000000539d22 in RecordTransactionAbort (isSubXact=0 '\000') at xact.c:1923 5 0x000000000053b95c in AbortTransaction () at xact.c:3340 6 0x000000000053e0a7 in AbortOutOfAnyTransaction () at xact.c:5248 7 0x00000000005c68b9 in RemoveTempRelationsCallback (code=1, arg=0) at namespace.c:4088 8 0x000000000093c371 in shmem_exit (code=1) at ipc.c:257 9 0x000000000093c266 in proc_exit_prepare (code=1) at ipc.c:214 10 0x000000000093c164 in proc_exit (code=1) at ipc.c:104 11 0x0000000000adb94e in errfinish (dummy=0) at elog.c:754 12 0x000000000091d30d in SyncRepWaitForLSN (XactCommitLSN=19529538376) at syncrep.c:284 13 0x000000000053985a in RecordTransactionCommit () at xact.c:1663 2 0x0000000000adb9a9 in ExceptionalCondition (conditionName=0xdb2560 "!(entry->trans == ((void *)0))", errorType=0xdb2550 "FailedAssertion", fileName=0xdb216a "pgstat.c", lineNumber=842) at assert.c:66 3 0x00000000008d3391 in pgstat_report_stat (force=1 '\001') at pgstat.c:842 4 0x00000000008d65e8 in pgstat_beshutdown_hook (code=1, arg=0) at pgstat.c:2685 5 0x000000000093deba in shmem_exit (code=1) at ipc.c:290 6 0x000000000093dd1a in proc_exit_prepare (code=1) at ipc.c:214 7 0x000000000093dc18 in proc_exit (code=1) at ipc.c:104 8 0x0000000000add441 in errfinish (dummy=0) at elog.c:750 9 0x000000000091ee5c in SyncRepWaitForLSN (XactCommitLSN=225227432) at syncrep.c:333 10 0x0000000000549dd8 in EndPrepare (gxact=0x7f02508680e0) at twophase.c:1241 2 0x0000000000adb9c2 in ExceptionalCondition (conditionName=0xdcb458 "!(!((allPgXact[proc->pgprocno].xid) != ((TransactionId) 0)))", errorType=0xdcb408 "FailedAssertion", fileName=0xdcb3d9 "procarray.c", lineNumber=369) at assert.c:66 3 0x000000000093f614 in ProcArrayRemove (proc=0x7f4f1f5a05d0, latestXid=0) at procarray.c:369 4 0x00000000009586ec in RemoveProcFromArray (code=1, arg=0) at proc.c:904 5 0x000000000093ded3 in shmem_exit (code=1) at ipc.c:290 6 0x000000000093dd33 in proc_exit_prepare (code=1) at ipc.c:214 7 0x000000000093dc31 in proc_exit (code=1) at ipc.c:104 8 0x0000000000add45a in errfinish (dummy=0) at elog.c:750 9 0x000000000091ee75 in SyncRepWaitForLSN (XactCommitLSN=348629504) at syncrep.c:333 10 0x0000000000549dd8 in EndPrepare (gxact=0x7f4f1fa8cce0) at twophase.c:1241 11 0x000000000053b621 in PrepareTransaction () at xact.c:3115 Reviewed-by: Ashwin Agrawal Reviewed-by: Asim R P Cherry-picked from 7b76173061871c4d6bbe974584917295cadf75df --- src/backend/replication/syncrep.c | 16 ++- src/backend/tcop/postgres.c | 10 ++ .../die_commit_pending_replication.out | 107 ++++++++++++++++++ src/test/isolation2/isolation2_schedule | 2 + .../die_commit_pending_replication.sql | 52 +++++++++ 5 files changed, 182 insertions(+), 5 deletions(-) create mode 100644 src/test/isolation2/expected/segwalrep/die_commit_pending_replication.out create mode 100644 src/test/isolation2/sql/segwalrep/die_commit_pending_replication.sql diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index 1aff542ec7..8276d27d9c 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -263,6 +263,8 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN, bool commit) break; } + SIMPLE_FAULT_INJECTOR("sync_rep_query_die"); + /* * If a wait for synchronous replication is pending, we can neither * acknowledge the commit nor raise ERROR or FATAL. The latter would @@ -278,13 +280,14 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN, bool commit) if (ProcDiePending) { /* - * FATAL only for QE's which use 2PC and hence can handle the - * FATAL and retry. + * For QE we should have done FATAL here so that 2PC can retry, but + * FATAL here makes some shm exit callback functions panic or + * assert fail because the transaction is still not finished, so + * let's defer the quitting to exec_mpp_dtx_protocol_command(). */ - ereport(IS_QUERY_DISPATCHER() ? WARNING:FATAL, + ereport(WARNING, (errcode(ERRCODE_ADMIN_SHUTDOWN), - errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"), - errdetail("The transaction has already committed locally, but might not have been replicated to the standby."))); + errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"))); whereToSendOutput = DestNone; SyncRepCancelWait(); break; @@ -320,6 +323,9 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN, bool commit) if (!PostmasterIsAlive()) { ProcDiePending = true; + ereport(WARNING, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("canceling the wait for synchronous replication and terminating connection due to postmaster death"))); whereToSendOutput = DestNone; SyncRepCancelWait(); break; diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 888c31e259..a54ac7e3a3 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1523,6 +1523,16 @@ exec_mpp_dtx_protocol_command(DtxProtocolCommand dtxProtocolCommand, Debug_dtm_action, DtxProtocolCommandToString(dtxProtocolCommand)))); } + /* + * GPDB: There is a corner case that we need to delay connection + * termination to here. see SyncRepWaitForLSN() for details. + * */ + if (ProcDiePending) + ereport(FATAL, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("Terminating the connection (DTM protocol command '%s' " + "for gid=%s", loggingStr, gid))); + EndCommand(commandTag, dest); } diff --git a/src/test/isolation2/expected/segwalrep/die_commit_pending_replication.out b/src/test/isolation2/expected/segwalrep/die_commit_pending_replication.out new file mode 100644 index 0000000000..28d0e53b4a --- /dev/null +++ b/src/test/isolation2/expected/segwalrep/die_commit_pending_replication.out @@ -0,0 +1,107 @@ +-- Adding `2` as first column as the distribution column. +-- `2` should be on the first segment. let's double check here. +create table store_session_id(a int, sess_id int); +CREATE +1: insert into store_session_id select 2, sess_id from pg_stat_activity where pid = pg_backend_pid(); +INSERT 1 +1: select gp_segment_id, a from store_session_id; + gp_segment_id | a +---------------+--- + 0 | 2 +(1 row) + +1: create table die_commit_pending_replication(a int, b int); +CREATE + +-- Suspend to hit commit-prepared point on segment (as we are +-- interested in testing Commit here and not really Prepare) +select gp_inject_fault_infinite('finish_prepared_start_of_function', 'suspend', dbid) from gp_segment_configuration where role='p' and content = 0; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) +1&: insert into die_commit_pending_replication values(2),(1); +select gp_wait_until_triggered_fault('finish_prepared_start_of_function', 1, dbid) from gp_segment_configuration where role='p' and content = 0; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) + +-- Now pause the wal sender on primary for content 0 +select gp_inject_fault_infinite('wal_sender_loop', 'suspend', dbid) from gp_segment_configuration where role='p' and content = 0; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) + +-- Insert fault in the ProcDiePending code block. +select gp_inject_fault_infinite('sync_rep_query_die', 'skip', dbid) from gp_segment_configuration where role='p' and content = 0; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) + +-- Let the transaction move forward with the commit +select gp_inject_fault('finish_prepared_start_of_function', 'reset', dbid) from gp_segment_configuration where role='p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) + +-- Ensure the ProcDiePending code block is called. It implies 'replication' in pg_stat_activity. +select gp_wait_until_triggered_fault('sync_rep_query_die', 1, dbid) from gp_segment_configuration where role='p' and content = 0; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) + +-- We can terminate the backend on QE now. +0U: select pg_terminate_backend(pid) from pg_stat_activity where waiting_reason='replication' and sess_id in (select sess_id from store_session_id); + pg_terminate_backend +---------------------- + t +(1 row) + +-- We expect two more occurrence: one for backend quitting and another for retry. +select gp_wait_until_triggered_fault('sync_rep_query_die', 3, dbid) from gp_segment_configuration where role='p' and content = 0; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) + +-- Verify that the sess_id changes due to retry. +0U: select pid,sess_id,waiting_reason,query from pg_stat_activity where sess_id in (select sess_id from store_session_id); + pid | sess_id | waiting_reason | query +-----+---------+----------------+------- +(0 rows) + +-- resume the primary wal replication so that retry could complete. +select gp_inject_fault('wal_sender_loop', 'reset', dbid) from gp_segment_configuration where role='p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) + +-- reset sync_rep_query_die +select gp_inject_fault('sync_rep_query_die', 'reset', dbid) from gp_segment_configuration where role='p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) + +1<: <... completed> +INSERT 2 + +-- check if the insert fails or not. +select gp_segment_id, * from die_commit_pending_replication; + gp_segment_id | a | b +---------------+---+--- + 0 | 2 | + 1 | 1 | +(2 rows) + +-- cleanup +drop table die_commit_pending_replication; +DROP +drop table store_session_id; +DROP diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index c68236ca23..05224adf1c 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -179,6 +179,8 @@ test: uao/insert_should_not_use_awaiting_drop_column test: add_column_after_vacuum_skip_drop_column test: vacuum_after_vacuum_skip_drop_column +test: segwalrep/die_commit_pending_replication + # Tests for FTS test: fts_errors test: segwalrep/commit_blocking diff --git a/src/test/isolation2/sql/segwalrep/die_commit_pending_replication.sql b/src/test/isolation2/sql/segwalrep/die_commit_pending_replication.sql new file mode 100644 index 0000000000..f34bde11b8 --- /dev/null +++ b/src/test/isolation2/sql/segwalrep/die_commit_pending_replication.sql @@ -0,0 +1,52 @@ +-- Adding `2` as first column as the distribution column. +-- `2` should be on the first segment. let's double check here. +create table store_session_id(a int, sess_id int); +1: insert into store_session_id select 2, sess_id from pg_stat_activity where pid = pg_backend_pid(); +1: select gp_segment_id, a from store_session_id; + +1: create table die_commit_pending_replication(a int, b int); + +-- Suspend to hit commit-prepared point on segment (as we are +-- interested in testing Commit here and not really Prepare) +select gp_inject_fault_infinite('finish_prepared_start_of_function', 'suspend', dbid) from gp_segment_configuration where role='p' and content = 0; +1&: insert into die_commit_pending_replication values(2),(1); +select gp_wait_until_triggered_fault('finish_prepared_start_of_function', 1, dbid) from gp_segment_configuration where role='p' and content = 0; + +-- Now pause the wal sender on primary for content 0 +select gp_inject_fault_infinite('wal_sender_loop', 'suspend', dbid) from gp_segment_configuration where role='p' and content = 0; + +-- Insert fault in the ProcDiePending code block. +select gp_inject_fault_infinite('sync_rep_query_die', 'skip', dbid) from gp_segment_configuration where role='p' and content = 0; + +-- Let the transaction move forward with the commit +select gp_inject_fault('finish_prepared_start_of_function', 'reset', dbid) from gp_segment_configuration where role='p' and content = 0; + +-- Ensure the ProcDiePending code block is called. It implies 'replication' in pg_stat_activity. +select gp_wait_until_triggered_fault('sync_rep_query_die', 1, dbid) from gp_segment_configuration where role='p' and content = 0; + +-- We can terminate the backend on QE now. +0U: select pg_terminate_backend(pid) from pg_stat_activity + where waiting_reason='replication' and + sess_id in (select sess_id from store_session_id); + +-- We expect two more occurrence: one for backend quitting and another for retry. +select gp_wait_until_triggered_fault('sync_rep_query_die', 3, dbid) from gp_segment_configuration where role='p' and content = 0; + +-- Verify that the sess_id changes due to retry. +0U: select pid,sess_id,waiting_reason,query from pg_stat_activity + where sess_id in (select sess_id from store_session_id); + +-- resume the primary wal replication so that retry could complete. +select gp_inject_fault('wal_sender_loop', 'reset', dbid) from gp_segment_configuration where role='p' and content = 0; + +-- reset sync_rep_query_die +select gp_inject_fault('sync_rep_query_die', 'reset', dbid) from gp_segment_configuration where role='p' and content = 0; + +1<: + +-- check if the insert fails or not. +select gp_segment_id, * from die_commit_pending_replication; + +-- cleanup +drop table die_commit_pending_replication; +drop table store_session_id; -- GitLab