From 5222ad86584e807b725f8b5d0e6f8a04396101b3 Mon Sep 17 00:00:00 2001 From: Paul Guo Date: Tue, 28 Apr 2020 13:23:41 +0800 Subject: [PATCH] Let Fts tolerate the in-progress 'starting up' case on primary nodes. commit d453a4a implemented that for the crash recovery case (not marking the node down and then not promoting the mirror). It seems that we should also do that for the usual "starting up" case (i.e. CAC_STARTUP), in addition to the existing "in recovery mode" case (i.e. CAC_RECOVERY). We've seen that fts promotes the "starting up" primary during isolation2 testing due to 'pg_ctl restart'. In this patch we check recovery progress for both CAC_STARTUP and CAC_RECOVERY during fts probe and thus can avoid this. Reviewed-by: Ashwin Agrawal cherry-picked from d71b3afd8ebe4dcc50a9021d04122ab5ab8f4dfe On master the commit message was eliminated by mistake. Added back on gpdb6. --- src/backend/fts/ftsprobe.c | 6 ++++-- src/backend/postmaster/postmaster.c | 7 ++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/backend/fts/ftsprobe.c b/src/backend/fts/ftsprobe.c index c8824e74bd..6437cda267 100644 --- a/src/backend/fts/ftsprobe.c +++ b/src/backend/fts/ftsprobe.c @@ -200,7 +200,8 @@ ftsConnectStart(fts_segment_info *ftsInfo) static void checkIfFailedDueToRecoveryInProgress(fts_segment_info *ftsInfo) { - if (strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RECOVERY_MSG))) + if (strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RECOVERY_MSG)) || + strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_STARTUP_MSG))) { XLogRecPtr tmpptr; char *ptr = strstr(PQerrorMessage(ftsInfo->conn), @@ -1068,7 +1069,8 @@ processResponse(fts_context *context) /* If primary is in recovery, do not mark it down and promote mirror */ if (ftsInfo->recovery_making_progress) { - Assert(strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RECOVERY_MSG))); + Assert(strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RECOVERY_MSG)) || + 
strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_STARTUP_MSG))); elogif(gp_log_fts >= GPVARS_VERBOSITY_VERBOSE, LOG, "FTS: detected segment is in recovery mode and making " "progress (content=%d) primary dbid=%d, mirror dbid=%d", diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 1ae386fd20..c3580630b9 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2519,9 +2519,14 @@ retry1: case CAC_STARTUP: if ((am_ftshandler || IsFaultHandler) && am_mirror) break; + + recptr = last_xlog_replay_location(); + ereport(FATAL, (errcode(ERRCODE_CANNOT_CONNECT_NOW), - errmsg(POSTMASTER_IN_STARTUP_MSG))); + errmsg(POSTMASTER_IN_STARTUP_MSG), + errdetail(POSTMASTER_IN_RECOVERY_DETAIL_MSG " %X/%X", + (uint32) (recptr >> 32), (uint32) recptr))); break; case CAC_SHUTDOWN: ereport(FATAL, -- GitLab