Remove DB_IN_STANDBY_MODE, DB_IN_STANDBY_PROMOTED and use DB_IN_ARCHIVE_RECOVERY instead (#6725)

* Remove DB_IN_STANDBY_MODE, DB_IN_STANDBY_PROMOTED and use DB_IN_ARCHIVE_RECOVERY instead. Co-authored-by: Ashwin Agrawal <aagrawal@pivotal.io>; Reviewed-by: Paul Guo <pguo@pivotal.io>

Remove DB_IN_STANDBY_MODE, DB_IN_STANDBY_PROMOTED and use DB_IN_ARCHIVE_RECOVERY instead (#6725)
* Remove DB_IN_STANDBY_MODE, DB_IN_STANDBY_PROMOTED and use DB_IN_ARCHIVE_RECOVERY instead. Co-authored-by: Ashwin Agrawal <aagrawal@pivotal.io>; Reviewed-by: Paul Guo <pguo@pivotal.io>
27e43d3b · Shaoqi Bai · GitHub · 23527ae5 · 27e43d3b · 27e43d3b
5 changed files
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6457,18 +6457,6 @@ StartupXLOG(void)
 				   str_time(ControlFile->time)),
 			errhint("This probably means that some data is corrupted and"
 					" you will have to use the last backup for recovery.")));
-	else if (ControlFile->state == DB_IN_STANDBY_MODE)
-		ereport(LOG,
-				(errmsg("database system was interrupted while in standby mode at  %s",
-						str_time(ControlFile->checkPointCopy.time)),
-						errhint("This probably means something unexpected happened either"
-								" during replay at standby or receipt of XLog from primary.")));
-	else if (ControlFile->state == DB_IN_STANDBY_PROMOTED)
-		ereport(LOG,
-				(errmsg("database system was interrupted after standby was promoted at %s",
-						str_time(ControlFile->checkPointCopy.time)),
-				 errhint("If this has occurred more than once something unexpected is happening"
-				" after standby has been promoted")));
 	else if (ControlFile->state == DB_IN_PRODUCTION)
 		ereport(LOG,
 				(errmsg("database system was interrupted; last known up at %s",
@@ -6525,20 +6513,6 @@ StartupXLOG(void)
 	 */
 	readRecoveryCommandFile();

-	if (StandbyModeRequested)
-	{
-		Assert(ControlFile->state != DB_IN_CRASH_RECOVERY);
-
-		/*
-		 * If the standby was promoted (last time) and recovery.conf
-		 * is still found this time with standby mode request,
-		 * it means the standby crashed post promotion but before recovery.conf
-		 * cleanup. Hence, it is not considered a standby request this time.
-		 */
-		if (ControlFile->state == DB_IN_STANDBY_PROMOTED)
-			StandbyModeRequested = false;
-	}
-
 	/*
 	 * Save archive_cleanup_command in shared memory so that other processes
 	 * can see it.
@@ -6598,23 +6572,13 @@ StartupXLOG(void)
 		 * archive recovery directly.
 		 */
 		InArchiveRecovery = true;
-
-		/*
-		 * Currently, it is assumed that a backup file exists iff a base backup
-		 * has been performed and then the recovery.conf file is generated, thus
-		 * standby mode has to be requested
-		 */
-		if (!StandbyModeRequested)
-			ereport(FATAL,
-					(errmsg("Found backup.label file without any standby mode request")));
-
-		/* Activate recovery in standby mode */
-		StandbyMode = true;
-
+		if (StandbyModeRequested)
+			StandbyMode = true;
 		/*
 		 * When a backup_label file is present, we want to roll forward from
 		 * the checkpoint it identifies, rather than using pg_control.
 		 */
+
 		record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
 		if (record != NULL)
 		{
@@ -6651,12 +6615,6 @@ StartupXLOG(void)
 	}
 	else
 	{
-		if (StandbyModeRequested)
-		{
-			/* Activate recovery in standby mode */
-			StandbyMode = true;
-		}
-
 		/*
 		 * It's possible that archive recovery was requested, but we don't
 		 * know how far we need to replay the WAL before we reach consistency.
@@ -6888,17 +6846,17 @@ StartupXLOG(void)
 					(errmsg("invalid redo record in shutdown checkpoint")));
 		InRecovery = true;
 	}
-	else if (StandbyModeRequested)
+	else if (ControlFile->state != DB_SHUTDOWNED)
+		InRecovery = true;
+	else if (ArchiveRecoveryRequested)
 	{
 		/* force recovery due to presence of recovery.conf */
 		ereport(LOG,
 				(errmsg("setting recovery standby mode active")));
 		InRecovery = true;
 	}
-	else if (ControlFile->state != DB_SHUTDOWNED)
-		InRecovery = true;

-	/* Recovery from xlog */
+	/* REDO */
 	if (InRecovery)
 	{
 		int			rmid;
@@ -6912,12 +6870,8 @@ StartupXLOG(void)
 		 * pg_control with any minimum recovery stop point
 		 */
 		dbstate_at_startup = ControlFile->state;
-		if (StandbyMode)
-		{
-			ereport(LOG,
-					(errmsg("recovery in standby mode in progress")));
-			ControlFile->state = DB_IN_STANDBY_MODE;
-		}
+		if (InArchiveRecovery)
+			ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
 		else
 		{
 			ereport(LOG,
@@ -6930,15 +6884,14 @@ StartupXLOG(void)
 								ControlFile->checkPointCopy.ThisTimeLineID,
 								recoveryTargetTLI)));

-			if (ControlFile->state != DB_IN_STANDBY_PROMOTED)
-				ControlFile->state = DB_IN_CRASH_RECOVERY;
+			ControlFile->state = DB_IN_CRASH_RECOVERY;
 		}

 		ControlFile->prevCheckPoint = ControlFile->checkPoint;
 		ControlFile->checkPoint = checkPointLoc;
 		ControlFile->checkPointCopy = checkPoint;

-		if (StandbyMode)
+		if (InArchiveRecovery)
 		{
 			/* initialize minRecoveryPoint if not set yet */
 			if (ControlFile->minRecoveryPoint < checkPoint.redo)
@@ -6965,7 +6918,6 @@ StartupXLOG(void)
 		 */
 		if (haveBackupLabel)
 		{
-			Assert(ControlFile->state == DB_IN_STANDBY_MODE);
 			ControlFile->backupStartPoint = checkPoint.redo;
 			ControlFile->backupEndRequired = backupEndRequired;

@@ -7479,22 +7431,6 @@ StartupXLOG(void)
 	if (StandbyModeRequested)
 		DisownLatch(&XLogCtl->recoveryWakeupLatch);

-	/*
-	 * We are now done reading the xlog from stream.
-	 */
-	if (StandbyMode)
-	{
-		Assert(ControlFile->state == DB_IN_STANDBY_MODE);
-		StandbyMode = false;
-
-		elog(LOG, "updating pg_control to state DB_IN_STANDBY_PROMOTED");
-
-		/* Transition to promoted mode */
-		ControlFile->state = DB_IN_STANDBY_PROMOTED;
-		ControlFile->time = (pg_time_t) time(NULL);
-		UpdateControlFile();
-	}
-
 	/*
 	 * We are now done reading the xlog from stream. Turn off streaming
 	 * recovery to force fetching the files (which would be required at end of
@@ -7610,19 +7546,6 @@ StartupXLOG(void)
 		writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
 							 EndRecPtr, reason);
 	}
-	else if (ControlFile->state == DB_IN_STANDBY_PROMOTED)
-	{
-		/*
-		 * If standby is promoted, we should advance timeline ID.
-		 */
-		ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1;
-		ereport(LOG,
-				(errmsg("selected new timeline ID: %u", ThisTimeLineID)));
-		writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
-							 EndRecPtr, "standby promoted");
-
-		XLogFileCopy(endLogSegNo, xlogreader->readPageTLI, endLogSegNo);
-	}

 	/* Save the selected TimeLineID in shared memory, too */
 	XLogCtl->ThisTimeLineID = ThisTimeLineID;
@@ -7829,7 +7752,7 @@ StartupXLOG(void)
 	 * managed by FTS.
 	 */
 	bool needToPromoteCatalog = (IS_QUERY_DISPATCHER() &&
-								 ControlFile->state == DB_IN_STANDBY_PROMOTED);
+								 ControlFile->state == DB_IN_ARCHIVE_RECOVERY);

 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
 	ControlFile->state = DB_IN_PRODUCTION;
@@ -8722,20 +8645,11 @@ CreateCheckPoint(int flags)

 	if (shutdown)
 	{
-		/*
-		 * This is an ugly fix to dis-allow changing the pg_control
-		 * state for standby promotion continuity.
-		 *
-		 * Refer to Startup_InProduction() for more details
-		 */
-		if (ControlFile->state != DB_IN_STANDBY_PROMOTED)
-		{
-			LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-			ControlFile->state = DB_SHUTDOWNING;
-			ControlFile->time = (pg_time_t) time(NULL);
-			UpdateControlFile();
-			LWLockRelease(ControlFileLock);
-		}
+	    LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+	    ControlFile->state = DB_SHUTDOWNING;
+	    ControlFile->time = (pg_time_t) time(NULL);
+	    UpdateControlFile();
+	    LWLockRelease(ControlFileLock);
 	}

 	/*
@@ -9078,12 +8992,7 @@ CreateCheckPoint(int flags)
 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
 	if (shutdown)
 	{
-		/*
-		 * Ugly fix to dis-allow changing pg_control state
-		 * for standby promotion continuity
-		 */
-		if (ControlFile->state != DB_IN_STANDBY_PROMOTED)
-			ControlFile->state = DB_SHUTDOWNED;
+	    ControlFile->state = DB_SHUTDOWNED;
 	}

 	ControlFile->prevCheckPoint = ControlFile->checkPoint;
@@ -9451,18 +9360,9 @@ CreateRestartPoint(int flags)
 	 * IN_ARCHIVE_RECOVERY state and an older checkpoint, else do nothing;
 	 * this is a quick hack to make sure nothing really bad happens if somehow
 	 * we get here after the end-of-recovery checkpoint.
-	 *
-	 * GPDB allows replay to also change the control file during
-	 * DB_IN_STANDBY_MODE so that mirror can be restarted from the latest
-	 * checkpoint location. This will save the recovery time of mirror, and also
-	 * allow mirror to remove already replayed xlogs.
-	 *
-	 * FIXME: need to consider consolidating the DB_IN_ARCHIVE_RECOVERY (upstream)
-	 * and DB_IN_STANDBY_MODE (GPDB only)
 	 */
 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-	if ((ControlFile->state == DB_IN_ARCHIVE_RECOVERY
-		     || ControlFile->state == DB_IN_STANDBY_MODE) &&
+	if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY &&
 	    ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
 	{
 		ControlFile->prevCheckPoint = ControlFile->checkPoint;
@@ -10121,16 +10021,6 @@ xlog_redo(XLogRecPtr beginLoc __attribute__((unused)), XLogRecPtr lsn __attribut
 			StandbyRecoverPreparedTransactions(true);
 		}

-		/*
-		 * If we see a shutdown checkpoint while waiting for an end-of-backup
-		 * record, the backup was canceled and the end-of-backup record will
-		 * never arrive.
-		 */
-		if (StandbyMode &&
-			!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
-			ereport(PANIC,
-			(errmsg("online backup was canceled, recovery cannot continue")));
-
 		/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
 		ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;
 		ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;

--- a/src/backend/fts/ftsmessagehandler.c
+++ b/src/backend/fts/ftsmessagehandler.c
@@ -377,7 +377,7 @@ HandleFtsWalRepPromote(void)
 	 * idempotent way.
 	 */
 	DBState state = GetCurrentDBState();
-	if (state == DB_IN_STANDBY_MODE)
+	if (state == DB_IN_ARCHIVE_RECOVERY)
 	{
 		/*
 		 * Reset sync_standby_names on promotion. This is to avoid commits

--- a/src/backend/fts/test/ftsmessagehandler_test.c
+++ b/src/backend/fts/test/ftsmessagehandler_test.c
@@ -148,7 +148,7 @@ test_HandleFtsWalRepPromoteMirror(void **state)
 	max_replication_slots = 1;
 	am_mirror = true;

-	will_return(GetCurrentDBState, DB_IN_STANDBY_MODE);
+	will_return(GetCurrentDBState, DB_IN_ARCHIVE_RECOVERY);
 	will_be_called(UnsetSyncStandbysDefined);
 	will_be_called(SignalPromote);


--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -61,10 +61,6 @@ dbState(DBState state)
 			return _("in crash recovery");
 		case DB_IN_ARCHIVE_RECOVERY:
 			return _("in archive recovery");
-		case DB_IN_STANDBY_MODE:
-			return _("in standby mode");
-		case DB_IN_STANDBY_PROMOTED:
-			return _("in standby mode (promoted)");
 		case DB_IN_PRODUCTION:
 			return _("in production");
 	}

--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -91,8 +91,6 @@ typedef enum DBState
 	DB_SHUTDOWNING,
 	DB_IN_CRASH_RECOVERY,
 	DB_IN_ARCHIVE_RECOVERY,
-	DB_IN_STANDBY_MODE,
-	DB_IN_STANDBY_PROMOTED,
 	DB_IN_PRODUCTION
 } DBState;