From 0e1b9a058c7c96190c19d3872637828e4d2c565e Mon Sep 17 00:00:00 2001 From: Ashwin Agrawal Date: Tue, 20 Mar 2018 13:30:17 -0700 Subject: [PATCH] Cleanup some FTS functions. --- src/backend/cdb/cdbcopy.c | 9 +++- src/backend/cdb/cdbfts.c | 51 ++------------------- src/backend/cdb/dispatcher/cdbdisp_async.c | 15 +++--- src/backend/cdb/dispatcher/cdbdisp_thread.c | 2 +- src/backend/cdb/dispatcher/cdbgang.c | 6 +-- src/backend/cdb/dispatcher/cdbgang_async.c | 1 + src/backend/cdb/dispatcher/cdbgang_thread.c | 1 + src/include/cdb/cdbfts.h | 4 +- 8 files changed, 26 insertions(+), 63 deletions(-) diff --git a/src/backend/cdb/cdbcopy.c b/src/backend/cdb/cdbcopy.c index 1cf4dd3023..da64efee78 100644 --- a/src/backend/cdb/cdbcopy.c +++ b/src/backend/cdb/cdbcopy.c @@ -28,6 +28,7 @@ #include "cdb/cdbtm.h" #include "cdb/cdbvars.h" #include "commands/copy.h" +#include "storage/pmsignal.h" #include "tcop/tcopprot.h" #include "utils/faultinjector.h" #include "utils/memutils.h" @@ -755,8 +756,12 @@ cdbCopyEndAndFetchRejectNum(CdbCopy *c, int *total_rows_completed) if (failed_count > 0) { elog(LOG, "%s", c->err_msg.data); - elog(LOG, "COPY passes failed segment(s) information to FTS"); - FtsHandleNetFailure(failedSegDBs, failed_count); + elog(LOG, "COPY signals FTS to probe segments"); + SendPostmasterSignal(PMSIGNAL_WAKEN_FTS); + DisconnectAndDestroyAllGangs(true); + ereport(ERROR, + (errmsg_internal("MPP detected %d segment failures, system is reconnected", failed_count), + errSendAlert(true))); } pfree(results); diff --git a/src/backend/cdb/cdbfts.c b/src/backend/cdb/cdbfts.c index 8783864a1a..881d41eb99 100644 --- a/src/backend/cdb/cdbfts.c +++ b/src/backend/cdb/cdbfts.c @@ -133,16 +133,11 @@ FtsNotifyProber(void) * dispatcher: ONLY CALL THREADSAFE FUNCTIONS -- elog() is NOT threadsafe. 
*/ bool -FtsTestConnection(CdbComponentDatabaseInfo *failedDBInfo, bool fullScan) +FtsIsSegmentUp(CdbComponentDatabaseInfo *dBInfo) { /* master is always reported as alive */ - if (failedDBInfo->segindex == MASTER_SEGMENT_ID) - { + if (dBInfo->segindex == MASTER_SEGMENT_ID) return true; - } - - if (fullScan) - FtsNotifyProber(); /* * If fullscan is not requested, caller is just trying to optimize on the @@ -153,43 +148,10 @@ FtsTestConnection(CdbComponentDatabaseInfo *failedDBInfo, bool fullScan) * checking against uninitialzed variable. */ return ftsProbeInfo->fts_statusVersion ? - FTS_STATUS_IS_UP(ftsProbeInfo->fts_status[failedDBInfo->dbid]) : + FTS_STATUS_IS_UP(ftsProbeInfo->fts_status[dBInfo->dbid]) : true; } -/* - * Re-Configure the system: if someone has noticed that the status - * version has been updated, they call this to verify that they've got - * the right configuration. - * - * NOTE: This *always* destroys gangs. And also attempts to inform the - * fault-prober to do a full scan. - */ -void -FtsReConfigureMPP(bool create_new_gangs) -{ - /* need to scan to pick up the latest view */ - FtsNotifyProber(); - - ereport(LOG, (errmsg_internal("FTS: reconfiguration is in progress"), - errSendAlert(true))); - DisconnectAndDestroyAllGangs(true); - - /* Caller should throw an error. */ - return; -} - -void -FtsHandleNetFailure(SegmentDatabaseDescriptor **segDB, int numOfFailed) -{ - elog(LOG, "FtsHandleNetFailure: numOfFailed %d", numOfFailed); - - FtsReConfigureMPP(true); - - ereport(ERROR, (errmsg_internal("MPP detected %d segment failures, system is reconnected", numOfFailed), - errSendAlert(true))); -} - /* * Check if any segment DB is down. 
* @@ -199,7 +161,6 @@ bool FtsTestSegmentDBIsDown(SegmentDatabaseDescriptor *segdbDesc, int size) { int i = 0; - bool forceRescan = true; for (i = 0; i < size; i++) { @@ -207,21 +168,17 @@ FtsTestSegmentDBIsDown(SegmentDatabaseDescriptor *segdbDesc, int size) elog(DEBUG2, "FtsTestSegmentDBIsDown: looking for real fault on segment dbid %d", segInfo->dbid); - if (!FtsTestConnection(segInfo, forceRescan)) + if (!FtsIsSegmentUp(segInfo)) { ereport(LOG, (errmsg_internal("FTS: found fault with segment dbid %d. " "Reconfiguration is in progress", segInfo->dbid))); return true; } - - /* only force the rescan on the first call. */ - forceRescan = false; } return false; } - void FtsCondSetTxnReadOnly(bool *XactFlag) { diff --git a/src/backend/cdb/dispatcher/cdbdisp_async.c b/src/backend/cdb/dispatcher/cdbdisp_async.c index 052f1ce168..953111f7e0 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_async.c +++ b/src/backend/cdb/dispatcher/cdbdisp_async.c @@ -766,8 +766,6 @@ signalQEs(CdbDispatchCmdAsync *pParms) /* * Check if any segment DB down is detected by FTS. - * - * Issue a FTS probe every 1 minute. */ static void checkSegmentAlive(CdbDispatchCmdAsync *pParms) @@ -776,8 +774,7 @@ checkSegmentAlive(CdbDispatchCmdAsync *pParms) bool forceScan = true; /* - * check the connection still valid, set 1 min time interval this may - * affect performance, should turn it off if required. 
+ * Check that each connection is still valid */ for (i = 0; i < pParms->dispatchCount; i++) { @@ -799,7 +796,13 @@ checkSegmentAlive(CdbDispatchCmdAsync *pParms) ELOG_DISPATCHER_DEBUG("FTS testing connection %d of %d (%s)", i + 1, pParms->dispatchCount, segdbDesc->whoami); - if (!FtsTestConnection(segdbDesc->segment_database_info, forceScan)) + if (forceScan) + { + FtsNotifyProber(); + forceScan = false; + } + + if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) { char *msg = PQerrorMessage(segdbDesc->conn); @@ -815,8 +818,6 @@ checkSegmentAlive(CdbDispatchCmdAsync *pParms) PQfinish(segdbDesc->conn); segdbDesc->conn = NULL; } - - forceScan = false; } } diff --git a/src/backend/cdb/dispatcher/cdbdisp_thread.c b/src/backend/cdb/dispatcher/cdbdisp_thread.c index c8b64b8910..53e1cb66b4 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_thread.c +++ b/src/backend/cdb/dispatcher/cdbdisp_thread.c @@ -936,7 +936,7 @@ cdbdisp_checkSegmentDBAlive(DispatchCommandParms *pParms) WRITE_LOG_DISPATCHER_DEBUG("testing connection %d of %d %s stillRunning %d", i + 1, pParms->db_count, segdbDesc->whoami, dispatchResult->stillRunning); - if (!FtsTestConnection(segdbDesc->segment_database_info, false)) + if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) { cdbdisp_appendMessage(dispatchResult, LOG, "Lost connection to %s. 
FTS detected segment failures.", diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 928e69a92a..f4e7c2b51c 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -537,7 +537,7 @@ buildGangDefinition(GangType type, int gang_id, int size, int content) if (size != segCount) { - FtsReConfigureMPP(false); + DisconnectAndDestroyAllGangs(true); elog(ERROR, "Not all primary segment instances are active and connected"); } break; @@ -1317,7 +1317,7 @@ cleanupGang(Gang *gp) return false; /* if segment is down, the gang can not be reused */ - if (!FtsTestConnection(segdbDesc->segment_database_info, false)) + if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) return false; /* Note, we cancel all "still running" queries */ @@ -1827,7 +1827,7 @@ GangOK(Gang *gp) if (cdbconn_isBadConnection(segdbDesc)) return false; - if (!FtsTestConnection(segdbDesc->segment_database_info, false)) + if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) return false; } diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index 5b0fa49f8c..9c8e6cef50 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -297,6 +297,7 @@ create_gang_retry: { MemoryContextSwitchTo(GangContext); + FtsNotifyProber(); /* FTS shows some segment DBs are down */ if (FtsTestSegmentDBIsDown(newGangDefinition->db_descriptors, size)) { diff --git a/src/backend/cdb/dispatcher/cdbgang_thread.c b/src/backend/cdb/dispatcher/cdbgang_thread.c index 6fbda94067..add41176c8 100644 --- a/src/backend/cdb/dispatcher/cdbgang_thread.c +++ b/src/backend/cdb/dispatcher/cdbgang_thread.c @@ -223,6 +223,7 @@ create_gang_retry: /* there'er failed connections */ + FtsNotifyProber(); /* FTS shows some segment DBs are down, destroy all gangs. 
*/ if (FtsTestSegmentDBIsDown(newGangDefinition->db_descriptors, size)) { diff --git a/src/include/cdb/cdbfts.h b/src/include/cdb/cdbfts.h index e9005c4175..d0de825695 100644 --- a/src/include/cdb/cdbfts.h +++ b/src/include/cdb/cdbfts.h @@ -55,9 +55,7 @@ extern volatile FtsProbeInfo *ftsProbeInfo; extern int FtsShmemSize(void); extern void FtsShmemInit(void); -extern bool FtsTestConnection(CdbComponentDatabaseInfo *db_to_test, bool full_scan); -extern void FtsReConfigureMPP(bool create_new_gangs); -extern void FtsHandleNetFailure(SegmentDatabaseDescriptor **, int); +extern bool FtsIsSegmentUp(CdbComponentDatabaseInfo *dBInfo); extern bool FtsTestSegmentDBIsDown(SegmentDatabaseDescriptor *, int); extern bool verifyFtsSyncCount(void); -- GitLab