diff --git a/src/backend/cdb/cdbcopy.c b/src/backend/cdb/cdbcopy.c index 1cf4dd30237f7a1c8acf62fc235009fc8a904713..da64efee785c0bb2e019d97af41a174f5256096a 100644 --- a/src/backend/cdb/cdbcopy.c +++ b/src/backend/cdb/cdbcopy.c @@ -28,6 +28,7 @@ #include "cdb/cdbtm.h" #include "cdb/cdbvars.h" #include "commands/copy.h" +#include "storage/pmsignal.h" #include "tcop/tcopprot.h" #include "utils/faultinjector.h" #include "utils/memutils.h" @@ -755,8 +756,12 @@ cdbCopyEndAndFetchRejectNum(CdbCopy *c, int *total_rows_completed) if (failed_count > 0) { elog(LOG, "%s", c->err_msg.data); - elog(LOG, "COPY passes failed segment(s) information to FTS"); - FtsHandleNetFailure(failedSegDBs, failed_count); + elog(LOG, "COPY signals FTS to probe segments"); + SendPostmasterSignal(PMSIGNAL_WAKEN_FTS); + DisconnectAndDestroyAllGangs(true); + ereport(ERROR, + (errmsg_internal("MPP detected %d segment failures, system is reconnected", failed_count), + errSendAlert(true))); } pfree(results); diff --git a/src/backend/cdb/cdbfts.c b/src/backend/cdb/cdbfts.c index 8783864a1a8146cd8a5db7154f792c144e7b9b7d..881d41eb99a99560d1afbb200201942481db8d5b 100644 --- a/src/backend/cdb/cdbfts.c +++ b/src/backend/cdb/cdbfts.c @@ -133,16 +133,11 @@ FtsNotifyProber(void) * dispatcher: ONLY CALL THREADSAFE FUNCTIONS -- elog() is NOT threadsafe. */ bool -FtsTestConnection(CdbComponentDatabaseInfo *failedDBInfo, bool fullScan) +FtsIsSegmentUp(CdbComponentDatabaseInfo *dBInfo) { /* master is always reported as alive */ - if (failedDBInfo->segindex == MASTER_SEGMENT_ID) - { + if (dBInfo->segindex == MASTER_SEGMENT_ID) return true; - } - - if (fullScan) - FtsNotifyProber(); /* * If fullscan is not requested, caller is just trying to optimize on the @@ -153,43 +148,10 @@ FtsTestConnection(CdbComponentDatabaseInfo *failedDBInfo, bool fullScan) * checking against uninitialzed variable. */ return ftsProbeInfo->fts_statusVersion ? 
- FTS_STATUS_IS_UP(ftsProbeInfo->fts_status[failedDBInfo->dbid]) : + FTS_STATUS_IS_UP(ftsProbeInfo->fts_status[dBInfo->dbid]) : true; } -/* - * Re-Configure the system: if someone has noticed that the status - * version has been updated, they call this to verify that they've got - * the right configuration. - * - * NOTE: This *always* destroys gangs. And also attempts to inform the - * fault-prober to do a full scan. - */ -void -FtsReConfigureMPP(bool create_new_gangs) -{ - /* need to scan to pick up the latest view */ - FtsNotifyProber(); - - ereport(LOG, (errmsg_internal("FTS: reconfiguration is in progress"), - errSendAlert(true))); - DisconnectAndDestroyAllGangs(true); - - /* Caller should throw an error. */ - return; -} - -void -FtsHandleNetFailure(SegmentDatabaseDescriptor **segDB, int numOfFailed) -{ - elog(LOG, "FtsHandleNetFailure: numOfFailed %d", numOfFailed); - - FtsReConfigureMPP(true); - - ereport(ERROR, (errmsg_internal("MPP detected %d segment failures, system is reconnected", numOfFailed), - errSendAlert(true))); -} - /* * Check if any segment DB is down. * @@ -199,7 +161,6 @@ bool FtsTestSegmentDBIsDown(SegmentDatabaseDescriptor *segdbDesc, int size) { int i = 0; - bool forceRescan = true; for (i = 0; i < size; i++) { @@ -207,21 +168,17 @@ FtsTestSegmentDBIsDown(SegmentDatabaseDescriptor *segdbDesc, int size) elog(DEBUG2, "FtsTestSegmentDBIsDown: looking for real fault on segment dbid %d", segInfo->dbid); - if (!FtsTestConnection(segInfo, forceRescan)) + if (!FtsIsSegmentUp(segInfo)) { ereport(LOG, (errmsg_internal("FTS: found fault with segment dbid %d. " "Reconfiguration is in progress", segInfo->dbid))); return true; } - - /* only force the rescan on the first call. 
*/ - forceRescan = false; } return false; } - void FtsCondSetTxnReadOnly(bool *XactFlag) { diff --git a/src/backend/cdb/dispatcher/cdbdisp_async.c b/src/backend/cdb/dispatcher/cdbdisp_async.c index 052f1ce16894f2afd70d3eb5c0a4737d0135da8f..953111f7e00a4f88b4d65ed40683c70a042d74ba 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_async.c +++ b/src/backend/cdb/dispatcher/cdbdisp_async.c @@ -766,8 +766,6 @@ signalQEs(CdbDispatchCmdAsync *pParms) /* * Check if any segment DB down is detected by FTS. - * - * Issue a FTS probe every 1 minute. */ static void checkSegmentAlive(CdbDispatchCmdAsync *pParms) @@ -776,8 +774,7 @@ checkSegmentAlive(CdbDispatchCmdAsync *pParms) bool forceScan = true; /* - * check the connection still valid, set 1 min time interval this may - * affect performance, should turn it off if required. + * check that the connection is still valid */ for (i = 0; i < pParms->dispatchCount; i++) { @@ -799,7 +796,13 @@ checkSegmentAlive(CdbDispatchCmdAsync *pParms) ELOG_DISPATCHER_DEBUG("FTS testing connection %d of %d (%s)", i + 1, pParms->dispatchCount, segdbDesc->whoami); - if (!FtsTestConnection(segdbDesc->segment_database_info, forceScan)) + if (forceScan) + { + FtsNotifyProber(); + forceScan = false; + } + + if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) { char *msg = PQerrorMessage(segdbDesc->conn); @@ -815,8 +818,6 @@ checkSegmentAlive(CdbDispatchCmdAsync *pParms) PQfinish(segdbDesc->conn); segdbDesc->conn = NULL; } - - forceScan = false; } } diff --git a/src/backend/cdb/dispatcher/cdbdisp_thread.c b/src/backend/cdb/dispatcher/cdbdisp_thread.c index c8b64b8910d217067da1d88fb822b69b3f1eb781..53e1cb66b4e739f493a11f06fd17a757febb87e0 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_thread.c +++ b/src/backend/cdb/dispatcher/cdbdisp_thread.c @@ -936,7 +936,7 @@ cdbdisp_checkSegmentDBAlive(DispatchCommandParms *pParms) WRITE_LOG_DISPATCHER_DEBUG("testing connection %d of %d %s stillRunning %d", i + 1, pParms->db_count, segdbDesc->whoami, 
dispatchResult->stillRunning); - if (!FtsTestConnection(segdbDesc->segment_database_info, false)) + if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) { cdbdisp_appendMessage(dispatchResult, LOG, "Lost connection to %s. FTS detected segment failures.", diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 928e69a92ac9aab140d47c731e33032cbc65e050..f4e7c2b51c5ddde11fe61baad717dba9edf40251 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -537,7 +537,7 @@ buildGangDefinition(GangType type, int gang_id, int size, int content) if (size != segCount) { - FtsReConfigureMPP(false); + DisconnectAndDestroyAllGangs(true); elog(ERROR, "Not all primary segment instances are active and connected"); } break; @@ -1317,7 +1317,7 @@ cleanupGang(Gang *gp) return false; /* if segment is down, the gang can not be reused */ - if (!FtsTestConnection(segdbDesc->segment_database_info, false)) + if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) return false; /* Note, we cancel all "still running" queries */ @@ -1827,7 +1827,7 @@ GangOK(Gang *gp) if (cdbconn_isBadConnection(segdbDesc)) return false; - if (!FtsTestConnection(segdbDesc->segment_database_info, false)) + if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) return false; } diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index 5b0fa49f8c61eee928530b875865a113a5b412e5..9c8e6cef507711ec032795190461c1d0fcf62011 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -297,6 +297,7 @@ create_gang_retry: { MemoryContextSwitchTo(GangContext); + FtsNotifyProber(); /* FTS shows some segment DBs are down */ if (FtsTestSegmentDBIsDown(newGangDefinition->db_descriptors, size)) { diff --git a/src/backend/cdb/dispatcher/cdbgang_thread.c b/src/backend/cdb/dispatcher/cdbgang_thread.c index 
6fbda94067b06bd951c8a454bdb8210aeb4a6562..add41176c86b49cd0e7f02bc18582f8ceb341862 100644 --- a/src/backend/cdb/dispatcher/cdbgang_thread.c +++ b/src/backend/cdb/dispatcher/cdbgang_thread.c @@ -223,6 +223,7 @@ create_gang_retry: /* there'er failed connections */ + FtsNotifyProber(); /* FTS shows some segment DBs are down, destroy all gangs. */ if (FtsTestSegmentDBIsDown(newGangDefinition->db_descriptors, size)) { diff --git a/src/include/cdb/cdbfts.h b/src/include/cdb/cdbfts.h index e9005c4175bb951ebf000cddbbf2bbcc538d081e..d0de825695b3fcc3dafd6165417952672807a8bd 100644 --- a/src/include/cdb/cdbfts.h +++ b/src/include/cdb/cdbfts.h @@ -55,9 +55,7 @@ extern volatile FtsProbeInfo *ftsProbeInfo; extern int FtsShmemSize(void); extern void FtsShmemInit(void); -extern bool FtsTestConnection(CdbComponentDatabaseInfo *db_to_test, bool full_scan); -extern void FtsReConfigureMPP(bool create_new_gangs); -extern void FtsHandleNetFailure(SegmentDatabaseDescriptor **, int); +extern bool FtsIsSegmentUp(CdbComponentDatabaseInfo *dBInfo); extern bool FtsTestSegmentDBIsDown(SegmentDatabaseDescriptor *, int); extern bool verifyFtsSyncCount(void);